266 lines
10 KiB
SQL
266 lines
10 KiB
SQL
-- ------------------------------------------------------------------
|
|
-- Mortality verification for Sepsis-3 in MIMIC-III v1.3.
|
|
--
|
|
-- Usage:
|
|
-- psql -d mimic -v ON_ERROR_STOP=1 \
|
|
-- -c 'SET search_path TO mimiciii, public;' \
|
|
-- -f sql/sepsis/mortality_checks.sql
|
|
--
|
|
-- Purpose:
|
|
-- `sanity_checks.sql` reported a 14.6% in-hospital mortality among
|
|
-- Sepsis-3 = TRUE patients, well below the 25-35% range in the
|
|
-- literature. The hypothesis was that the broad cohort (neonates,
|
|
-- re-admissions, short stays included) drags the number down.
|
|
--
|
|
-- This script walks an exclusion funnel and shows mortality at
|
|
-- each step so you can confirm. It also shows 30-day mortality,
|
|
-- stratification by age band, and a direct comparison with the
|
|
-- numbers published in:
|
|
--
|
|
-- Johnson AEW et al., Crit Care Med 2018.
|
|
-- "A Comparative Analysis of Sepsis Identification Methods
|
|
-- in an Electronic Database."
|
|
-- Reported on MIMIC-III v1.4 with adult, first-ICU-stay,
|
|
-- LOS >= 4 h cohort:
|
|
-- n = 21 927 sepsis-3 stays
|
|
-- in-hospital mortality = 21.0%
|
|
-- 30-day mortality = 25.4%
|
|
--
|
|
-- All ages use a clamp at 91 (MIMIC-III shifts DOB by 300 y for
|
|
-- patients > 89; we treat them as 91 for stratification).
|
|
-- ------------------------------------------------------------------
|
|
|
|
\set ON_ERROR_STOP on
|
|
\timing on
|
|
|
|
-- Build a working cohort table once with everything we need
|
|
DROP TABLE IF EXISTS sepsis3_cohort_check;
|
|
CREATE TEMP TABLE sepsis3_cohort_check AS
|
|
SELECT ie.subject_id
|
|
, ie.hadm_id
|
|
, ie.icustay_id
|
|
, ie.intime
|
|
, ie.outtime
|
|
, ie.first_careunit
|
|
, EXTRACT(EPOCH FROM (ie.outtime - ie.intime)) / 3600.0 AS los_hours
|
|
, LEAST(
|
|
91.0,
|
|
EXTRACT(EPOCH FROM (ie.intime - pat.dob))
|
|
/ (365.242 * 86400.0)
|
|
) AS age_at_intime
|
|
, ROW_NUMBER() OVER (
|
|
PARTITION BY ie.subject_id
|
|
ORDER BY ie.intime
|
|
) AS icustay_seq
|
|
, adm.hospital_expire_flag AS died_in_hospital
|
|
, (pat.dod IS NOT NULL
|
|
AND pat.dod <= ie.intime + INTERVAL '30 days')::int AS died_within_30d
|
|
, COALESCE(s.sepsis3, FALSE) AS sepsis3
|
|
FROM icustays ie
|
|
JOIN admissions adm ON adm.hadm_id = ie.hadm_id
|
|
JOIN patients pat ON pat.subject_id = ie.subject_id
|
|
LEFT JOIN sepsis3 s ON s.icustay_id = ie.icustay_id;
|
|
|
|
CREATE INDEX ON sepsis3_cohort_check (icustay_id);
|
|
ANALYZE sepsis3_cohort_check;
|
|
|
|
\echo
|
|
\echo '=================================================================='
|
|
\echo ' 1. Cohort exclusion funnel (incremental filtering)'
|
|
\echo '=================================================================='
|
|
\echo "Each row applies an additional restriction. The 'sepsis3' columns"
|
|
\echo "report stats among rows where sepsis3 = TRUE within that cohort."
|
|
\echo
|
|
\echo "EXPECTED progression: as we narrow to the canonical adult/first-stay/"
|
|
\echo "LOS >= 24h cohort, in-hospital mortality among Sepsis-3 should rise"
|
|
\echo "from ~14% toward ~25-30%."
|
|
\echo
|
|
WITH levels AS (
|
|
-- 0. Everyone
|
|
SELECT 0 AS lvl, '0. all icustays' AS step
|
|
, c.* FROM sepsis3_cohort_check c
|
|
|
|
UNION ALL
|
|
-- 1. Exclude neonatal ICU
|
|
SELECT 1, '1. + exclude NICU'
|
|
, c.* FROM sepsis3_cohort_check c
|
|
WHERE c.first_careunit != 'NICU'
|
|
|
|
UNION ALL
|
|
-- 2. Adult (age >= 18) on top of (1)
|
|
SELECT 2, '2. + age >= 18'
|
|
, c.* FROM sepsis3_cohort_check c
|
|
WHERE c.first_careunit != 'NICU'
|
|
AND c.age_at_intime >= 18
|
|
|
|
UNION ALL
|
|
-- 3. First ICU stay only on top of (2)
|
|
SELECT 3, '3. + first ICU stay only'
|
|
, c.* FROM sepsis3_cohort_check c
|
|
WHERE c.first_careunit != 'NICU'
|
|
AND c.age_at_intime >= 18
|
|
AND c.icustay_seq = 1
|
|
|
|
UNION ALL
|
|
-- 4. LOS >= 24h on top of (3) -- the canonical Seymour cohort
|
|
SELECT 4, '4. + LOS >= 24 h (canonical)'
|
|
, c.* FROM sepsis3_cohort_check c
|
|
WHERE c.first_careunit != 'NICU'
|
|
AND c.age_at_intime >= 18
|
|
AND c.icustay_seq = 1
|
|
AND c.los_hours >= 24
|
|
)
|
|
SELECT lvl
|
|
, step
|
|
, count(*) AS n_total
|
|
, sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END) AS n_sepsis3
|
|
, round(100.0 * sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END)
|
|
/ count(*), 1) AS pct_sepsis3
|
|
-- in-hospital mortality among Sepsis-3 = TRUE
|
|
, round(100.0 * sum(CASE WHEN sepsis3 AND died_in_hospital = 1
|
|
THEN 1 ELSE 0 END)
|
|
/ NULLIF(sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END), 0), 1)
|
|
AS sep_inhosp_pct
|
|
-- 30-day mortality among Sepsis-3 = TRUE
|
|
, round(100.0 * sum(CASE WHEN sepsis3 AND died_within_30d = 1
|
|
THEN 1 ELSE 0 END)
|
|
/ NULLIF(sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END), 0), 1)
|
|
AS sep_30d_pct
|
|
-- in-hospital mortality among NOT Sepsis-3
|
|
, round(100.0 * sum(CASE WHEN NOT sepsis3 AND died_in_hospital = 1
|
|
THEN 1 ELSE 0 END)
|
|
/ NULLIF(sum(CASE WHEN NOT sepsis3 THEN 1 ELSE 0 END), 0), 1)
|
|
AS nonsep_inhosp_pct
|
|
FROM levels
|
|
GROUP BY lvl, step
|
|
ORDER BY lvl;
|
|
|
|
|
|
\echo
|
|
\echo '=================================================================='
|
|
\echo ' 2. Mortality stratified by age band, canonical cohort only'
|
|
\echo '=================================================================='
|
|
\echo "EXPECTED:"
|
|
\echo " Mortality among Sepsis-3 = TRUE rises monotonically with age."
|
|
\echo " Adults < 30: ~10-15%"
|
|
\echo " 30 - 50: ~15-20%"
|
|
\echo " 50 - 70: ~20-25%"
|
|
\echo " 70 - 90+: ~30-40%"
|
|
\echo
|
|
SELECT CASE
|
|
WHEN age_at_intime < 30 THEN '1. <30'
|
|
WHEN age_at_intime < 50 THEN '2. 30-49'
|
|
WHEN age_at_intime < 70 THEN '3. 50-69'
|
|
WHEN age_at_intime < 90 THEN '4. 70-89'
|
|
ELSE '5. 90+'
|
|
END AS age_band
|
|
, count(*) AS n_sepsis3
|
|
, round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
|
|
, round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
|
|
FROM sepsis3_cohort_check
|
|
WHERE sepsis3
|
|
AND first_careunit != 'NICU'
|
|
AND age_at_intime >= 18
|
|
AND icustay_seq = 1
|
|
AND los_hours >= 24
|
|
GROUP BY 1
|
|
ORDER BY 1;
|
|
|
|
|
|
\echo
|
|
\echo '=================================================================='
|
|
\echo ' 3. Mortality stratified by max-SOFA day-1 (canonical cohort)'
|
|
\echo '=================================================================='
|
|
\echo "EXPECTED:"
|
|
\echo " Mortality should rise monotonically with SOFA. This is the"
|
|
\echo " classic dose-response curve of organ dysfunction vs death,"
|
|
\echo " and is the strongest semantic check that the SOFA pipeline"
|
|
\echo " itself is computing the right thing."
|
|
\echo " SOFA 0-3: ~5-10%"
|
|
\echo " SOFA 4-7: ~15-25%"
|
|
\echo " SOFA 8-11: ~30-40%"
|
|
\echo " SOFA 12+: ~50-65%"
|
|
\echo
|
|
WITH d1 AS (
|
|
SELECT icustay_id, max(sofa_24hours) AS d1_sofa
|
|
FROM sofa_hourly
|
|
WHERE hr <= 24
|
|
GROUP BY icustay_id
|
|
)
|
|
SELECT CASE
|
|
WHEN d1_sofa <= 3 THEN '1. 0-3'
|
|
WHEN d1_sofa <= 7 THEN '2. 4-7'
|
|
WHEN d1_sofa <= 11 THEN '3. 8-11'
|
|
ELSE '4. 12+'
|
|
END AS sofa_band
|
|
, count(*) AS n
|
|
, round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
|
|
, round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
|
|
FROM sepsis3_cohort_check c
|
|
JOIN d1 USING (icustay_id)
|
|
WHERE first_careunit != 'NICU'
|
|
AND age_at_intime >= 18
|
|
AND icustay_seq = 1
|
|
AND los_hours >= 24
|
|
GROUP BY 1
|
|
ORDER BY 1;
|
|
|
|
|
|
\echo
|
|
\echo '=================================================================='
|
|
\echo ' 4. Direct comparison with Johnson 2018 (canonical cohort)'
|
|
\echo '=================================================================='
|
|
\echo "Johnson 2018 published numbers for Sepsis-3 on MIMIC-III v1.4"
|
|
\echo "with the cohort: adult, first ICU stay only, LOS >= 4 h."
|
|
\echo "They reported:"
|
|
\echo " n_sepsis3 = 21 927"
|
|
\echo " in-hospital pct_died = 21.0%"
|
|
\echo " 30-day pct_died = 25.4%"
|
|
\echo
|
|
\echo "We use LOS >= 24 h here (the more common Seymour 2016 cutoff),"
|
|
\echo "so our n will be a bit smaller and our mortality slightly"
|
|
\echo "higher than Johnson's."
|
|
\echo
|
|
SELECT count(*) AS n_sepsis3
|
|
, round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
|
|
, round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
|
|
FROM sepsis3_cohort_check
|
|
WHERE sepsis3
|
|
AND first_careunit != 'NICU'
|
|
AND age_at_intime >= 18
|
|
AND icustay_seq = 1
|
|
AND los_hours >= 24;
|
|
|
|
|
|
\echo
|
|
\echo '=================================================================='
|
|
\echo ' 5. Sanity: where did the missing mortality "go"?'
|
|
\echo '=================================================================='
|
|
\echo "Decompose the gap between the broad-cohort 14.6% and the"
|
|
\echo "canonical-cohort number from section 4. This shows how much"
|
|
\echo "of the gap is explained by each filter individually."
|
|
\echo
|
|
WITH s3 AS (
|
|
SELECT * FROM sepsis3_cohort_check WHERE sepsis3
|
|
)
|
|
SELECT 'all sepsis3' AS slice
|
|
, count(*) AS n
|
|
, round(100.0 * sum(died_in_hospital)/count(*), 1) AS pct_inhosp
|
|
FROM s3
|
|
UNION ALL SELECT 'NICU only',
|
|
count(*), round(100.0 * sum(died_in_hospital)/count(*), 1)
|
|
FROM s3 WHERE first_careunit = 'NICU'
|
|
UNION ALL SELECT 'age < 18 only',
|
|
count(*), round(100.0 * sum(died_in_hospital)/count(*), 1)
|
|
FROM s3 WHERE age_at_intime < 18
|
|
UNION ALL SELECT 're-admission only',
|
|
count(*), round(100.0 * sum(died_in_hospital)/count(*), 1)
|
|
FROM s3 WHERE icustay_seq > 1
|
|
UNION ALL SELECT 'LOS < 24h only',
|
|
count(*), round(100.0 * sum(died_in_hospital)/count(*), 1)
|
|
FROM s3 WHERE los_hours < 24;
|
|
|
|
\echo
|
|
\echo 'Done. Compare the section-4 result to Johnson 2018 (~21% in-hospital,'
|
|
\echo '~25% 30-day) for the headline check.'
|