-- ------------------------------------------------------------------ -- Mortality verification for Sepsis-3 in MIMIC-III v1.3. -- -- Usage: -- psql -d mimic -v ON_ERROR_STOP=1 \ -- -c 'SET search_path TO mimiciii, public;' \ -- -f sql/sepsis/mortality_checks.sql -- -- Purpose: -- `sanity_checks.sql` reported a 14.6% in-hospital mortality among -- Sepsis-3 = TRUE patients, well below the 25-35% range in the -- literature. The hypothesis was that the broad cohort (neonates, -- re-admissions, short stays included) drags the number down. -- -- This script walks an exclusion funnel and shows mortality at -- each step so you can confirm. It also shows 30-day mortality, -- stratification by age band, and a direct comparison with the -- numbers published in: -- -- Johnson AEW et al., Crit Care Med 2018. -- "A Comparative Analysis of Sepsis Identification Methods -- in an Electronic Database." -- Reported on MIMIC-III v1.4 with adult, first-ICU-stay, -- LOS >= 4 h cohort: -- n = 21 927 sepsis-3 stays -- in-hospital mortality = 21.0% -- 30-day mortality = 25.4% -- -- All ages use a clamp at 91 (MIMIC-III shifts DOB by 300 y for -- patients > 89; we treat them as 91 for stratification). -- ------------------------------------------------------------------ \set ON_ERROR_STOP on \timing on -- Build a working cohort table once with everything we need DROP TABLE IF EXISTS sepsis3_cohort_check; CREATE TEMP TABLE sepsis3_cohort_check AS SELECT ie.subject_id , ie.hadm_id , ie.icustay_id , ie.intime , ie.outtime , ie.first_careunit , EXTRACT(EPOCH FROM (ie.outtime - ie.intime)) / 3600.0 AS los_hours , LEAST( 91.0, EXTRACT(EPOCH FROM (ie.intime - pat.dob)) / (365.242 * 86400.0) ) AS age_at_intime , ROW_NUMBER() OVER ( PARTITION BY ie.subject_id ORDER BY ie.intime ) AS icustay_seq , adm.hospital_expire_flag AS died_in_hospital , (pat.dod IS NOT NULL AND pat.dod <= ie.intime + INTERVAL '30 days')::int AS died_within_30d , COALESCE(s.sepsis3, FALSE) AS sepsis3 FROM icustays ie JOIN admissions adm ON adm.hadm_id = ie.hadm_id JOIN patients pat ON pat.subject_id = ie.subject_id LEFT JOIN sepsis3 s ON s.icustay_id = ie.icustay_id; CREATE INDEX ON sepsis3_cohort_check (icustay_id); ANALYZE sepsis3_cohort_check; \echo \echo '==================================================================' \echo ' 1. Cohort exclusion funnel (incremental filtering)' \echo '==================================================================' \echo "Each row applies an additional restriction. The 'sepsis3' columns" \echo "report stats among rows where sepsis3 = TRUE within that cohort." \echo \echo "EXPECTED progression: as we narrow to the canonical adult/first-stay/" \echo "LOS >= 24h cohort, in-hospital mortality among Sepsis-3 should rise" \echo "from ~14% toward ~25-30%." \echo WITH levels AS ( -- 0. Everyone SELECT 0 AS lvl, '0. all icustays' AS step , c.* FROM sepsis3_cohort_check c UNION ALL -- 1. Exclude neonatal ICU SELECT 1, '1. + exclude NICU' , c.* FROM sepsis3_cohort_check c WHERE c.first_careunit != 'NICU' UNION ALL -- 2. Adult (age >= 18) on top of (1) SELECT 2, '2. + age >= 18' , c.* FROM sepsis3_cohort_check c WHERE c.first_careunit != 'NICU' AND c.age_at_intime >= 18 UNION ALL -- 3. First ICU stay only on top of (2) SELECT 3, '3. + first ICU stay only' , c.* FROM sepsis3_cohort_check c WHERE c.first_careunit != 'NICU' AND c.age_at_intime >= 18 AND c.icustay_seq = 1 UNION ALL -- 4. LOS >= 24h on top of (3) -- the canonical Seymour cohort SELECT 4, '4. + LOS >= 24 h (canonical)' , c.* FROM sepsis3_cohort_check c WHERE c.first_careunit != 'NICU' AND c.age_at_intime >= 18 AND c.icustay_seq = 1 AND c.los_hours >= 24 ) SELECT lvl , step , count(*) AS n_total , sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END) AS n_sepsis3 , round(100.0 * sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END) / count(*), 1) AS pct_sepsis3 -- in-hospital mortality among Sepsis-3 = TRUE , round(100.0 * sum(CASE WHEN sepsis3 AND died_in_hospital = 1 THEN 1 ELSE 0 END) / NULLIF(sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END), 0), 1) AS sep_inhosp_pct -- 30-day mortality among Sepsis-3 = TRUE , round(100.0 * sum(CASE WHEN sepsis3 AND died_within_30d = 1 THEN 1 ELSE 0 END) / NULLIF(sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END), 0), 1) AS sep_30d_pct -- in-hospital mortality among NOT Sepsis-3 , round(100.0 * sum(CASE WHEN NOT sepsis3 AND died_in_hospital = 1 THEN 1 ELSE 0 END) / NULLIF(sum(CASE WHEN NOT sepsis3 THEN 1 ELSE 0 END), 0), 1) AS nonsep_inhosp_pct FROM levels GROUP BY lvl, step ORDER BY lvl; \echo \echo '==================================================================' \echo ' 2. Mortality stratified by age band, canonical cohort only' \echo '==================================================================' \echo "EXPECTED:" \echo " Mortality among Sepsis-3 = TRUE rises monotonically with age." \echo " Adults < 30: ~10-15%" \echo " 30 - 50: ~15-20%" \echo " 50 - 70: ~20-25%" \echo " 70 - 90+: ~30-40%" \echo SELECT CASE WHEN age_at_intime < 30 THEN '1. <30' WHEN age_at_intime < 50 THEN '2. 30-49' WHEN age_at_intime < 70 THEN '3. 50-69' WHEN age_at_intime < 90 THEN '4. 70-89' ELSE '5. 90+' END AS age_band , count(*) AS n_sepsis3 , round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp , round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d FROM sepsis3_cohort_check WHERE sepsis3 AND first_careunit != 'NICU' AND age_at_intime >= 18 AND icustay_seq = 1 AND los_hours >= 24 GROUP BY 1 ORDER BY 1; \echo \echo '==================================================================' \echo ' 3. Mortality stratified by max-SOFA day-1 (canonical cohort)' \echo '==================================================================' \echo "EXPECTED:" \echo " Mortality should rise monotonically with SOFA. This is the" \echo " classic dose-response curve of organ dysfunction vs death," \echo " and is the strongest semantic check that the SOFA pipeline" \echo " itself is computing the right thing." \echo " SOFA 0-3: ~5-10%" \echo " SOFA 4-7: ~15-25%" \echo " SOFA 8-11: ~30-40%" \echo " SOFA 12+: ~50-65%" \echo WITH d1 AS ( SELECT icustay_id, max(sofa_24hours) AS d1_sofa FROM sofa_hourly WHERE hr <= 24 GROUP BY icustay_id ) SELECT CASE WHEN d1_sofa <= 3 THEN '1. 0-3' WHEN d1_sofa <= 7 THEN '2. 4-7' WHEN d1_sofa <= 11 THEN '3. 8-11' ELSE '4. 12+' END AS sofa_band , count(*) AS n , round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp , round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d FROM sepsis3_cohort_check c JOIN d1 USING (icustay_id) WHERE first_careunit != 'NICU' AND age_at_intime >= 18 AND icustay_seq = 1 AND los_hours >= 24 GROUP BY 1 ORDER BY 1; \echo \echo '==================================================================' \echo ' 4. Direct comparison with Johnson 2018 (canonical cohort)' \echo '==================================================================' \echo "Johnson 2018 published numbers for Sepsis-3 on MIMIC-III v1.4" \echo "with the cohort: adult, first ICU stay only, LOS >= 4 h." \echo "They reported:" \echo " n_sepsis3 = 21 927" \echo " in-hospital pct_died = 21.0%" \echo " 30-day pct_died = 25.4%" \echo \echo "We use LOS >= 24 h here (the more common Seymour 2016 cutoff)," \echo "so our n will be a bit smaller and our mortality slightly" \echo "higher than Johnson's." \echo SELECT count(*) AS n_sepsis3 , round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp , round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d FROM sepsis3_cohort_check WHERE sepsis3 AND first_careunit != 'NICU' AND age_at_intime >= 18 AND icustay_seq = 1 AND los_hours >= 24; \echo \echo '==================================================================' \echo ' 5. Sanity: where did the missing mortality "go"?' \echo '==================================================================' \echo "Decompose the gap between the broad-cohort 14.6% and the" \echo "canonical-cohort number from section 4. This shows how much" \echo "of the gap is explained by each filter individually." \echo WITH s3 AS ( SELECT * FROM sepsis3_cohort_check WHERE sepsis3 ) SELECT 'all sepsis3' AS slice , count(*) AS n , round(100.0 * sum(died_in_hospital)/count(*), 1) AS pct_inhosp FROM s3 UNION ALL SELECT 'NICU only', count(*), round(100.0 * sum(died_in_hospital)/count(*), 1) FROM s3 WHERE first_careunit = 'NICU' UNION ALL SELECT 'age < 18 only', count(*), round(100.0 * sum(died_in_hospital)/count(*), 1) FROM s3 WHERE age_at_intime < 18 UNION ALL SELECT 're-admission only', count(*), round(100.0 * sum(died_in_hospital)/count(*), 1) FROM s3 WHERE icustay_seq > 1 UNION ALL SELECT 'LOS < 24h only', count(*), round(100.0 * sum(died_in_hospital)/count(*), 1) FROM s3 WHERE los_hours < 24; \echo \echo 'Done. Compare the section-4 result to Johnson 2018 (~21% in-hospital,' \echo '~25% 30-day) for the headline check.'