Files
mimic-iv-concepts-port/sql/sepsis/mortality_checks.sql

266 lines
10 KiB
MySQL
Raw Normal View History

2026-05-05 10:22:17 +02:00
-- ------------------------------------------------------------------
-- Mortality verification for Sepsis-3 in MIMIC-III v1.3.
--
-- Usage:
-- psql -d mimic -v ON_ERROR_STOP=1 \
-- -c 'SET search_path TO mimiciii, public;' \
-- -f sql/sepsis/mortality_checks.sql
--
-- Purpose:
-- `sanity_checks.sql` reported a 14.6% in-hospital mortality among
-- Sepsis-3 = TRUE patients, well below the 25-35% range in the
-- literature. The hypothesis was that the broad cohort (neonates,
-- re-admissions, short stays included) drags the number down.
--
-- This script walks an exclusion funnel and shows mortality at
-- each step so you can confirm. It also shows 30-day mortality,
-- stratification by age band, and a direct comparison with the
-- numbers published in:
--
-- Johnson AEW et al., Crit Care Med 2018.
-- "A Comparative Analysis of Sepsis Identification Methods
-- in an Electronic Database."
-- Reported on MIMIC-III v1.4 with adult, first-ICU-stay,
-- LOS >= 4 h cohort:
-- n = 21 927 sepsis-3 stays
-- in-hospital mortality = 21.0%
-- 30-day mortality = 25.4%
--
-- All ages use a clamp at 91 (MIMIC-III shifts DOB by 300 y for
-- patients > 89; we treat them as 91 for stratification).
-- ------------------------------------------------------------------
\set ON_ERROR_STOP on
\timing on
-- Build a working cohort table once with everything we need
DROP TABLE IF EXISTS sepsis3_cohort_check;
CREATE TEMP TABLE sepsis3_cohort_check AS
SELECT ie.subject_id
, ie.hadm_id
, ie.icustay_id
, ie.intime
, ie.outtime
, ie.first_careunit
, EXTRACT(EPOCH FROM (ie.outtime - ie.intime)) / 3600.0 AS los_hours
, LEAST(
91.0,
EXTRACT(EPOCH FROM (ie.intime - pat.dob))
/ (365.242 * 86400.0)
) AS age_at_intime
, ROW_NUMBER() OVER (
PARTITION BY ie.subject_id
ORDER BY ie.intime
) AS icustay_seq
, adm.hospital_expire_flag AS died_in_hospital
, (pat.dod IS NOT NULL
AND pat.dod <= ie.intime + INTERVAL '30 days')::int AS died_within_30d
, COALESCE(s.sepsis3, FALSE) AS sepsis3
FROM icustays ie
JOIN admissions adm ON adm.hadm_id = ie.hadm_id
JOIN patients pat ON pat.subject_id = ie.subject_id
LEFT JOIN sepsis3 s ON s.icustay_id = ie.icustay_id;
CREATE INDEX ON sepsis3_cohort_check (icustay_id);
ANALYZE sepsis3_cohort_check;
\echo
\echo '=================================================================='
\echo ' 1. Cohort exclusion funnel (incremental filtering)'
\echo '=================================================================='
\echo "Each row applies an additional restriction. The 'sepsis3' columns"
\echo "report stats among rows where sepsis3 = TRUE within that cohort."
\echo
\echo "EXPECTED progression: as we narrow to the canonical adult/first-stay/"
\echo "LOS >= 24h cohort, in-hospital mortality among Sepsis-3 should rise"
\echo "from ~14% toward ~25-30%."
\echo
WITH levels AS (
-- 0. Everyone
SELECT 0 AS lvl, '0. all icustays' AS step
, c.* FROM sepsis3_cohort_check c
UNION ALL
-- 1. Exclude neonatal ICU
SELECT 1, '1. + exclude NICU'
, c.* FROM sepsis3_cohort_check c
WHERE c.first_careunit != 'NICU'
UNION ALL
-- 2. Adult (age >= 18) on top of (1)
SELECT 2, '2. + age >= 18'
, c.* FROM sepsis3_cohort_check c
WHERE c.first_careunit != 'NICU'
AND c.age_at_intime >= 18
UNION ALL
-- 3. First ICU stay only on top of (2)
SELECT 3, '3. + first ICU stay only'
, c.* FROM sepsis3_cohort_check c
WHERE c.first_careunit != 'NICU'
AND c.age_at_intime >= 18
AND c.icustay_seq = 1
UNION ALL
-- 4. LOS >= 24h on top of (3) -- the canonical Seymour cohort
SELECT 4, '4. + LOS >= 24 h (canonical)'
, c.* FROM sepsis3_cohort_check c
WHERE c.first_careunit != 'NICU'
AND c.age_at_intime >= 18
AND c.icustay_seq = 1
AND c.los_hours >= 24
)
SELECT lvl
, step
, count(*) AS n_total
, sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END) AS n_sepsis3
, round(100.0 * sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END)
/ count(*), 1) AS pct_sepsis3
-- in-hospital mortality among Sepsis-3 = TRUE
, round(100.0 * sum(CASE WHEN sepsis3 AND died_in_hospital = 1
THEN 1 ELSE 0 END)
/ NULLIF(sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END), 0), 1)
AS sep_inhosp_pct
-- 30-day mortality among Sepsis-3 = TRUE
, round(100.0 * sum(CASE WHEN sepsis3 AND died_within_30d = 1
THEN 1 ELSE 0 END)
/ NULLIF(sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END), 0), 1)
AS sep_30d_pct
-- in-hospital mortality among NOT Sepsis-3
, round(100.0 * sum(CASE WHEN NOT sepsis3 AND died_in_hospital = 1
THEN 1 ELSE 0 END)
/ NULLIF(sum(CASE WHEN NOT sepsis3 THEN 1 ELSE 0 END), 0), 1)
AS nonsep_inhosp_pct
FROM levels
GROUP BY lvl, step
ORDER BY lvl;
\echo
\echo '=================================================================='
\echo ' 2. Mortality stratified by age band, canonical cohort only'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " Mortality among Sepsis-3 = TRUE rises monotonically with age."
\echo " Adults < 30: ~10-15%"
\echo " 30 - 50: ~15-20%"
\echo " 50 - 70: ~20-25%"
\echo " 70 - 90+: ~30-40%"
\echo
SELECT CASE
WHEN age_at_intime < 30 THEN '1. <30'
WHEN age_at_intime < 50 THEN '2. 30-49'
WHEN age_at_intime < 70 THEN '3. 50-69'
WHEN age_at_intime < 90 THEN '4. 70-89'
ELSE '5. 90+'
END AS age_band
, count(*) AS n_sepsis3
, round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
, round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
FROM sepsis3_cohort_check
WHERE sepsis3
AND first_careunit != 'NICU'
AND age_at_intime >= 18
AND icustay_seq = 1
AND los_hours >= 24
GROUP BY 1
ORDER BY 1;
\echo
\echo '=================================================================='
\echo ' 3. Mortality stratified by max-SOFA day-1 (canonical cohort)'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " Mortality should rise monotonically with SOFA. This is the"
\echo " classic dose-response curve of organ dysfunction vs death,"
\echo " and is the strongest semantic check that the SOFA pipeline"
\echo " itself is computing the right thing."
\echo " SOFA 0-3: ~5-10%"
\echo " SOFA 4-7: ~15-25%"
\echo " SOFA 8-11: ~30-40%"
\echo " SOFA 12+: ~50-65%"
\echo
WITH d1 AS (
SELECT icustay_id, max(sofa_24hours) AS d1_sofa
FROM sofa_hourly
WHERE hr <= 24
GROUP BY icustay_id
)
SELECT CASE
WHEN d1_sofa <= 3 THEN '1. 0-3'
WHEN d1_sofa <= 7 THEN '2. 4-7'
WHEN d1_sofa <= 11 THEN '3. 8-11'
ELSE '4. 12+'
END AS sofa_band
, count(*) AS n
, round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
, round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
FROM sepsis3_cohort_check c
JOIN d1 USING (icustay_id)
WHERE first_careunit != 'NICU'
AND age_at_intime >= 18
AND icustay_seq = 1
AND los_hours >= 24
GROUP BY 1
ORDER BY 1;
\echo
\echo '=================================================================='
\echo ' 4. Direct comparison with Johnson 2018 (canonical cohort)'
\echo '=================================================================='
\echo "Johnson 2018 published numbers for Sepsis-3 on MIMIC-III v1.4"
\echo "with the cohort: adult, first ICU stay only, LOS >= 4 h."
\echo "They reported:"
\echo " n_sepsis3 = 21 927"
\echo " in-hospital pct_died = 21.0%"
\echo " 30-day pct_died = 25.4%"
\echo
\echo "We use LOS >= 24 h here (the more common Seymour 2016 cutoff),"
\echo "so our n will be a bit smaller and our mortality slightly"
\echo "higher than Johnson's."
\echo
SELECT count(*) AS n_sepsis3
, round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
, round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
FROM sepsis3_cohort_check
WHERE sepsis3
AND first_careunit != 'NICU'
AND age_at_intime >= 18
AND icustay_seq = 1
AND los_hours >= 24;
\echo
\echo '=================================================================='
\echo ' 5. Sanity: where did the missing mortality "go"?'
\echo '=================================================================='
\echo "Decompose the gap between the broad-cohort 14.6% and the"
\echo "canonical-cohort number from section 4. This shows how much"
\echo "of the gap is explained by each filter individually."
\echo
WITH s3 AS (
SELECT * FROM sepsis3_cohort_check WHERE sepsis3
)
SELECT 'all sepsis3' AS slice
, count(*) AS n
, round(100.0 * sum(died_in_hospital)/count(*), 1) AS pct_inhosp
FROM s3
UNION ALL SELECT 'NICU only',
count(*), round(100.0 * sum(died_in_hospital)/count(*), 1)
FROM s3 WHERE first_careunit = 'NICU'
UNION ALL SELECT 'age < 18 only',
count(*), round(100.0 * sum(died_in_hospital)/count(*), 1)
FROM s3 WHERE age_at_intime < 18
UNION ALL SELECT 're-admission only',
count(*), round(100.0 * sum(died_in_hospital)/count(*), 1)
FROM s3 WHERE icustay_seq > 1
UNION ALL SELECT 'LOS < 24h only',
count(*), round(100.0 * sum(died_in_hospital)/count(*), 1)
FROM s3 WHERE los_hours < 24;
\echo
\echo 'Done. Compare the section-4 result to Johnson 2018 (~21% in-hospital,'
\echo '~25% 30-day) for the headline check.'