This commit is contained in:
2026-05-05 10:22:17 +02:00
commit 9a24865016
30 changed files with 5735 additions and 0 deletions

215
sql/sepsis/antibiotic.sql Normal file
View File

@@ -0,0 +1,215 @@
-- ------------------------------------------------------------------
-- Title: Antibiotic prescriptions
--
-- Adapted from the MIMIC-IV upstream
-- concepts/medication/antibiotic.sql
-- and ported to MIMIC-III v1.3 vanilla PostgreSQL.
--
-- PORT NOTES:
-- 1. MIMIC-III `prescriptions` has DATE-precision `startdate` /
-- `enddate` instead of MIMIC-IV's TIMESTAMP `starttime` /
-- `stoptime`. We carry these through unchanged; downstream
-- consumers (suspicion_of_infection.sql) treat the antibiotic
-- time as the start-of-day timestamp of `startdate`.
-- 2. MIMIC-III `prescriptions` already has a populated `icustay_id`
-- column on most rows, so we can pick the ICU stay directly
-- rather than recomputing from `intime`/`outtime`. We still
-- back-fill it from `icustays` where missing, the same way
-- MIMIC-IV does.
-- ------------------------------------------------------------------
-- Rebuild the `antibiotic` table from scratch so the script can be re-run.
-- Output: one row per antibiotic prescription row with
--   subject_id, hadm_id, icustay_id, antibiotic (drug name), route,
--   starttime (= prescriptions.startdate), stoptime (= prescriptions.enddate).
DROP TABLE IF EXISTS antibiotic;
CREATE TABLE antibiotic AS
-- abx: every distinct (drug, route) pair found in non-BASE prescriptions
-- that is not an eye/ear/topical route or formulation, flagged 1 when the
-- lower-cased drug name contains one of the antibiotic name fragments
-- below and 0 otherwise.
WITH abx AS
(
    SELECT DISTINCT
        drug
      , route
      , CASE
            WHEN LOWER(drug) LIKE '%adoxa%' THEN 1
            WHEN LOWER(drug) LIKE '%ala-tet%' THEN 1
            WHEN LOWER(drug) LIKE '%alodox%' THEN 1
            WHEN LOWER(drug) LIKE '%amikacin%' THEN 1
            WHEN LOWER(drug) LIKE '%amikin%' THEN 1
            WHEN LOWER(drug) LIKE '%amoxicill%' THEN 1
            WHEN LOWER(drug) LIKE '%amphotericin%' THEN 1
            WHEN LOWER(drug) LIKE '%anidulafungin%' THEN 1
            WHEN LOWER(drug) LIKE '%ancef%' THEN 1
            WHEN LOWER(drug) LIKE '%clavulanate%' THEN 1
            WHEN LOWER(drug) LIKE '%ampicillin%' THEN 1
            WHEN LOWER(drug) LIKE '%augmentin%' THEN 1
            WHEN LOWER(drug) LIKE '%avelox%' THEN 1
            WHEN LOWER(drug) LIKE '%avidoxy%' THEN 1
            WHEN LOWER(drug) LIKE '%azactam%' THEN 1
            WHEN LOWER(drug) LIKE '%azithromycin%' THEN 1
            WHEN LOWER(drug) LIKE '%aztreonam%' THEN 1
            WHEN LOWER(drug) LIKE '%axetil%' THEN 1
            WHEN LOWER(drug) LIKE '%bactocill%' THEN 1
            WHEN LOWER(drug) LIKE '%bactrim%' THEN 1
            WHEN LOWER(drug) LIKE '%bactroban%' THEN 1
            WHEN LOWER(drug) LIKE '%bethkis%' THEN 1
            WHEN LOWER(drug) LIKE '%biaxin%' THEN 1
            WHEN LOWER(drug) LIKE '%bicillin l-a%' THEN 1
            WHEN LOWER(drug) LIKE '%cayston%' THEN 1
            WHEN LOWER(drug) LIKE '%cefazolin%' THEN 1
            WHEN LOWER(drug) LIKE '%cedax%' THEN 1
            WHEN LOWER(drug) LIKE '%cefoxitin%' THEN 1
            WHEN LOWER(drug) LIKE '%ceftazidime%' THEN 1
            WHEN LOWER(drug) LIKE '%cefaclor%' THEN 1
            WHEN LOWER(drug) LIKE '%cefadroxil%' THEN 1
            WHEN LOWER(drug) LIKE '%cefdinir%' THEN 1
            WHEN LOWER(drug) LIKE '%cefditoren%' THEN 1
            WHEN LOWER(drug) LIKE '%cefepime%' THEN 1
            WHEN LOWER(drug) LIKE '%cefotan%' THEN 1
            WHEN LOWER(drug) LIKE '%cefotetan%' THEN 1
            WHEN LOWER(drug) LIKE '%cefotaxime%' THEN 1
            WHEN LOWER(drug) LIKE '%ceftaroline%' THEN 1
            WHEN LOWER(drug) LIKE '%cefpodoxime%' THEN 1
            WHEN LOWER(drug) LIKE '%cefpirome%' THEN 1
            WHEN LOWER(drug) LIKE '%cefprozil%' THEN 1
            WHEN LOWER(drug) LIKE '%ceftibuten%' THEN 1
            WHEN LOWER(drug) LIKE '%ceftin%' THEN 1
            WHEN LOWER(drug) LIKE '%ceftriaxone%' THEN 1
            WHEN LOWER(drug) LIKE '%cefuroxime%' THEN 1
            WHEN LOWER(drug) LIKE '%cephalexin%' THEN 1
            WHEN LOWER(drug) LIKE '%cephalothin%' THEN 1
            WHEN LOWER(drug) LIKE '%cephapririn%' THEN 1
            WHEN LOWER(drug) LIKE '%chloramphenicol%' THEN 1
            WHEN LOWER(drug) LIKE '%cipro%' THEN 1
            WHEN LOWER(drug) LIKE '%ciprofloxacin%' THEN 1
            WHEN LOWER(drug) LIKE '%claforan%' THEN 1
            WHEN LOWER(drug) LIKE '%clarithromycin%' THEN 1
            WHEN LOWER(drug) LIKE '%cleocin%' THEN 1
            WHEN LOWER(drug) LIKE '%clindamycin%' THEN 1
            WHEN LOWER(drug) LIKE '%cubicin%' THEN 1
            WHEN LOWER(drug) LIKE '%dicloxacillin%' THEN 1
            WHEN LOWER(drug) LIKE '%dirithromycin%' THEN 1
            WHEN LOWER(drug) LIKE '%doryx%' THEN 1
            WHEN LOWER(drug) LIKE '%doxycy%' THEN 1
            WHEN LOWER(drug) LIKE '%duricef%' THEN 1
            WHEN LOWER(drug) LIKE '%dynacin%' THEN 1
            WHEN LOWER(drug) LIKE '%ery-tab%' THEN 1
            WHEN LOWER(drug) LIKE '%eryped%' THEN 1
            WHEN LOWER(drug) LIKE '%eryc%' THEN 1
            WHEN LOWER(drug) LIKE '%erythrocin%' THEN 1
            WHEN LOWER(drug) LIKE '%erythromycin%' THEN 1
            WHEN LOWER(drug) LIKE '%factive%' THEN 1
            WHEN LOWER(drug) LIKE '%flagyl%' THEN 1
            WHEN LOWER(drug) LIKE '%fortaz%' THEN 1
            WHEN LOWER(drug) LIKE '%furadantin%' THEN 1
            WHEN LOWER(drug) LIKE '%garamycin%' THEN 1
            WHEN LOWER(drug) LIKE '%gentamicin%' THEN 1
            WHEN LOWER(drug) LIKE '%kanamycin%' THEN 1
            WHEN LOWER(drug) LIKE '%keflex%' THEN 1
            WHEN LOWER(drug) LIKE '%kefzol%' THEN 1
            WHEN LOWER(drug) LIKE '%ketek%' THEN 1
            WHEN LOWER(drug) LIKE '%levaquin%' THEN 1
            WHEN LOWER(drug) LIKE '%levofloxacin%' THEN 1
            WHEN LOWER(drug) LIKE '%lincocin%' THEN 1
            WHEN LOWER(drug) LIKE '%linezolid%' THEN 1
            WHEN LOWER(drug) LIKE '%macrobid%' THEN 1
            WHEN LOWER(drug) LIKE '%macrodantin%' THEN 1
            WHEN LOWER(drug) LIKE '%maxipime%' THEN 1
            WHEN LOWER(drug) LIKE '%mefoxin%' THEN 1
            WHEN LOWER(drug) LIKE '%metronidazole%' THEN 1
            WHEN LOWER(drug) LIKE '%meropenem%' THEN 1
            WHEN LOWER(drug) LIKE '%methicillin%' THEN 1
            WHEN LOWER(drug) LIKE '%minocin%' THEN 1
            WHEN LOWER(drug) LIKE '%minocycline%' THEN 1
            WHEN LOWER(drug) LIKE '%monodox%' THEN 1
            WHEN LOWER(drug) LIKE '%monurol%' THEN 1
            WHEN LOWER(drug) LIKE '%morgidox%' THEN 1
            WHEN LOWER(drug) LIKE '%moxatag%' THEN 1
            WHEN LOWER(drug) LIKE '%moxifloxacin%' THEN 1
            WHEN LOWER(drug) LIKE '%mupirocin%' THEN 1
            WHEN LOWER(drug) LIKE '%myrac%' THEN 1
            WHEN LOWER(drug) LIKE '%nafcillin%' THEN 1
            WHEN LOWER(drug) LIKE '%neomycin%' THEN 1
            WHEN LOWER(drug) LIKE '%nicazel doxy 30%' THEN 1
            WHEN LOWER(drug) LIKE '%nitrofurantoin%' THEN 1
            WHEN LOWER(drug) LIKE '%norfloxacin%' THEN 1
            WHEN LOWER(drug) LIKE '%noroxin%' THEN 1
            WHEN LOWER(drug) LIKE '%ocudox%' THEN 1
            WHEN LOWER(drug) LIKE '%ofloxacin%' THEN 1
            WHEN LOWER(drug) LIKE '%omnicef%' THEN 1
            WHEN LOWER(drug) LIKE '%oracea%' THEN 1
            WHEN LOWER(drug) LIKE '%oraxyl%' THEN 1
            WHEN LOWER(drug) LIKE '%oxacillin%' THEN 1
            WHEN LOWER(drug) LIKE '%pc pen vk%' THEN 1
            WHEN LOWER(drug) LIKE '%pce dispertab%' THEN 1
            WHEN LOWER(drug) LIKE '%panixine%' THEN 1
            WHEN LOWER(drug) LIKE '%pediazole%' THEN 1
            WHEN LOWER(drug) LIKE '%penicillin%' THEN 1
            WHEN LOWER(drug) LIKE '%periostat%' THEN 1
            WHEN LOWER(drug) LIKE '%pfizerpen%' THEN 1
            WHEN LOWER(drug) LIKE '%piperacillin%' THEN 1
            WHEN LOWER(drug) LIKE '%tazobactam%' THEN 1
            WHEN LOWER(drug) LIKE '%primsol%' THEN 1
            WHEN LOWER(drug) LIKE '%proquin%' THEN 1
            WHEN LOWER(drug) LIKE '%raniclor%' THEN 1
            WHEN LOWER(drug) LIKE '%rifadin%' THEN 1
            WHEN LOWER(drug) LIKE '%rifampin%' THEN 1
            WHEN LOWER(drug) LIKE '%rocephin%' THEN 1
            WHEN LOWER(drug) LIKE '%smz-tmp%' THEN 1
            WHEN LOWER(drug) LIKE '%septra%' THEN 1
            WHEN LOWER(drug) LIKE '%septra ds%' THEN 1
            WHEN LOWER(drug) LIKE '%solodyn%' THEN 1
            WHEN LOWER(drug) LIKE '%spectracef%' THEN 1
            WHEN LOWER(drug) LIKE '%streptomycin%' THEN 1
            WHEN LOWER(drug) LIKE '%sulfadiazine%' THEN 1
            WHEN LOWER(drug) LIKE '%sulfamethoxazole%' THEN 1
            WHEN LOWER(drug) LIKE '%trimethoprim%' THEN 1
            WHEN LOWER(drug) LIKE '%sulfatrim%' THEN 1
            WHEN LOWER(drug) LIKE '%sulfisoxazole%' THEN 1
            WHEN LOWER(drug) LIKE '%suprax%' THEN 1
            WHEN LOWER(drug) LIKE '%synercid%' THEN 1
            WHEN LOWER(drug) LIKE '%tazicef%' THEN 1
            WHEN LOWER(drug) LIKE '%tetracycline%' THEN 1
            WHEN LOWER(drug) LIKE '%timentin%' THEN 1
            WHEN LOWER(drug) LIKE '%tobramycin%' THEN 1
            WHEN LOWER(drug) LIKE '%unasyn%' THEN 1
            WHEN LOWER(drug) LIKE '%vancocin%' THEN 1
            WHEN LOWER(drug) LIKE '%vancomycin%' THEN 1
            WHEN LOWER(drug) LIKE '%vantin%' THEN 1
            WHEN LOWER(drug) LIKE '%vibativ%' THEN 1
            WHEN LOWER(drug) LIKE '%vibra-tabs%' THEN 1
            WHEN LOWER(drug) LIKE '%vibramycin%' THEN 1
            WHEN LOWER(drug) LIKE '%zinacef%' THEN 1
            WHEN LOWER(drug) LIKE '%zithromax%' THEN 1
            WHEN LOWER(drug) LIKE '%zosyn%' THEN 1
            WHEN LOWER(drug) LIKE '%zyvox%' THEN 1
            ELSE 0
        END AS antibiotic
    FROM prescriptions
    WHERE drug_type NOT IN ('BASE')
      -- match upstream: NULL routes are excluded by the NOT IN.
      AND route NOT IN ('OU','OS','OD','AU','AS','AD','TP')
      AND LOWER(route) NOT LIKE '%ear%'
      AND LOWER(route) NOT LIKE '%eye%'
      AND LOWER(drug) NOT LIKE '%cream%'
      AND LOWER(drug) NOT LIKE '%desensitization%'
      AND LOWER(drug) NOT LIKE '%ophth oint%'
      AND LOWER(drug) NOT LIKE '%gel%'
)
SELECT pr.subject_id
     , pr.hadm_id
     -- Prefer the icustay_id recorded on the prescription and fall back to
     -- the date-matched ICU stay only when it is missing.
     , COALESCE(pr.icustay_id, ie.icustay_id) AS icustay_id
     , pr.drug AS antibiotic
     , pr.route
     -- DATE-precision in MIMIC-III; downstream treats this as the
     -- start-of-day timestamp.
     , pr.startdate AS starttime
     , pr.enddate AS stoptime
FROM prescriptions pr
INNER JOIN abx
    ON pr.drug = abx.drug
   AND pr.route = abx.route
LEFT JOIN icustays ie
    -- Back-fill only. Without the IS NULL guard a prescription that already
    -- has an icustay_id would still be joined to every ICU stay of the
    -- admission whose calendar-day window contains startdate, so a
    -- prescription dated on a transfer day was emitted once per matching
    -- stay (all copies carrying the same COALESCEd icustay_id).
    ON pr.icustay_id IS NULL
   AND pr.hadm_id = ie.hadm_id
   AND pr.startdate >= CAST(ie.intime AS DATE)
   AND pr.startdate <= CAST(ie.outtime AS DATE)
   -- NOTE(review): for rows with no icustay_id, the day-level window can
   -- still match two stays of one admission that share a calendar day;
   -- confirm whether downstream expects both candidates.
WHERE abx.antibiotic = 1;
CREATE INDEX IF NOT EXISTS antibiotic_idx
    ON antibiotic (subject_id, hadm_id, starttime);

View File

@@ -0,0 +1,230 @@
-- ------------------------------------------------------------------
-- All-time arterial blood-gas pivot (PaO2 / FiO2 ratio at every gas).
--
-- This script is a fusion of the upstream MIMIC-III concepts_postgres
-- files
-- firstday/blood_gas_first_day.sql
-- firstday/blood_gas_first_day_arterial.sql
-- with their day-1 time predicate removed, so we get one row per
-- (icustay_id, charttime) for the entire ICU stay.
--
-- Output table: blood_gas_arterial
-- Output cols : subject_id, hadm_id, icustay_id, charttime,
-- specimen, specimen_pred, specimen_prob,
-- so2, spo2, po2, pco2, fio2_chartevents, fio2,
-- aado2, aado2_calc, pao2fio2, ph, baseexcess,
-- bicarbonate, totalco2, hematocrit, hemoglobin,
-- carboxyhemoglobin, methemoglobin, chloride, calcium,
-- temperature, potassium, sodium, lactate, glucose,
-- intubated, tidalvolume, ventilationrate, ventilator,
-- peep, o2flow, requiredo2
--
-- Restricted to *arterial* samples (specimen = 'ART' or
-- specimen_prob > 0.75).
-- ------------------------------------------------------------------
-- Rebuild `blood_gas_arterial`: one row per (icustay_id, charttime) blood
-- gas that has a PO2 value and is arterial, either by its recorded
-- specimen ('ART') or by a predicted probability above 0.75.
DROP TABLE IF EXISTS blood_gas_arterial;
CREATE TABLE blood_gas_arterial AS
-- bg_pvt: blood-gas lab rows charted between ICU intime and outtime, with
-- the itemid mapped to a label and implausible numeric values nulled out.
-- Itemids 50807 and 51545 are selected but have no label, so they never
-- reach a pivoted column.
WITH bg_pvt AS
(
    SELECT ie.subject_id, ie.hadm_id, ie.icustay_id
         , CASE
               WHEN itemid = 50800 THEN 'SPECIMEN'
               WHEN itemid = 50801 THEN 'AADO2'
               WHEN itemid = 50802 THEN 'BASEEXCESS'
               WHEN itemid = 50803 THEN 'BICARBONATE'
               WHEN itemid = 50804 THEN 'TOTALCO2'
               WHEN itemid = 50805 THEN 'CARBOXYHEMOGLOBIN'
               WHEN itemid = 50806 THEN 'CHLORIDE'
               WHEN itemid = 50808 THEN 'CALCIUM'
               WHEN itemid = 50809 THEN 'GLUCOSE'
               WHEN itemid = 50810 THEN 'HEMATOCRIT'
               WHEN itemid = 50811 THEN 'HEMOGLOBIN'
               WHEN itemid = 50812 THEN 'INTUBATED'
               WHEN itemid = 50813 THEN 'LACTATE'
               WHEN itemid = 50814 THEN 'METHEMOGLOBIN'
               WHEN itemid = 50815 THEN 'O2FLOW'
               WHEN itemid = 50816 THEN 'FIO2'
               WHEN itemid = 50817 THEN 'SO2'
               WHEN itemid = 50818 THEN 'PCO2'
               WHEN itemid = 50819 THEN 'PEEP'
               WHEN itemid = 50820 THEN 'PH'
               WHEN itemid = 50821 THEN 'PO2'
               WHEN itemid = 50822 THEN 'POTASSIUM'
               WHEN itemid = 50823 THEN 'REQUIREDO2'
               WHEN itemid = 50824 THEN 'SODIUM'
               WHEN itemid = 50825 THEN 'TEMPERATURE'
               WHEN itemid = 50826 THEN 'TIDALVOLUME'
               WHEN itemid = 50827 THEN 'VENTILATIONRATE'
               WHEN itemid = 50828 THEN 'VENTILATOR'
               ELSE NULL
           END AS label
         , le.charttime
         , le.value
         -- Range checks: non-positive values are dropped for everything
         -- except base excess (50802). The remaining rules cap hematocrit,
         -- FiO2, SO2, O2 flow and PO2.
         , CASE
               WHEN valuenum <= 0 AND itemid != 50802 THEN NULL
               WHEN itemid = 50810 AND valuenum > 100 THEN NULL
               WHEN itemid = 50816 AND valuenum < 20 THEN NULL
               WHEN itemid = 50816 AND valuenum > 100 THEN NULL
               WHEN itemid = 50817 AND valuenum > 100 THEN NULL
               WHEN itemid = 50815 AND valuenum > 70 THEN NULL
               WHEN itemid = 50821 AND valuenum > 800 THEN NULL
               ELSE valuenum
           END AS valuenum
    FROM icustays ie
    INNER JOIN labevents le
        ON le.subject_id = ie.subject_id
       AND le.hadm_id = ie.hadm_id
       AND le.charttime BETWEEN ie.intime AND ie.outtime
       AND le.itemid IN (
               50800,50801,50802,50803,50804,50805,50806,50807,50808,50809
             , 50810,50811,50812,50813,50814,50815,50816,50817,50818,50819
             , 50820,50821,50822,50823,50824,50825,50826,50827,50828
             , 51545
           )
)
-- bg: pivot the labelled rows to one wide row per stay and charttime.
, bg AS
(
    SELECT subject_id, hadm_id, icustay_id, charttime
         , MAX(CASE WHEN label = 'SPECIMEN' THEN value END) AS specimen
         , MAX(CASE WHEN label = 'AADO2' THEN valuenum END) AS aado2
         , MAX(CASE WHEN label = 'BASEEXCESS' THEN valuenum END) AS baseexcess
         , MAX(CASE WHEN label = 'BICARBONATE' THEN valuenum END) AS bicarbonate
         , MAX(CASE WHEN label = 'TOTALCO2' THEN valuenum END) AS totalco2
         , MAX(CASE WHEN label = 'CARBOXYHEMOGLOBIN' THEN valuenum END) AS carboxyhemoglobin
         , MAX(CASE WHEN label = 'CHLORIDE' THEN valuenum END) AS chloride
         , MAX(CASE WHEN label = 'CALCIUM' THEN valuenum END) AS calcium
         , MAX(CASE WHEN label = 'GLUCOSE' THEN valuenum END) AS glucose
         , MAX(CASE WHEN label = 'HEMATOCRIT' THEN valuenum END) AS hematocrit
         , MAX(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum END) AS hemoglobin
         , MAX(CASE WHEN label = 'INTUBATED' THEN valuenum END) AS intubated
         , MAX(CASE WHEN label = 'LACTATE' THEN valuenum END) AS lactate
         , MAX(CASE WHEN label = 'METHEMOGLOBIN' THEN valuenum END) AS methemoglobin
         , MAX(CASE WHEN label = 'O2FLOW' THEN valuenum END) AS o2flow
         , MAX(CASE WHEN label = 'FIO2' THEN valuenum END) AS fio2
         , MAX(CASE WHEN label = 'SO2' THEN valuenum END) AS so2
         , MAX(CASE WHEN label = 'PCO2' THEN valuenum END) AS pco2
         , MAX(CASE WHEN label = 'PEEP' THEN valuenum END) AS peep
         , MAX(CASE WHEN label = 'PH' THEN valuenum END) AS ph
         , MAX(CASE WHEN label = 'PO2' THEN valuenum END) AS po2
         , MAX(CASE WHEN label = 'POTASSIUM' THEN valuenum END) AS potassium
         , MAX(CASE WHEN label = 'REQUIREDO2' THEN valuenum END) AS requiredo2
         , MAX(CASE WHEN label = 'SODIUM' THEN valuenum END) AS sodium
         , MAX(CASE WHEN label = 'TEMPERATURE' THEN valuenum END) AS temperature
         , MAX(CASE WHEN label = 'TIDALVOLUME' THEN valuenum END) AS tidalvolume
         , MAX(CASE WHEN label = 'VENTILATIONRATE' THEN valuenum END) AS ventilationrate
         , MAX(CASE WHEN label = 'VENTILATOR' THEN valuenum END) AS ventilator
    FROM bg_pvt
    GROUP BY subject_id, hadm_id, icustay_id, charttime
)
-- stg_spo2: charted SpO2 per stay and charttime, keeping values in (0, 100].
, stg_spo2 AS
(
    SELECT subject_id, hadm_id, icustay_id, charttime
         , MAX(CASE WHEN valuenum <= 0 OR valuenum > 100 THEN NULL ELSE valuenum END) AS spo2
    FROM chartevents
    WHERE itemid IN (646, 220277)
    GROUP BY subject_id, hadm_id, icustay_id, charttime
)
-- stg_fio2: charted FiO2 per stay and charttime, expressed as a percentage.
-- Fractions are scaled by 100. Itemids 3420/3422 are passed through
-- without any range check, so 0 (or other implausible values) can survive.
, stg_fio2 AS
(
    SELECT subject_id, hadm_id, icustay_id, charttime
         , MAX(
               CASE
                   WHEN itemid = 223835 THEN
                       CASE
                           WHEN valuenum > 0 AND valuenum <= 1 THEN valuenum * 100
                           WHEN valuenum > 1 AND valuenum < 21 THEN NULL
                           WHEN valuenum >= 21 AND valuenum <= 100 THEN valuenum
                           ELSE NULL
                       END
                   WHEN itemid IN (3420, 3422) THEN valuenum
                   WHEN itemid = 190 AND valuenum > 0.20 AND valuenum < 1
                       THEN valuenum * 100
                   ELSE NULL
               END
           ) AS fio2_chartevents
    FROM chartevents
    WHERE itemid IN (3420, 190, 223835, 3422)
      AND COALESCE(error, 0) = 0
    GROUP BY subject_id, hadm_id, icustay_id, charttime
)
-- stg2: attach SpO2 readings charted in the 2 hours up to the gas.
-- lastrowspo2 = 1 marks the most recent one.
, stg2 AS
(
    SELECT bg.*
         , ROW_NUMBER() OVER (
               PARTITION BY bg.icustay_id, bg.charttime
               ORDER BY s1.charttime DESC
           ) AS lastrowspo2
         , s1.spo2
    FROM bg
    LEFT JOIN stg_spo2 s1
        ON bg.icustay_id = s1.icustay_id
       -- Native interval arithmetic: no dependency on a DATETIME_SUB
       -- compatibility function being installed in the database.
       AND s1.charttime >= bg.charttime - INTERVAL '2' HOUR
       AND s1.charttime <= bg.charttime
    WHERE bg.po2 IS NOT NULL
)
-- stg3: attach FiO2 readings charted in the 4 hours up to the gas
-- (lastrowfio2 = 1 marks the most recent one) and compute specimen_prob,
-- a logistic function of the measurements.
, stg3 AS
(
    SELECT stg2.*
         , ROW_NUMBER() OVER (
               PARTITION BY stg2.icustay_id, stg2.charttime
               ORDER BY s2.charttime DESC
           ) AS lastrowfio2
         , s2.fio2_chartevents
         -- Each COALESCE falls back to a constant term when the measurement
         -- is missing. NOTE(review): the constants look like
         -- coefficient * imputed mean + missing-indicator offset from the
         -- upstream model; confirm against upstream before editing.
         , 1 / (1 + EXP(-(-0.02544
               + 0.04598 * po2
               + COALESCE(-0.15356 * spo2               , -0.15356 *  97.49420 +  0.13429)
               + COALESCE( 0.00621 * s2.fio2_chartevents,  0.00621 *  51.49550 + -0.24958)
               + COALESCE( 0.10559 * hemoglobin         ,  0.10559 *  10.32307 +  0.05954)
               + COALESCE( 0.13251 * so2                ,  0.13251 *  93.66539 + -0.23172)
               + COALESCE(-0.01511 * pco2               , -0.01511 *  42.08866 + -0.01630)
               + COALESCE( 0.01480 * fio2               ,  0.01480 *  63.97836 + -0.31142)
               + COALESCE(-0.00200 * aado2              , -0.00200 * 442.21186 + -0.01328)
               + COALESCE(-0.03220 * bicarbonate        , -0.03220 *  22.96894 + -0.06535)
               + COALESCE( 0.05384 * totalco2           ,  0.05384 *  24.72632 + -0.01405)
               + COALESCE( 0.08202 * lactate            ,  0.08202 *   3.06436 +  0.06038)
               + COALESCE( 0.10956 * ph                 ,  0.10956 *   7.36233 + -0.00617)
               + COALESCE( 0.00848 * o2flow             ,  0.00848 *   7.59362 + -0.35803)
           ))) AS specimen_prob
    FROM stg2
    LEFT JOIN stg_fio2 s2
        ON stg2.icustay_id = s2.icustay_id
       AND s2.charttime BETWEEN (stg2.charttime - INTERVAL '4' HOUR)
                            AND stg2.charttime
    WHERE stg2.lastrowspo2 = 1
)
SELECT subject_id, hadm_id, icustay_id, charttime
     , specimen
     -- Recorded specimen wins; otherwise predict 'ART' above the threshold.
     , CASE
           WHEN specimen IS NOT NULL THEN specimen
           WHEN specimen_prob > 0.75 THEN 'ART'
           ELSE NULL
       END AS specimen_pred
     , specimen_prob
     , so2, spo2, po2, pco2
     , fio2_chartevents, fio2
     , aado2
     -- Computed from FiO2 (lab value preferred, charted value as fallback),
     -- PCO2 and PO2.
     , CASE
           WHEN po2 IS NOT NULL
            AND pco2 IS NOT NULL
            AND COALESCE(fio2, fio2_chartevents) IS NOT NULL
               THEN (COALESCE(fio2, fio2_chartevents) / 100) * (760 - 47) - (pco2 / 0.8) - po2
           ELSE NULL
       END AS aado2_calc
     -- FiO2 is a percentage here, hence the factor 100. NULLIF guards the
     -- division: a charted FiO2 of 0 would otherwise abort the whole
     -- CREATE TABLE with a division-by-zero error.
     , CASE
           WHEN po2 IS NOT NULL AND COALESCE(fio2, fio2_chartevents) IS NOT NULL
               THEN 100 * po2 / NULLIF(COALESCE(fio2, fio2_chartevents), 0)
           ELSE NULL
       END AS pao2fio2
     , ph, baseexcess, bicarbonate, totalco2
     , hematocrit, hemoglobin, carboxyhemoglobin, methemoglobin
     , chloride, calcium, temperature, potassium, sodium, lactate, glucose
     , intubated, tidalvolume, ventilationrate, ventilator
     , peep, o2flow, requiredo2
FROM stg3
WHERE lastrowfio2 = 1
  AND (specimen = 'ART' OR specimen_prob > 0.75);
CREATE INDEX IF NOT EXISTS blood_gas_arterial_idx
    ON blood_gas_arterial (icustay_id, charttime);

78
sql/sepsis/gcs_all.sql Normal file
View File

@@ -0,0 +1,78 @@
-- ------------------------------------------------------------------
-- All-time GCS pivot.
--
-- Adapted from the upstream MIMIC-III concepts_postgres file
-- firstday/gcs_first_day.sql
-- with the day-1 time predicate removed, so the output holds one row
-- per (icustay_id, charttime) for the entire ICU stay.
--
-- The carry-forward logic (impute missing components from the
-- immediately preceding charttime within 6 h) is preserved. GCS
-- during sedation/intubation is set to 15, matching upstream.
-- ------------------------------------------------------------------
-- Rebuild `gcs_all`: one row per (icustay_id, charttime) with the total
-- GCS and a flag for an endotracheal/trach verbal score.
DROP TABLE IF EXISTS gcs_all;
CREATE TABLE gcs_all AS
-- base: pivot the three GCS components to one row per stay and charttime.
-- The inner query folds the second set of itemids (223900, 223901, 220739)
-- onto 723 / 454 / 184 and encodes the two "ET/Trach" verbal texts as 0.
-- rn numbers the charttimes within a stay in ascending order.
WITH base AS
(
    SELECT pvt.icustay_id
         , pvt.charttime
         , MAX(CASE WHEN pvt.itemid = 454 THEN pvt.valuenum END) AS gcsmotor
         , MAX(CASE WHEN pvt.itemid = 723 THEN pvt.valuenum END) AS gcsverbal
         , MAX(CASE WHEN pvt.itemid = 184 THEN pvt.valuenum END) AS gcseyes
         , CASE
               WHEN MAX(CASE WHEN pvt.itemid = 723 THEN pvt.valuenum END) = 0
                   THEN 1 ELSE 0
           END AS endotrachflag
         , ROW_NUMBER() OVER (PARTITION BY pvt.icustay_id ORDER BY pvt.charttime ASC) AS rn
    FROM (
        SELECT l.icustay_id
             , CASE
                   WHEN l.itemid IN (723, 223900) THEN 723
                   WHEN l.itemid IN (454, 223901) THEN 454
                   WHEN l.itemid IN (184, 220739) THEN 184
                   ELSE l.itemid
               END AS itemid
             , CASE
                   WHEN l.itemid = 723 AND l.value = '1.0 ET/Trach' THEN 0
                   WHEN l.itemid = 223900 AND l.value = 'No Response-ETT' THEN 0
                   ELSE l.valuenum
               END AS valuenum
             , l.charttime
        FROM chartevents l
        INNER JOIN icustays b
            ON l.icustay_id = b.icustay_id
        WHERE l.itemid IN (184, 454, 723, 223900, 223901, 220739)
          AND l.charttime BETWEEN b.intime AND b.outtime
          AND COALESCE(l.error, 0) = 0
    ) pvt
    GROUP BY pvt.icustay_id, pvt.charttime
)
-- gcs: total score per charttime. b2 is the immediately preceding
-- charttime of the same stay, used only when it is less than 6 hours old.
, gcs AS
(
    SELECT b.icustay_id
         , b.charttime
         , CASE
               -- Verbal score of 0 (ET/trach), now or carried from the
               -- previous row, yields a total of 15.
               WHEN b.gcsverbal = 0 THEN 15
               WHEN b.gcsverbal IS NULL AND b2.gcsverbal = 0 THEN 15
               -- Previous row was ET/trach but this one has a verbal score:
               -- do not carry the previous components forward; missing
               -- components default to their maximum.
               WHEN b2.gcsverbal = 0 THEN
                     COALESCE(b.gcsmotor , 6)
                   + COALESCE(b.gcsverbal, 5)
                   + COALESCE(b.gcseyes  , 4)
               -- Otherwise fill gaps from the previous row, then from the
               -- component maximum.
               ELSE
                     COALESCE(b.gcsmotor , COALESCE(b2.gcsmotor , 6))
                   + COALESCE(b.gcsverbal, COALESCE(b2.gcsverbal, 5))
                   + COALESCE(b.gcseyes  , COALESCE(b2.gcseyes  , 4))
           END AS gcs
         , b.endotrachflag
    FROM base b
    LEFT JOIN base b2
        ON b.icustay_id = b2.icustay_id
       AND b.rn = b2.rn + 1
       -- Native interval arithmetic: no dependency on a DATETIME_SUB
       -- compatibility function being installed in the database.
       AND b2.charttime > b.charttime - INTERVAL '6' HOUR
)
SELECT icustay_id, charttime, gcs, endotrachflag
FROM gcs;
CREATE INDEX IF NOT EXISTS gcs_all_idx ON gcs_all (icustay_id, charttime);

View File

@@ -0,0 +1,265 @@
-- ------------------------------------------------------------------
-- Mortality verification for Sepsis-3 in MIMIC-III v1.3.
--
-- Usage:
-- psql -d mimic -v ON_ERROR_STOP=1 \
-- -c 'SET search_path TO mimiciii, public;' \
-- -f sql/sepsis/mortality_checks.sql
--
-- Purpose:
-- `sanity_checks.sql` reported a 14.6% in-hospital mortality among
-- Sepsis-3 = TRUE patients, well below the 25-35% range in the
-- literature. The hypothesis was that the broad cohort (neonates,
-- re-admissions, short stays included) drags the number down.
--
-- This script walks an exclusion funnel and shows mortality at
-- each step so you can confirm. It also shows 30-day mortality,
-- stratification by age band, and a direct comparison with the
-- numbers published in:
--
-- Johnson AEW et al., Crit Care Med 2018.
-- "A Comparative Analysis of Sepsis Identification Methods
-- in an Electronic Database."
-- Reported on MIMIC-III v1.4 with adult, first-ICU-stay,
-- LOS >= 4 h cohort:
-- n = 21 927 sepsis-3 stays
-- in-hospital mortality = 21.0%
-- 30-day mortality = 25.4%
--
-- All ages use a clamp at 91 (MIMIC-III shifts DOB by 300 y for
-- patients > 89; we treat them as 91 for stratification).
-- ------------------------------------------------------------------
\set ON_ERROR_STOP on
\timing on
-- Working cohort, built once and reused by every section below.
-- One row per ICU stay (assuming `sepsis3` holds at most one row per
-- icustay_id) with length of stay in hours, age at ICU admission capped
-- at 91, the stay's ordinal position for the patient, both mortality
-- flags and the Sepsis-3 label (FALSE when the stay is absent from
-- `sepsis3`).
DROP TABLE IF EXISTS sepsis3_cohort_check;
CREATE TEMP TABLE sepsis3_cohort_check AS
SELECT
    icu.subject_id,
    icu.hadm_id,
    icu.icustay_id,
    icu.intime,
    icu.outtime,
    icu.first_careunit,
    EXTRACT(EPOCH FROM (icu.outtime - icu.intime)) / 3600.0 AS los_hours,
    LEAST(
        91.0,
        EXTRACT(EPOCH FROM (icu.intime - p.dob)) / (365.242 * 86400.0)
    ) AS age_at_intime,
    ROW_NUMBER() OVER (
        PARTITION BY icu.subject_id
        ORDER BY icu.intime
    ) AS icustay_seq,
    a.hospital_expire_flag AS died_in_hospital,
    -- 1 when a date of death exists and falls within 30 days of ICU intime.
    CAST(
        (p.dod IS NOT NULL AND p.dod <= icu.intime + INTERVAL '30 days')
        AS int
    ) AS died_within_30d,
    COALESCE(sep.sepsis3, FALSE) AS sepsis3
FROM icustays AS icu
INNER JOIN admissions AS a
    ON a.hadm_id = icu.hadm_id
INNER JOIN patients AS p
    ON p.subject_id = icu.subject_id
LEFT JOIN sepsis3 AS sep
    ON sep.icustay_id = icu.icustay_id;
CREATE INDEX ON sepsis3_cohort_check (icustay_id);
ANALYZE sepsis3_cohort_check;
\echo
\echo '=================================================================='
\echo ' 1. Cohort exclusion funnel (incremental filtering)'
\echo '=================================================================='
\echo "Each row applies an additional restriction. The 'sepsis3' columns"
\echo "report stats among rows where sepsis3 = TRUE within that cohort."
\echo
\echo "EXPECTED progression: as we narrow to the canonical adult/first-stay/"
\echo "LOS >= 24h cohort, in-hospital mortality among Sepsis-3 should rise"
\echo "from ~14% toward ~25-30%."
\echo
-- Funnel levels are cumulative: level k applies the restrictions of all
-- levels 1..k. Each "(f.lvl < k OR <predicate>)" term switches predicate k
-- on from level k upwards, so every cohort row is paired with exactly the
-- levels it survives.
WITH funnel (lvl, step) AS (
    VALUES
        (0, '0. all icustays')
      , (1, '1. + exclude NICU')
      , (2, '2. + age >= 18')
      , (3, '3. + first ICU stay only')
      , (4, '4. + LOS >= 24 h (canonical)')
)
SELECT
    f.lvl,
    f.step,
    count(*) AS n_total,
    count(*) FILTER (WHERE c.sepsis3) AS n_sepsis3,
    round(100.0 * count(*) FILTER (WHERE c.sepsis3) / count(*), 1) AS pct_sepsis3,
    -- in-hospital mortality among Sepsis-3 = TRUE
    round(
        100.0 * count(*) FILTER (WHERE c.sepsis3 AND c.died_in_hospital = 1)
              / NULLIF(count(*) FILTER (WHERE c.sepsis3), 0)
      , 1
    ) AS sep_inhosp_pct,
    -- 30-day mortality among Sepsis-3 = TRUE
    round(
        100.0 * count(*) FILTER (WHERE c.sepsis3 AND c.died_within_30d = 1)
              / NULLIF(count(*) FILTER (WHERE c.sepsis3), 0)
      , 1
    ) AS sep_30d_pct,
    -- in-hospital mortality among NOT Sepsis-3
    round(
        100.0 * count(*) FILTER (WHERE NOT c.sepsis3 AND c.died_in_hospital = 1)
              / NULLIF(count(*) FILTER (WHERE NOT c.sepsis3), 0)
      , 1
    ) AS nonsep_inhosp_pct
FROM funnel AS f
INNER JOIN sepsis3_cohort_check AS c
    ON (f.lvl < 1 OR c.first_careunit != 'NICU')
   AND (f.lvl < 2 OR c.age_at_intime >= 18)
   AND (f.lvl < 3 OR c.icustay_seq = 1)
   AND (f.lvl < 4 OR c.los_hours >= 24)
GROUP BY f.lvl, f.step
ORDER BY f.lvl;
\echo
\echo '=================================================================='
\echo ' 2. Mortality stratified by age band, canonical cohort only'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " Mortality among Sepsis-3 = TRUE rises monotonically with age."
\echo " Adults < 30: ~10-15%"
\echo " 30 - 50: ~15-20%"
\echo " 50 - 70: ~20-25%"
\echo " 70 - 90+: ~30-40%"
\echo
-- Sepsis-3 stays of the canonical cohort (non-NICU, age >= 18, first ICU
-- stay, LOS >= 24 h), labelled with an age band, then summarised per band.
WITH canonical_sepsis AS (
    SELECT
        CASE
            WHEN age_at_intime < 30 THEN '1. <30'
            WHEN age_at_intime < 50 THEN '2. 30-49'
            WHEN age_at_intime < 70 THEN '3. 50-69'
            WHEN age_at_intime < 90 THEN '4. 70-89'
            ELSE '5. 90+'
        END AS age_band,
        died_in_hospital,
        died_within_30d
    FROM sepsis3_cohort_check
    WHERE sepsis3
      AND first_careunit != 'NICU'
      AND age_at_intime >= 18
      AND icustay_seq = 1
      AND los_hours >= 24
)
SELECT
    age_band,
    count(*) AS n_sepsis3,
    round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp,
    round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
FROM canonical_sepsis
GROUP BY age_band
ORDER BY age_band;
\echo
\echo '=================================================================='
\echo ' 3. Mortality stratified by max-SOFA day-1 (canonical cohort)'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " Mortality should rise monotonically with SOFA. This is the"
\echo " classic dose-response curve of organ dysfunction vs death,"
\echo " and is the strongest semantic check that the SOFA pipeline"
\echo " itself is computing the right thing."
\echo " SOFA 0-3: ~5-10%"
\echo " SOFA 4-7: ~15-25%"
\echo " SOFA 8-11: ~30-40%"
\echo " SOFA 12+: ~50-65%"
\echo
-- d1: highest sofa_24hours per stay among sofa_hourly rows with hr <= 24.
-- banded: canonical-cohort stays (sepsis or not) that have such a score,
-- labelled with a SOFA band.
-- NOTE(review): a NULL d1_sofa fails every comparison and lands in the
-- '4. 12+' band; confirm sofa_24hours is never NULL.
WITH d1 AS (
    SELECT
        sh.icustay_id,
        max(sh.sofa_24hours) AS d1_sofa
    FROM sofa_hourly AS sh
    WHERE sh.hr <= 24
    GROUP BY sh.icustay_id
)
, banded AS (
    SELECT
        CASE
            WHEN d1.d1_sofa <= 3 THEN '1. 0-3'
            WHEN d1.d1_sofa <= 7 THEN '2. 4-7'
            WHEN d1.d1_sofa <= 11 THEN '3. 8-11'
            ELSE '4. 12+'
        END AS sofa_band,
        c.died_in_hospital,
        c.died_within_30d
    FROM sepsis3_cohort_check AS c
    INNER JOIN d1
        ON d1.icustay_id = c.icustay_id
    WHERE c.first_careunit != 'NICU'
      AND c.age_at_intime >= 18
      AND c.icustay_seq = 1
      AND c.los_hours >= 24
)
SELECT
    sofa_band,
    count(*) AS n,
    round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp,
    round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
FROM banded
GROUP BY sofa_band
ORDER BY sofa_band;
\echo
\echo '=================================================================='
\echo ' 4. Direct comparison with Johnson 2018 (canonical cohort)'
\echo '=================================================================='
\echo "Johnson 2018 published numbers for Sepsis-3 on MIMIC-III v1.4"
\echo "with the cohort: adult, first ICU stay only, LOS >= 4 h."
\echo "They reported:"
\echo " n_sepsis3 = 21 927"
\echo " in-hospital pct_died = 21.0%"
\echo " 30-day pct_died = 25.4%"
\echo
\echo "We use LOS >= 24 h here (the more common Seymour 2016 cutoff),"
\echo "so our n will be a bit smaller and our mortality slightly"
\echo "higher than Johnson's."
\echo
-- Headline numbers: a single summary row over the Sepsis-3 stays of the
-- canonical cohort (non-NICU, age >= 18, first ICU stay, LOS >= 24 h).
WITH canonical_sepsis AS (
    SELECT
        died_in_hospital,
        died_within_30d
    FROM sepsis3_cohort_check
    WHERE sepsis3
      AND first_careunit != 'NICU'
      AND age_at_intime >= 18
      AND icustay_seq = 1
      AND los_hours >= 24
)
SELECT
    count(*) AS n_sepsis3,
    round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp,
    round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
FROM canonical_sepsis;
\echo
\echo '=================================================================='
\echo ' 5. Sanity: where did the missing mortality "go"?'
\echo '=================================================================='
\echo "Decompose the gap between the broad-cohort 14.6% and the"
\echo "canonical-cohort number from section 4. This shows how much"
\echo "of the gap is explained by each filter individually."
\echo
-- tallies: stay count and in-hospital deaths for all Sepsis-3 stays and
-- for each excluded slice, gathered in one pass. The slices are not
-- mutually exclusive. The LATERAL VALUES list then turns the single wide
-- row into one output row per slice. An empty slice still yields a row,
-- with n = 0 and a NULL percentage.
WITH s3 AS (
    SELECT
        first_careunit,
        age_at_intime,
        icustay_seq,
        los_hours,
        died_in_hospital
    FROM sepsis3_cohort_check
    WHERE sepsis3
)
, tallies AS (
    SELECT
        count(*) AS n_all,
        sum(died_in_hospital) AS died_all,
        count(*) FILTER (WHERE first_careunit = 'NICU') AS n_nicu,
        sum(died_in_hospital) FILTER (WHERE first_careunit = 'NICU') AS died_nicu,
        count(*) FILTER (WHERE age_at_intime < 18) AS n_minor,
        sum(died_in_hospital) FILTER (WHERE age_at_intime < 18) AS died_minor,
        count(*) FILTER (WHERE icustay_seq > 1) AS n_readm,
        sum(died_in_hospital) FILTER (WHERE icustay_seq > 1) AS died_readm,
        count(*) FILTER (WHERE los_hours < 24) AS n_short,
        sum(died_in_hospital) FILTER (WHERE los_hours < 24) AS died_short
    FROM s3
)
SELECT
    v.slice,
    v.n,
    round(100.0 * v.died / v.n, 1) AS pct_inhosp
FROM tallies AS t
CROSS JOIN LATERAL (
    VALUES
        ('all sepsis3', t.n_all, t.died_all)
      , ('NICU only', t.n_nicu, t.died_nicu)
      , ('age < 18 only', t.n_minor, t.died_minor)
      , ('re-admission only', t.n_readm, t.died_readm)
      , ('LOS < 24h only', t.n_short, t.died_short)
) AS v (slice, n, died);
\echo
\echo 'Done. Compare the section-4 result to Johnson 2018 (~21% in-hospital,'
\echo '~25% 30-day) for the headline check.'

View File

@@ -0,0 +1,393 @@
-- ------------------------------------------------------------------
-- Sepsis-3 sanity checks for MIMIC-III v1.3.
--
-- Usage:
-- psql -d mimic -v ON_ERROR_STOP=1 \
-- -c 'SET search_path TO mimiciii, public;' \
-- -f sql/sepsis/sanity_checks.sql
--
-- Each section prints a short result set. Compare against the
-- "EXPECTED" comment. None of these are pass/fail tests; they are
-- bounds-style checks designed to catch obvious upstream breakage
-- (an empty staging table, an off-by-one in the hourly grid, a
-- vasopressor unit-conversion error, etc.).
--
-- Reference numbers come from:
-- Seymour CW et al., JAMA 2016 (the Sepsis-3 paper)
-- Johnson AEW et al., Crit Care Med 2018 ("A Comparative Analysis
-- of Sepsis Identification Methods in an Electronic Database",
-- which reproduces Sepsis-3 on MIMIC-III)
-- ------------------------------------------------------------------
\set ON_ERROR_STOP on
\timing on
\echo
\echo '=================================================================='
\echo ' 1. Row counts of every table in the pipeline'
\echo '=================================================================='
\echo "EXPECTED (MIMIC-III v1.3 full restore, all 61.5k ICU stays):"
\echo " icustays ~ 61 532"
\echo " sofa_grid ~ 6 - 8 M (60k stays * ~4d mean LOS * 24h)"
\echo " sofa_hourly same as sofa_grid"
\echo " blood_gas_arterial ~ 500 k - 1 M"
\echo " gcs_all ~ 4 - 6 M"
\echo " antibiotic ~ 500 k - 700 k prescription rows"
\echo " suspicion_of_infection same as antibiotic"
\echo " sepsis3 ~ 20 k - 30 k rows (one row per ICU stay"
\echo " that ever had any abx + qualifying SOFA)"
\echo
-- Row count of every table the pipeline reads or builds, one row per
-- table, sorted by table name.
SELECT 'icustays' AS table_name, COUNT(*) AS n
FROM icustays
UNION ALL
SELECT 'sofa_grid', COUNT(*) FROM sofa_grid
UNION ALL
SELECT 'sofa_vs', COUNT(*) FROM sofa_vs
UNION ALL
SELECT 'sofa_gcs', COUNT(*) FROM sofa_gcs
UNION ALL
SELECT 'sofa_bili', COUNT(*) FROM sofa_bili
UNION ALL
SELECT 'sofa_cr', COUNT(*) FROM sofa_cr
UNION ALL
SELECT 'sofa_plt', COUNT(*) FROM sofa_plt
UNION ALL
SELECT 'sofa_pf', COUNT(*) FROM sofa_pf
UNION ALL
SELECT 'sofa_uo', COUNT(*) FROM sofa_uo
UNION ALL
SELECT 'sofa_vaso', COUNT(*) FROM sofa_vaso
UNION ALL
SELECT 'sofa_wide', COUNT(*) FROM sofa_wide
UNION ALL
SELECT 'sofa_components', COUNT(*) FROM sofa_components
UNION ALL
SELECT 'sofa_hourly', COUNT(*) FROM sofa_hourly
UNION ALL
SELECT 'blood_gas_arterial', COUNT(*) FROM blood_gas_arterial
UNION ALL
SELECT 'gcs_all', COUNT(*) FROM gcs_all
UNION ALL
SELECT 'antibiotic', COUNT(*) FROM antibiotic
UNION ALL
SELECT 'suspicion_of_infection', COUNT(*) FROM suspicion_of_infection
UNION ALL
SELECT 'sepsis3', COUNT(*) FROM sepsis3
ORDER BY table_name;
\echo
\echo '=================================================================='
\echo ' 2. Hourly grid integrity'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " bad_hr_seq = 0 (hours per stay must be 1..N consecutive)"
\echo " bad_endtime = 0 (endtime > starttime)"
\echo " duplicate_grid = 0 (no (icustay_id, hr) duplicates)"
\echo " grid_eq_hourly = 0 (sofa_grid and sofa_hourly row counts match)"
\echo
-- Four integrity counters for the hourly grid, returned as one row.
-- Every counter is expected to be 0.
WITH stay_hours AS (
    -- First hour, last hour and number of grid rows for each stay.
    SELECT icustay_id
         , MIN(hr) AS first_hr
         , MAX(hr) AS last_hr
         , COUNT(*) AS n_hours
    FROM sofa_grid
    GROUP BY icustay_id
)
SELECT (
           -- stays whose hours do not start at 1 or do not end at the row count
           SELECT COUNT(*)
           FROM stay_hours
           WHERE first_hr <> 1
              OR last_hr <> n_hours
       ) AS bad_hr_seq
     , (
           -- grid rows whose window is empty or reversed
           SELECT COUNT(*)
           FROM sofa_grid
           WHERE endtime <= starttime
       ) AS bad_endtime
     , (
           -- surplus rows beyond the distinct (icustay_id, hr) pairs
           SELECT COUNT(*) - COUNT(DISTINCT (icustay_id, hr))
           FROM sofa_grid
       ) AS duplicate_grid
     , (SELECT COUNT(*) FROM sofa_grid)
       - (SELECT COUNT(*) FROM sofa_hourly) AS grid_eq_hourly;
\echo
\echo '=================================================================='
\echo ' 3. Per-component SOFA score ranges'
\echo '=================================================================='
\echo "EXPECTED: every per-hour component score is in [0, 4] or NULL."
\echo " Any value outside that range indicates a logic bug."
\echo
-- Minimum and maximum per-hour score of each SOFA component, one row
-- per component, sorted by component name.
SELECT 'respiration' AS component
     , MIN(respiration) AS min
     , MAX(respiration) AS max
FROM sofa_components
UNION ALL
SELECT 'coagulation', MIN(coagulation), MAX(coagulation)
FROM sofa_components
UNION ALL
SELECT 'liver', MIN(liver), MAX(liver)
FROM sofa_components
UNION ALL
SELECT 'cardiovascular', MIN(cardiovascular), MAX(cardiovascular)
FROM sofa_components
UNION ALL
SELECT 'cns', MIN(cns), MAX(cns)
FROM sofa_components
UNION ALL
SELECT 'renal', MIN(renal), MAX(renal)
FROM sofa_components
ORDER BY component;
\echo
\echo '=================================================================='
\echo ' 4. 24-hour rolling SOFA distribution'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " total_sofa min = 0, max ~ 20-24"
\echo " median per-hour total_sofa ~ 2-4"
\echo " Distribution should be heavy-tailed; ~70-80% of hours <= 5,"
\echo " ~5-10% of hours >= 10."
\echo
-- Summary statistics of the 24-hour rolling SOFA over all hourly rows.
SELECT MIN(h.sofa_24hours) AS sofa_min
     , MAX(h.sofa_24hours) AS sofa_max
     , ROUND(CAST(AVG(h.sofa_24hours) AS numeric), 2) AS sofa_mean
     , PERCENTILE_DISC(0.50) WITHIN GROUP (ORDER BY h.sofa_24hours) AS sofa_p50
     , PERCENTILE_DISC(0.90) WITHIN GROUP (ORDER BY h.sofa_24hours) AS sofa_p90
     , PERCENTILE_DISC(0.99) WITHIN GROUP (ORDER BY h.sofa_24hours) AS sofa_p99
FROM sofa_hourly AS h;
\echo
\echo '=================================================================='
\echo ' 5. Day-1 max SOFA per stay (compare with SAPS-II severity)'
\echo '=================================================================='
\echo "EXPECTED for adult ICU (per Singer 2016, Vincent 1996):"
\echo " median day-1 SOFA ~ 4-6"
\echo " ~60-70% of stays have day-1 SOFA >= 2 (Sepsis-3 organ-dys threshold)"
\echo
-- Distribution of the highest rolling SOFA reached in the first 24 grid
-- hours of each stay, plus the share of stays at or above 2 and 6.
WITH first_day AS (
    SELECT h.icustay_id
         , MAX(h.sofa_24hours) AS day1_sofa
    FROM sofa_hourly AS h
    WHERE h.hr <= 24
    GROUP BY h.icustay_id
)
SELECT COUNT(*) AS n_stays
     , ROUND(CAST(AVG(fd.day1_sofa) AS numeric), 2) AS mean_d1_sofa
     , PERCENTILE_DISC(0.50) WITHIN GROUP (ORDER BY fd.day1_sofa) AS p50
     , PERCENTILE_DISC(0.90) WITHIN GROUP (ORDER BY fd.day1_sofa) AS p90
     , ROUND(
           100.0 * SUM(CASE WHEN fd.day1_sofa >= 2 THEN 1 ELSE 0 END) / COUNT(*)
         , 1
       ) AS pct_ge2
     , ROUND(
           100.0 * SUM(CASE WHEN fd.day1_sofa >= 6 THEN 1 ELSE 0 END) / COUNT(*)
         , 1
       ) AS pct_ge6
FROM first_day AS fd;
\echo
\echo '=================================================================='
\echo ' 6. Component-input sanity (raw ranges)'
\echo '=================================================================='
\echo "EXPECTED ranges (after our valuenum filters):"
\echo " meanbp_min 30 - 200 mmHg"
\echo " gcs_min 3 - 15"
\echo " bilirubin_max 0 - 80 mg/dL"
\echo " creatinine_max 0 - 30 mg/dL (capped at 150 in pipeline)"
\echo " platelet_min 0 - 1500 K/uL"
\echo " pao2fio2_* 50 - 700"
\echo " uo_24hr 0 - 20000 mL"
\echo " rate_norepi etc. 0 - 5 mcg/kg/min (rates above ~3 are very rare)"
\echo
-- Min / max / mean of each raw SOFA input over the hours where it is
-- present. Values are cast to text so that metrics of different numeric
-- types can share one result set; rows are sorted by metric name.
SELECT 'meanbp_min' AS metric
     , CAST(MIN(meanbp_min) AS text) AS min
     , CAST(MAX(meanbp_min) AS text) AS max
     , CAST(ROUND(CAST(AVG(meanbp_min) AS numeric), 1) AS text) AS mean
FROM sofa_components
WHERE meanbp_min IS NOT NULL
UNION ALL
SELECT 'gcs_min'
     , CAST(MIN(gcs_min) AS text)
     , CAST(MAX(gcs_min) AS text)
     , CAST(CAST(AVG(gcs_min) AS numeric(10,1)) AS text)
FROM sofa_components
WHERE gcs_min IS NOT NULL
UNION ALL
SELECT 'bilirubin_max'
     , CAST(MIN(bilirubin_max) AS text)
     , CAST(MAX(bilirubin_max) AS text)
     , CAST(CAST(AVG(bilirubin_max) AS numeric(10,2)) AS text)
FROM sofa_components
WHERE bilirubin_max IS NOT NULL
UNION ALL
SELECT 'creatinine_max'
     , CAST(MIN(creatinine_max) AS text)
     , CAST(MAX(creatinine_max) AS text)
     , CAST(CAST(AVG(creatinine_max) AS numeric(10,2)) AS text)
FROM sofa_components
WHERE creatinine_max IS NOT NULL
UNION ALL
SELECT 'platelet_min'
     , CAST(MIN(platelet_min) AS text)
     , CAST(MAX(platelet_min) AS text)
     , CAST(CAST(AVG(platelet_min) AS numeric(10,1)) AS text)
FROM sofa_components
WHERE platelet_min IS NOT NULL
UNION ALL
SELECT 'pao2fio2_vent'
     , CAST(MIN(pao2fio2_vent) AS text)
     , CAST(MAX(pao2fio2_vent) AS text)
     , CAST(CAST(AVG(pao2fio2_vent) AS numeric(10,1)) AS text)
FROM sofa_components
WHERE pao2fio2_vent IS NOT NULL
UNION ALL
SELECT 'pao2fio2_novent'
     , CAST(MIN(pao2fio2_novent) AS text)
     , CAST(MAX(pao2fio2_novent) AS text)
     , CAST(CAST(AVG(pao2fio2_novent) AS numeric(10,1)) AS text)
FROM sofa_components
WHERE pao2fio2_novent IS NOT NULL
UNION ALL
SELECT 'uo_24hr'
     , CAST(MIN(uo_24hr) AS text)
     , CAST(MAX(uo_24hr) AS text)
     , CAST(CAST(AVG(uo_24hr) AS numeric(10,1)) AS text)
FROM sofa_components
WHERE uo_24hr IS NOT NULL
UNION ALL
SELECT 'rate_norepinephrine'
     , CAST(MIN(rate_norepinephrine) AS text)
     , CAST(MAX(rate_norepinephrine) AS text)
     , CAST(CAST(AVG(rate_norepinephrine) AS numeric(10,3)) AS text)
FROM sofa_components
WHERE rate_norepinephrine IS NOT NULL
ORDER BY metric;
\echo
\echo '=================================================================='
\echo ' 7. Vasopressor coverage'
\echo '=================================================================='
\echo "EXPECTED: ~25-35% of adult ICU stays receive at least one"
\echo " vasopressor (norepi most common, then epi/dop/dob)."
\echo
-- Number of distinct ICU stays with at least one sofa_vaso row, overall
-- and per pressor. The column label is quoted because GROUP is a
-- reserved word; the output column is still named "group".
SELECT 'any vaso' AS "group"
     , COUNT(DISTINCT v.icustay_id) AS n_stays
FROM sofa_vaso AS v
UNION ALL
SELECT 'norepi'
     , COUNT(DISTINCT v.icustay_id)
FROM sofa_vaso AS v
WHERE v.rate_norepinephrine IS NOT NULL
UNION ALL
SELECT 'epi'
     , COUNT(DISTINCT v.icustay_id)
FROM sofa_vaso AS v
WHERE v.rate_epinephrine IS NOT NULL
UNION ALL
SELECT 'dop'
     , COUNT(DISTINCT v.icustay_id)
FROM sofa_vaso AS v
WHERE v.rate_dopamine IS NOT NULL
UNION ALL
SELECT 'dob'
     , COUNT(DISTINCT v.icustay_id)
FROM sofa_vaso AS v
WHERE v.rate_dobutamine IS NOT NULL;
\echo
\echo '=================================================================='
\echo ' 8. Antibiotic prescriptions: top 15 drugs'
\echo '=================================================================='
\echo "EXPECTED: vancomycin, piperacillin/tazobactam (zosyn),"
\echo " ceftriaxone, levofloxacin, metronidazole near the top."
\echo
-- The 15 most frequent values of antibiotic.antibiotic with row counts.
SELECT a.antibiotic
     , COUNT(*) AS n
FROM antibiotic AS a
GROUP BY a.antibiotic
ORDER BY n DESC
LIMIT 15;
\echo
\echo '=================================================================='
\echo ' 9. Suspicion of infection: matching rate'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " ~50-70% of antibiotic rows are matched to a culture"
\echo " (i.e. suspected_infection = 1). Top specimens should be:"
\echo " BLOOD CULTURE, URINE, MRSA SCREEN, SPUTUM, SWAB."
\echo
-- Share of suspicion_of_infection rows flagged suspected_infection = 1.
SELECT COUNT(*) AS n_total
     , SUM(soi.suspected_infection) AS n_suspected
     , ROUND(100.0 * SUM(soi.suspected_infection) / COUNT(*), 1) AS pct_suspected
FROM suspicion_of_infection AS soi;
-- Ten most frequent specimen values among the flagged rows.
SELECT soi.specimen
     , COUNT(*) AS n
FROM suspicion_of_infection AS soi
WHERE soi.suspected_infection = 1
GROUP BY soi.specimen
ORDER BY n DESC
LIMIT 10;
\echo
\echo '=================================================================='
\echo '10. Sepsis-3 prevalence at the ICU-stay level'
\echo '=================================================================='
\echo "EXPECTED (Johnson 2018, MIMIC-III all-cohort):"
\echo " total stays in sepsis3 table : 25 - 35 k"
\echo " (every stay with any abx and a qualifying SOFA window)"
\echo " sepsis3 = TRUE : 18 - 24 k (~30-40% of all ICU stays)"
\echo
-- Rows in sepsis3, how many have sepsis3 = TRUE, and that count as a
-- percentage of (a) sepsis3 rows and (b) all icustays rows. NULLIF keeps
-- an empty table from raising a division-by-zero error.
WITH tally AS (
    SELECT COUNT(*) AS n_rows
         , SUM(CASE WHEN s.sepsis3 THEN 1 ELSE 0 END) AS n_sepsis3
    FROM sepsis3 AS s
)
SELECT t.n_rows
     , t.n_sepsis3
     , ROUND(100.0 * t.n_sepsis3 / NULLIF(t.n_rows, 0), 1)
           AS pct_sepsis3_among_rows
     , ROUND(100.0 * t.n_sepsis3 / NULLIF((SELECT COUNT(*) FROM icustays), 0), 1)
           AS pct_sepsis3_of_all_icustays
FROM tally AS t;
\echo
\echo '=================================================================='
\echo '11. Sepsis-3 onset timing'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " Most onsets occur early in the stay; median onset is on"
\echo " day 0-1 (~0-24h after intime). A long right tail exists"
\echo " for ICU-acquired sepsis."
\echo " sofa_time should be within [-48h, +24h] of"
\echo " suspected_infection_time by construction."
\echo
-- Timing of Sepsis-3 onset for stays with sepsis3 = TRUE:
--   * hours from ICU intime to suspected_infection_time (mean, p50, p90)
--   * smallest / largest offset of sofa_time from suspected_infection_time
WITH onset AS (
    SELECT EXTRACT(EPOCH FROM (s.suspected_infection_time - ie.intime)) / 3600
               AS hours_to_onset
         , EXTRACT(EPOCH FROM (s.sofa_time - s.suspected_infection_time)) / 3600
               AS sofa_offset_h
    FROM sepsis3 AS s
    INNER JOIN icustays AS ie
        ON ie.icustay_id = s.icustay_id
    WHERE s.sepsis3 = TRUE
)
SELECT ROUND(CAST(AVG(o.hours_to_onset) AS numeric), 1) AS mean_hours_to_onset
     , PERCENTILE_DISC(0.50) WITHIN GROUP (ORDER BY o.hours_to_onset) AS p50_hours
     , PERCENTILE_DISC(0.90) WITHIN GROUP (ORDER BY o.hours_to_onset) AS p90_hours
     , MIN(o.sofa_offset_h) AS min_sofa_offset_h
     , MAX(o.sofa_offset_h) AS max_sofa_offset_h
FROM onset AS o;
\echo
\echo '=================================================================='
\echo '12. Mortality stratified by Sepsis-3 status'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " In-hospital mortality among Sepsis-3 = TRUE: ~25-35%"
\echo " Among Sepsis-3 = FALSE / no row in sepsis3: ~5-10%"
\echo
-- In-hospital mortality by Sepsis-3 status. Every ICU stay is kept;
-- stays with no sepsis3 row, or with sepsis3 not TRUE, are labelled
-- 'not sepsis3'.
WITH labelled AS (
    SELECT ie.icustay_id
         , ie.hadm_id
         , CASE
               WHEN s.sepsis3 IS TRUE THEN 'sepsis3'
               ELSE 'not sepsis3'
           END AS sepsis_status
    FROM icustays AS ie
    LEFT JOIN sepsis3 AS s
        ON s.icustay_id = ie.icustay_id
)
SELECT l.sepsis_status
     , COUNT(*) AS n_stays
     , SUM(CASE WHEN adm.hospital_expire_flag = 1 THEN 1 ELSE 0 END) AS n_died
     , ROUND(
           100.0 * SUM(CASE WHEN adm.hospital_expire_flag = 1 THEN 1 ELSE 0 END)
               / COUNT(*)
         , 1
       ) AS pct_died
FROM labelled AS l
INNER JOIN admissions AS adm
    ON adm.hadm_id = l.hadm_id
GROUP BY l.sepsis_status
ORDER BY l.sepsis_status DESC;
\echo
\echo '=================================================================='
\echo '13. Sepsis-3 vs SAPS-II (cross-score validation)'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " Septic patients should have higher mean SAPS-II than non-septic"
\echo " (typically by ~10-15 points)."
\echo " This sanity check requires that you have already run"
\echo " build_sapsii.sql. If sapsii does not exist, this section"
\echo " reports an error and the script continues with section 14."
\echo
-- The sapsii table is optional. This script turns ON_ERROR_STOP on at
-- the top, which overrides any -v ON_ERROR_STOP=0 given on the command
-- line, so the setting is switched off around this one query and
-- restored afterwards. A missing sapsii table then prints an error
-- instead of aborting the remaining checks.
\set ON_ERROR_STOP off
-- Mean SAPS-II score and mean predicted mortality by Sepsis-3 status.
-- Both joins are LEFT joins, so every ICU stay is counted in n even
-- when it has no sepsis3 or sapsii row.
SELECT CASE
           WHEN s.sepsis3 IS TRUE THEN 'sepsis3'
           ELSE 'not sepsis3'
       END AS sepsis_status
     , COUNT(*) AS n
     , ROUND(CAST(AVG(sa.sapsii) AS numeric), 1) AS mean_sapsii
     , ROUND(CAST(AVG(sa.sapsii_prob) AS numeric), 3) AS mean_predicted_mortality
FROM icustays AS ie
LEFT JOIN sepsis3 AS s
    ON s.icustay_id = ie.icustay_id
LEFT JOIN sapsii AS sa
    ON sa.icustay_id = ie.icustay_id
GROUP BY (s.sepsis3 IS TRUE)
ORDER BY sepsis_status DESC;
\set ON_ERROR_STOP on
\echo
\echo '=================================================================='
\echo '14. Spot-check a few stays end-to-end'
\echo '=================================================================='
\echo "Pulls 5 random Sepsis-3 = TRUE stays and shows you the trajectory"
\echo "of sofa_24hours alongside the suspected_infection_time. Eyeball:"
\echo " - sofa_24hours should be >= 2 at hours surrounding the onset"
\echo " - sofa_24hours should plausibly rise then fall over the stay"
\echo " - hour numbering should be consecutive"
\echo
-- Hourly rolling-SOFA trajectory within +/- 6 h of the
-- suspected-infection time for five Sepsis-3 = TRUE stays. Ordering by
-- md5(icustay_id) gives an arbitrary-looking but repeatable sample.
WITH sampled AS (
    SELECT s.icustay_id
         , s.suspected_infection_time
    FROM sepsis3 AS s
    WHERE s.sepsis3 = TRUE
    ORDER BY md5(s.icustay_id::text)
    LIMIT 5
)
SELECT sm.icustay_id
     , sm.suspected_infection_time
     , hrly.hr
     , hrly.endtime
     , hrly.respiration_24hours AS resp
     , hrly.coagulation_24hours AS coag
     , hrly.liver_24hours AS liv
     , hrly.cardiovascular_24hours AS cardio
     , hrly.cns_24hours AS cns
     , hrly.renal_24hours AS ren
     , hrly.sofa_24hours AS sofa
FROM sampled AS sm
INNER JOIN sofa_hourly AS hrly
    ON hrly.icustay_id = sm.icustay_id
WHERE hrly.endtime >= sm.suspected_infection_time - INTERVAL '6 hours'
  AND hrly.endtime <= sm.suspected_infection_time + INTERVAL '6 hours'
ORDER BY sm.icustay_id, hrly.hr;
\echo
\echo 'All sanity checks complete. Anything way off the expected ranges'
\echo 'above is worth investigating before relying on the sepsis-3 cohort.'

90
sql/sepsis/sepsis3.sql Normal file
View File

@@ -0,0 +1,90 @@
-- ------------------------------------------------------------------
-- Title: Sepsis-3 onset
--
-- Adapted from the MIMIC-IV upstream
-- concepts/sepsis/sepsis3.sql
-- and ported to MIMIC-III v1.3 vanilla PostgreSQL.
--
-- Definition (Singer et al., JAMA 2016):
-- Sepsis-3 = SOFA >= 2 AND suspicion of infection,
-- where the end of the hourly SOFA window must fall between 48 h
-- before and 24 h after the suspected-infection time.
-- The "onset time" is the suspected-infection-time of the earliest
-- row that satisfies these criteria for each ICU stay.
--
-- Implicitly assumes baseline SOFA = 0 prior to ICU admission, since
-- we do not have premorbid organ-dysfunction data.
--
-- Dependencies:
-- sepsis/suspicion_of_infection.sql
-- sepsis/sofa_hourly.sql
-- ------------------------------------------------------------------
DROP TABLE IF EXISTS sepsis3;
-- sepsis3: at most one row per ICU stay, the earliest pairing of a
-- suspected-infection event with an hourly row whose 24-hour rolling
-- SOFA is >= 2 and whose endtime lies between 48 h before and 24 h
-- after the suspected-infection time.
CREATE TABLE sepsis3 AS
WITH sofa AS
(
    -- Hourly rows that already meet the SOFA >= 2 threshold; the
    -- *_24hours columns are renamed to their plain component names.
    SELECT icustay_id
         , starttime
         , endtime
         , respiration_24hours AS respiration
         , coagulation_24hours AS coagulation
         , liver_24hours AS liver
         , cardiovascular_24hours AS cardiovascular
         , cns_24hours AS cns
         , renal_24hours AS renal
         , sofa_24hours AS sofa_score
    FROM sofa_hourly
    WHERE sofa_24hours >= 2
)
, s1 AS
(
    SELECT soi.subject_id
         , soi.icustay_id
         , soi.ab_id
         , soi.antibiotic
         , soi.antibiotic_time
         , soi.culture_time
         , soi.suspected_infection
         , soi.suspected_infection_time
         , soi.specimen
         , soi.positive_culture
         , sofa.starttime
         , sofa.endtime
         , sofa.respiration
         , sofa.coagulation
         , sofa.liver
         , sofa.cardiovascular
         , sofa.cns
         , sofa.renal
         , sofa.sofa_score
         , (sofa.sofa_score >= 2 AND soi.suspected_infection = 1) AS sepsis3
         -- rn_sus = 1 marks the earliest qualifying pairing per ICU stay;
         -- ties are broken by antibiotic time, culture time, then SOFA hour.
         , ROW_NUMBER() OVER (
               PARTITION BY soi.icustay_id
               ORDER BY soi.suspected_infection_time
                      , soi.antibiotic_time
                      , soi.culture_time
                      , sofa.endtime
           ) AS rn_sus
    FROM suspicion_of_infection soi
    INNER JOIN sofa
        ON soi.icustay_id = sofa.icustay_id
        -- Native interval arithmetic: no dependency on the
        -- DATETIME_SUB / DATETIME_ADD helpers from postgres-functions.sql.
        AND sofa.endtime >= soi.suspected_infection_time - INTERVAL '48 hours'
        AND sofa.endtime <= soi.suspected_infection_time + INTERVAL '24 hours'
    WHERE soi.icustay_id IS NOT NULL
      AND soi.suspected_infection_time IS NOT NULL
)
SELECT subject_id
     , icustay_id
     , antibiotic_time
     , culture_time
     , suspected_infection_time
     , endtime AS sofa_time
     , sofa_score
     , respiration, coagulation, liver, cardiovascular, cns, renal
     , sepsis3
FROM s1
WHERE rn_sus = 1;
CREATE INDEX IF NOT EXISTS sepsis3_idx ON sepsis3 (icustay_id);

397
sql/sepsis/sofa_hourly.sql Normal file
View File

@@ -0,0 +1,397 @@
-- ------------------------------------------------------------------
-- Title: Hourly Sequential Organ Failure Assessment (SOFA)
--
-- Adapted from the MIMIC-IV upstream
-- concepts/score/sofa.sql
-- and ported to MIMIC-III v1.3 vanilla PostgreSQL.
--
-- Produces one row per (icustay_id, hr) for every hour of the ICU
-- stay, with both the per-component score AT that hour and the
-- 24-hour rolling MAX of each component (which is the value used by
-- Sepsis-3). Final column `sofa_24hours` is the sum of the six
-- 24-hour rolling maxes.
--
-- Differences vs. MIMIC-IV upstream (search this file for "PORT NOTE"):
-- 1. ID column is `icustay_id`, not `stay_id`.
-- 2. There is no `icustay_hourly` derived table in MIMIC-III; we
-- build the hourly grid inline with `generate_series`.
-- 3. There is no `mimic_derived.ventilation` with a fine-grained
-- `ventilation_status='InvasiveVent'` flag in MIMIC-III; we use
-- the lumped `ventilation_durations` table, so any active
-- ventilation row is treated as invasive ventilation for the
-- purpose of the PaO2:FiO2 vent/novent split.
-- 4. We replicate MIMIC-IV's `urine_output_rate` adjustment inline
-- in `sofa_uo`: we materialise both `uo_24hr` (sum) and
-- `uo_tm_24hr` (count of distinct hours that actually had a UO
-- observation in the past 24 h), and the renal CASE in (11)
-- uses
-- GREATEST(uo_24hr, 0) * 24.0 / uo_tm_24hr
-- only when uo_tm_24hr BETWEEN 22 AND 30, falling back to
-- creatinine alone otherwise. The GREATEST(_, 0) clip prevents
-- patients on continuous bladder irrigation (which the upstream
-- `urine_output.sql` subtracts as a negative volume) from being
-- mis-scored as oliguric.
-- 5. Vasopressor rates come from the upstream
-- durations/{epinephrine,norepinephrine,dopamine,dobutamine}_dose.sql
-- tables, which already merge CareVue + MetaVision and convert
-- to mcg/kg/min.
--
-- Implementation note: each measurement class is materialised into
-- its own narrow staging table. This avoids forcing the planner to
-- optimise a single ~10-way CTE join, lets each scan of the giant raw
-- tables (`chartevents`, `labevents`, `outputevents`,
-- `inputevents_*`) run independently, and lets you `EXPLAIN ANALYZE`
-- each step in isolation.
--
-- Dependencies:
-- postgres-functions.sql
-- durations/ventilation_durations.sql
-- durations/{dobutamine,dopamine,epinephrine,norepinephrine}_dose.sql
-- fluid_balance/urine_output.sql
-- sepsis/blood_gas_arterial.sql
-- sepsis/gcs_all.sql
-- ------------------------------------------------------------------
-- 1. Hourly grid: one row per (icustay_id, hr). Hour `hr` covers
--    (intime + (hr - 1) h, intime + hr h]; the number of hours is the
--    stay length rounded up to a whole hour. Stays without an outtime,
--    or with outtime <= intime, get no rows.
DROP TABLE IF EXISTS sofa_grid;
CREATE TABLE sofa_grid AS
SELECT ie.subject_id
     , ie.hadm_id
     , ie.icustay_id
     , hours.hr
     , ie.intime + ((hours.hr - 1) * INTERVAL '1 hour') AS starttime
     , ie.intime + (hours.hr * INTERVAL '1 hour') AS endtime
FROM icustays AS ie
CROSS JOIN LATERAL generate_series(
    1,
    GREATEST(
        1,
        CEIL(EXTRACT(EPOCH FROM (ie.outtime - ie.intime)) / 3600.0)::int
    )
) AS hours(hr)
WHERE ie.outtime IS NOT NULL
  AND ie.outtime > ie.intime;
CREATE INDEX IF NOT EXISTS sofa_grid_idx
    ON sofa_grid (icustay_id, hr);
CREATE INDEX IF NOT EXISTS sofa_grid_time_idx
    ON sofa_grid (icustay_id, starttime, endtime);
ANALYZE sofa_grid;
-- 2. Mean arterial pressure: lowest charted value in each grid hour.
--    Only values in (0, 300) from rows not flagged as errors are used;
--    hours with no qualifying value keep a NULL meanbp_min.
DROP TABLE IF EXISTS sofa_vs;
CREATE TABLE sofa_vs AS
SELECT grid.icustay_id
     , grid.hr
     , MIN(chart.valuenum) AS meanbp_min
FROM sofa_grid AS grid
LEFT JOIN chartevents AS chart
    ON chart.icustay_id = grid.icustay_id
    AND chart.itemid IN (456, 52, 6702, 443, 220052, 220181, 225312)
    AND chart.charttime > grid.starttime
    AND chart.charttime <= grid.endtime
    AND chart.valuenum > 0
    AND chart.valuenum < 300
    AND COALESCE(chart.error, 0) = 0
GROUP BY grid.icustay_id, grid.hr;
CREATE INDEX IF NOT EXISTS sofa_vs_idx
    ON sofa_vs (icustay_id, hr);
ANALYZE sofa_vs;
-- 3. GCS: lowest gcs_all.gcs charted in each grid hour; NULL when the
--    hour has no gcs_all row.
DROP TABLE IF EXISTS sofa_gcs;
CREATE TABLE sofa_gcs AS
SELECT grid.icustay_id
     , grid.hr
     , MIN(coma.gcs) AS gcs_min
FROM sofa_grid AS grid
LEFT JOIN gcs_all AS coma
    ON coma.icustay_id = grid.icustay_id
    AND coma.charttime > grid.starttime
    AND coma.charttime <= grid.endtime
GROUP BY grid.icustay_id, grid.hr;
CREATE INDEX IF NOT EXISTS sofa_gcs_idx
    ON sofa_gcs (icustay_id, hr);
ANALYZE sofa_gcs;
-- 4. Bilirubin: maximum within the hour (labevents itemid 50885).
--    labevents has no icustay_id here, so labs are matched on
--    (subject_id, hadm_id) and charttime. sofa_grid already carries
--    subject_id and hadm_id copied from icustays, so no join back to
--    icustays is needed. `valuenum > 0` also excludes NULL values.
DROP TABLE IF EXISTS sofa_bili;
CREATE TABLE sofa_bili AS
SELECT g.icustay_id
     , g.hr
     , MAX(le.valuenum) AS bilirubin_max
FROM sofa_grid g
LEFT JOIN labevents le
    ON le.subject_id = g.subject_id
    AND le.hadm_id = g.hadm_id
    AND le.charttime > g.starttime
    AND le.charttime <= g.endtime
    AND le.itemid = 50885
    AND le.valuenum > 0
GROUP BY g.icustay_id, g.hr;
CREATE INDEX IF NOT EXISTS sofa_bili_idx ON sofa_bili (icustay_id, hr);
ANALYZE sofa_bili;
-- 5. Creatinine: maximum within the hour (labevents itemid 50912).
--    Labs are matched on (subject_id, hadm_id) and charttime. sofa_grid
--    already carries subject_id and hadm_id copied from icustays, so no
--    join back to icustays is needed. `valuenum > 0` also excludes NULL
--    values; values >= 150 mg/dL are discarded as implausible.
DROP TABLE IF EXISTS sofa_cr;
CREATE TABLE sofa_cr AS
SELECT g.icustay_id
     , g.hr
     , MAX(le.valuenum) AS creatinine_max
FROM sofa_grid g
LEFT JOIN labevents le
    ON le.subject_id = g.subject_id
    AND le.hadm_id = g.hadm_id
    AND le.charttime > g.starttime
    AND le.charttime <= g.endtime
    AND le.itemid = 50912
    AND le.valuenum > 0
    AND le.valuenum < 150 -- sanity (mg/dL)
GROUP BY g.icustay_id, g.hr;
CREATE INDEX IF NOT EXISTS sofa_cr_idx ON sofa_cr (icustay_id, hr);
ANALYZE sofa_cr;
-- 6. Platelets: minimum within the hour (labevents itemid 51265).
--    Labs are matched on (subject_id, hadm_id) and charttime. sofa_grid
--    already carries subject_id and hadm_id copied from icustays, so no
--    join back to icustays is needed. `valuenum > 0` also excludes NULL
--    values.
DROP TABLE IF EXISTS sofa_plt;
CREATE TABLE sofa_plt AS
SELECT g.icustay_id
     , g.hr
     , MIN(le.valuenum) AS platelet_min
FROM sofa_grid g
LEFT JOIN labevents le
    ON le.subject_id = g.subject_id
    AND le.hadm_id = g.hadm_id
    AND le.charttime > g.starttime
    AND le.charttime <= g.endtime
    AND le.itemid = 51265
    AND le.valuenum > 0
GROUP BY g.icustay_id, g.hr;
CREATE INDEX IF NOT EXISTS sofa_plt_idx ON sofa_plt (icustay_id, hr);
ANALYZE sofa_plt;
-- 7. PaO2/FiO2: each arterial blood gas is classed as "vent" when its
--    charttime falls inside a ventilation_durations interval of the same
--    stay, and "novent" otherwise. For each grid hour we keep the lowest
--    ratio of each class.
DROP TABLE IF EXISTS sofa_pf;
CREATE TABLE sofa_pf AS
WITH gas AS (
    SELECT abg.icustay_id
         , abg.charttime
         , CASE WHEN vent.icustay_id IS NULL THEN abg.pao2fio2 END
               AS pao2fio2_novent
         , CASE WHEN vent.icustay_id IS NOT NULL THEN abg.pao2fio2 END
               AS pao2fio2_vent
    FROM blood_gas_arterial AS abg
    LEFT JOIN ventilation_durations AS vent
        ON vent.icustay_id = abg.icustay_id
        AND vent.starttime <= abg.charttime
        AND vent.endtime >= abg.charttime
    WHERE abg.pao2fio2 IS NOT NULL
)
SELECT grid.icustay_id
     , grid.hr
     , MIN(gas.pao2fio2_novent) AS pao2fio2_novent
     , MIN(gas.pao2fio2_vent) AS pao2fio2_vent
FROM sofa_grid AS grid
LEFT JOIN gas
    ON gas.icustay_id = grid.icustay_id
    AND gas.charttime > grid.starttime
    AND gas.charttime <= grid.endtime
GROUP BY grid.icustay_id, grid.hr;
CREATE INDEX IF NOT EXISTS sofa_pf_idx
    ON sofa_pf (icustay_id, hr);
ANALYZE sofa_pf;
-- 8. Urine output over the 24 h ending at each grid hour's endtime:
--      uo_24hr    = sum of urine_output.value in (endtime - 24 h, endtime]
--      uo_tm_24hr = number of distinct clock hours in that window that
--                   have at least one urine_output row
--    uo_tm_24hr lets the renal CASE in step 11 tell "no data" apart from
--    "really oliguric": the UO branch only fires when uo_tm_24hr is in a
--    plausible range, otherwise renal falls back to creatinine alone.
--    See port note #4 in the file header.
DROP TABLE IF EXISTS sofa_uo;
CREATE TABLE sofa_uo AS
SELECT grid.icustay_id
     , grid.hr
     , SUM(urine.value) AS uo_24hr
     , COUNT(DISTINCT date_trunc('hour', urine.charttime)) AS uo_tm_24hr
FROM sofa_grid AS grid
LEFT JOIN urine_output AS urine
    ON urine.icustay_id = grid.icustay_id
    AND urine.charttime <= grid.endtime
    AND urine.charttime > DATETIME_SUB(grid.endtime, INTERVAL '24' HOUR)
GROUP BY grid.icustay_id, grid.hr;
CREATE INDEX IF NOT EXISTS sofa_uo_idx
    ON sofa_uo (icustay_id, hr);
ANALYZE sofa_uo;
-- 9. Vasopressor rates: for each grid hour, the highest vaso_rate of
--    each pressor among dose intervals that contain the hour's endtime
--    (starttime < endtime <= interval endtime). Hours with no active
--    pressor of any kind are left out of the table entirely.
DROP TABLE IF EXISTS sofa_vaso;
CREATE TABLE sofa_vaso AS
SELECT grid.icustay_id
     , grid.hr
     , MAX(epi.vaso_rate) AS rate_epinephrine
     , MAX(nor.vaso_rate) AS rate_norepinephrine
     , MAX(dop.vaso_rate) AS rate_dopamine
     , MAX(dob.vaso_rate) AS rate_dobutamine
FROM sofa_grid AS grid
LEFT JOIN epinephrine_dose AS epi
    ON epi.icustay_id = grid.icustay_id
    AND epi.starttime < grid.endtime
    AND epi.endtime >= grid.endtime
LEFT JOIN norepinephrine_dose AS nor
    ON nor.icustay_id = grid.icustay_id
    AND nor.starttime < grid.endtime
    AND nor.endtime >= grid.endtime
LEFT JOIN dopamine_dose AS dop
    ON dop.icustay_id = grid.icustay_id
    AND dop.starttime < grid.endtime
    AND dop.endtime >= grid.endtime
LEFT JOIN dobutamine_dose AS dob
    ON dob.icustay_id = grid.icustay_id
    AND dob.starttime < grid.endtime
    AND dob.endtime >= grid.endtime
-- keep the hour only if at least one of the four joins found a match
WHERE COALESCE(epi.icustay_id, nor.icustay_id,
               dop.icustay_id, dob.icustay_id) IS NOT NULL
GROUP BY grid.icustay_id, grid.hr;
CREATE INDEX IF NOT EXISTS sofa_vaso_idx
    ON sofa_vaso (icustay_id, hr);
ANALYZE sofa_vaso;
-- 10. Wide assembly: one row per grid hour with every staged measurement
--     attached by (icustay_id, hr). All joins are LEFT joins, so each
--     grid row survives even where a staging table has no match.
DROP TABLE IF EXISTS sofa_wide;
CREATE TABLE sofa_wide AS
SELECT grid.subject_id
     , grid.hadm_id
     , grid.icustay_id
     , grid.hr
     , grid.starttime
     , grid.endtime
     , map.meanbp_min
     , coma.gcs_min
     , bili.bilirubin_max
     , creat.creatinine_max
     , plt.platelet_min
     , pf.pao2fio2_novent
     , pf.pao2fio2_vent
     , urine.uo_24hr
     , urine.uo_tm_24hr
     , vaso.rate_epinephrine
     , vaso.rate_norepinephrine
     , vaso.rate_dopamine
     , vaso.rate_dobutamine
FROM sofa_grid AS grid
LEFT JOIN sofa_vs AS map
    ON grid.icustay_id = map.icustay_id AND grid.hr = map.hr
LEFT JOIN sofa_gcs AS coma
    ON grid.icustay_id = coma.icustay_id AND grid.hr = coma.hr
LEFT JOIN sofa_bili AS bili
    ON grid.icustay_id = bili.icustay_id AND grid.hr = bili.hr
LEFT JOIN sofa_cr AS creat
    ON grid.icustay_id = creat.icustay_id AND grid.hr = creat.hr
LEFT JOIN sofa_plt AS plt
    ON grid.icustay_id = plt.icustay_id AND grid.hr = plt.hr
LEFT JOIN sofa_pf AS pf
    ON grid.icustay_id = pf.icustay_id AND grid.hr = pf.hr
LEFT JOIN sofa_uo AS urine
    ON grid.icustay_id = urine.icustay_id AND grid.hr = urine.hr
LEFT JOIN sofa_vaso AS vaso
    ON grid.icustay_id = vaso.icustay_id AND grid.hr = vaso.hr;
CREATE INDEX IF NOT EXISTS sofa_wide_idx
    ON sofa_wide (icustay_id, hr);
ANALYZE sofa_wide;
-- 11. Per-hour component scores (no rolling window yet). Each component
--     is scored 0-4 from that hour's inputs and is NULL when the hour
--     has no usable input for it.
DROP TABLE IF EXISTS sofa_components;
CREATE TABLE sofa_components AS
SELECT w.*
     -- Respiration: scores 3 and 4 are only reachable from a ventilated
     -- PaO2:FiO2 ratio.
     , CASE
           WHEN w.pao2fio2_vent < 100 THEN 4
           WHEN w.pao2fio2_vent < 200 THEN 3
           WHEN w.pao2fio2_novent < 300 THEN 2
           WHEN w.pao2fio2_vent < 300 THEN 2
           WHEN w.pao2fio2_novent < 400 THEN 1
           WHEN w.pao2fio2_vent < 400 THEN 1
           WHEN COALESCE(w.pao2fio2_vent, w.pao2fio2_novent) IS NULL THEN NULL
           ELSE 0
       END AS respiration
     -- Coagulation (platelet count)
     , CASE
           WHEN w.platelet_min < 20 THEN 4
           WHEN w.platelet_min < 50 THEN 3
           WHEN w.platelet_min < 100 THEN 2
           WHEN w.platelet_min < 150 THEN 1
           WHEN w.platelet_min IS NULL THEN NULL
           ELSE 0
       END AS coagulation
     -- Liver (bilirubin, mg/dL)
     , CASE
           WHEN w.bilirubin_max >= 12.0 THEN 4
           WHEN w.bilirubin_max >= 6.0 THEN 3
           WHEN w.bilirubin_max >= 2.0 THEN 2
           WHEN w.bilirubin_max >= 1.2 THEN 1
           WHEN w.bilirubin_max IS NULL THEN NULL
           ELSE 0
       END AS liver
     -- Cardiovascular: pressor rates first, then mean arterial pressure.
     , CASE
           WHEN w.rate_dopamine > 15
             OR w.rate_epinephrine > 0.1
             OR w.rate_norepinephrine > 0.1 THEN 4
           WHEN w.rate_dopamine > 5
             OR w.rate_epinephrine <= 0.1
             OR w.rate_norepinephrine <= 0.1 THEN 3
           WHEN w.rate_dopamine > 0
             OR w.rate_dobutamine > 0 THEN 2
           WHEN w.meanbp_min < 70 THEN 1
           WHEN COALESCE(w.meanbp_min, w.rate_dopamine, w.rate_dobutamine,
                         w.rate_epinephrine, w.rate_norepinephrine) IS NULL
               THEN NULL
           ELSE 0
       END AS cardiovascular
     -- CNS (GCS). The four scoring ranges are disjoint, so their order
     -- in the CASE does not matter.
     , CASE
           WHEN w.gcs_min < 6 THEN 4
           WHEN w.gcs_min BETWEEN 6 AND 9 THEN 3
           WHEN w.gcs_min BETWEEN 10 AND 12 THEN 2
           WHEN w.gcs_min BETWEEN 13 AND 14 THEN 1
           WHEN w.gcs_min IS NULL THEN NULL
           ELSE 0
       END AS cns
     -- Renal: creatinine thresholds interleaved with urine-output
     -- thresholds. The urine-output branches only fire when uo_tm_24hr
     -- is between 22 and 30; uo_24hr is then scaled to a 24 h equivalent
     -- (uo_24hr * 24 / uo_tm_24hr). Outside that range urine output is
     -- treated as missing and the score rests on creatinine alone.
     -- GREATEST(uo_24hr, 0) clips negative net volumes to zero so that
     -- they are not mistaken for oliguria.
     , CASE
           WHEN w.creatinine_max >= 5.0 THEN 4
           WHEN w.uo_tm_24hr BETWEEN 22 AND 30
            AND GREATEST(w.uo_24hr, 0) * 24.0 / w.uo_tm_24hr < 200 THEN 4
           WHEN w.creatinine_max >= 3.5
            AND w.creatinine_max < 5.0 THEN 3
           WHEN w.uo_tm_24hr BETWEEN 22 AND 30
            AND GREATEST(w.uo_24hr, 0) * 24.0 / w.uo_tm_24hr < 500 THEN 3
           WHEN w.creatinine_max >= 2.0
            AND w.creatinine_max < 3.5 THEN 2
           WHEN w.creatinine_max >= 1.2
            AND w.creatinine_max < 2.0 THEN 1
           WHEN w.creatinine_max IS NULL
            AND w.uo_tm_24hr NOT BETWEEN 22 AND 30 THEN NULL
           ELSE 0
       END AS renal
FROM sofa_wide AS w;
CREATE INDEX IF NOT EXISTS sofa_components_idx
    ON sofa_components (icustay_id, hr);
ANALYZE sofa_components;
-- 12. Final hourly SOFA. For every hour, each component's *_24hours
--     value is the maximum of that component over the current row and
--     the 23 preceding rows of the same stay (0 when all of them are
--     NULL). sofa_24hours is the sum of the six rolling maxima.
DROP TABLE IF EXISTS sofa_hourly;
CREATE TABLE sofa_hourly AS
WITH rolling AS (
    SELECT c.subject_id
         , c.hadm_id
         , c.icustay_id
         , c.hr
         , c.starttime
         , c.endtime
         , c.respiration
         , c.coagulation
         , c.liver
         , c.cardiovascular
         , c.cns
         , c.renal
         , COALESCE(MAX(c.respiration) OVER last_24h, 0) AS respiration_24hours
         , COALESCE(MAX(c.coagulation) OVER last_24h, 0) AS coagulation_24hours
         , COALESCE(MAX(c.liver) OVER last_24h, 0) AS liver_24hours
         , COALESCE(MAX(c.cardiovascular) OVER last_24h, 0) AS cardiovascular_24hours
         , COALESCE(MAX(c.cns) OVER last_24h, 0) AS cns_24hours
         , COALESCE(MAX(c.renal) OVER last_24h, 0) AS renal_24hours
    FROM sofa_components AS c
    WINDOW last_24h AS (
        PARTITION BY c.icustay_id
        ORDER BY c.hr
        ROWS BETWEEN 23 PRECEDING AND CURRENT ROW
    )
)
SELECT r.subject_id
     , r.hadm_id
     , r.icustay_id
     , r.hr
     , r.starttime
     , r.endtime
     , r.respiration
     , r.coagulation
     , r.liver
     , r.cardiovascular
     , r.cns
     , r.renal
     , r.respiration_24hours
     , r.coagulation_24hours
     , r.liver_24hours
     , r.cardiovascular_24hours
     , r.cns_24hours
     , r.renal_24hours
     , r.respiration_24hours
       + r.coagulation_24hours
       + r.liver_24hours
       + r.cardiovascular_24hours
       + r.cns_24hours
       + r.renal_24hours AS sofa_24hours
FROM rolling AS r;
CREATE INDEX IF NOT EXISTS sofa_hourly_idx
    ON sofa_hourly (icustay_id, hr);
CREATE INDEX IF NOT EXISTS sofa_hourly_time_idx
    ON sofa_hourly (icustay_id, endtime);
ANALYZE sofa_hourly;

View File

@@ -0,0 +1,153 @@
-- ------------------------------------------------------------------
-- Title: Suspicion of Infection
--
-- Adapted from the MIMIC-IV upstream
-- concepts/sepsis/suspicion_of_infection.sql
-- and ported to MIMIC-III v1.3 vanilla PostgreSQL.
--
-- Definition (from the original Sepsis-3 paper, Seymour 2016):
-- a patient is "suspected of infection" if a culture and an
-- antibiotic are ordered close in time:
-- - culture <= 72 h before antibiotic, OR
-- - culture <= 24 h after antibiotic.
-- The antibiotic time is taken as the suspected-infection time when
-- a culture comes second; the culture time when it comes first.
--
-- PORT NOTES:
-- 1. ID column is `icustay_id`, not `stay_id`.
-- 2. MIMIC-III has no `micro_specimen_id`; specimens are identified
-- by the tuple (subject_id, hadm_id, chartdate, charttime,
-- spec_itemid, spec_type_desc) and we deduplicate organism
-- rows by aggregating with that tuple.
-- 3. MIMIC-III `prescriptions.startdate` is DATE-precision only.
-- Consequently `antibiotic_time` always lands on midnight; the
-- MIMIC-IV branches that compare to `me.charttime` still work
-- (DATE auto-casts to TIMESTAMP at 00:00) but give day-level
-- onset precision.
-- ------------------------------------------------------------------
-- Build suspicion_of_infection: one row per antibiotic order, flagged
-- when a culture was obtained close enough in time to that order.
--
-- Time-window arithmetic uses PostgreSQL's native `timestamp +/- INTERVAL`
-- operators. DATETIME_ADD / DATETIME_SUB are BigQuery functions and are
-- not built into PostgreSQL, so calling them would fail unless a
-- compatibility shim had been installed separately.
DROP TABLE IF EXISTS suspicion_of_infection;

CREATE TABLE suspicion_of_infection AS
WITH ab_tbl AS
(
    -- One row per row of `antibiotic`. ab_id numbers the orders within a
    -- subject so the later CTEs can be joined back to a specific order.
    SELECT abx.subject_id
         , abx.hadm_id
         , abx.icustay_id
         , abx.antibiotic
         , CAST(abx.starttime AS TIMESTAMP) AS antibiotic_time
         , CAST(abx.starttime AS DATE)      AS antibiotic_date
         , CAST(abx.stoptime AS TIMESTAMP)  AS antibiotic_stoptime
         , ROW_NUMBER() OVER (
               PARTITION BY abx.subject_id
               ORDER BY abx.starttime, abx.stoptime, abx.antibiotic
           ) AS ab_id
    FROM antibiotic abx
)
, me AS
(
    -- Collapse microbiologyevents to one row per specimen, identified by
    -- (subject_id, hadm_id, chartdate, charttime, spec_itemid,
    -- spec_type_desc). positiveculture is 1 when any row of the specimen
    -- has a non-empty org_name, otherwise 0.
    -- In GROUP BY, PostgreSQL resolves chartdate / charttime to the input
    -- columns, not to the same-named output aliases.
    SELECT subject_id
         , hadm_id
         , spec_itemid
         , spec_type_desc
         , MAX(CAST(chartdate AS DATE)) AS chartdate
         , MAX(charttime)               AS charttime
         , MAX(CASE WHEN org_name IS NOT NULL AND org_name != ''
                    THEN 1 ELSE 0 END)  AS positiveculture
    FROM microbiologyevents
    GROUP BY subject_id, hadm_id, chartdate, charttime,
             spec_itemid, spec_type_desc
)
, me_then_ab AS
(
    -- Culture first, antibiotic second.
    -- With a charttime: the culture is strictly before the antibiotic and
    -- at most 72 hours earlier. Without a charttime: the culture date is
    -- the antibiotic date or up to 3 days before it.
    -- micro_seq = 1 marks the earliest matching culture for each order;
    -- an order with no match still yields one row with NULL culture fields.
    SELECT ab_tbl.subject_id
         , ab_tbl.hadm_id
         , ab_tbl.icustay_id
         , ab_tbl.ab_id
         , COALESCE(me72.charttime,
                    CAST(me72.chartdate AS TIMESTAMP)) AS last72_charttime
         , me72.positiveculture AS last72_positiveculture
         , me72.spec_type_desc  AS last72_specimen
         , ROW_NUMBER() OVER (
               PARTITION BY ab_tbl.subject_id, ab_tbl.ab_id
               ORDER BY me72.chartdate, me72.charttime NULLS LAST
           ) AS micro_seq
    FROM ab_tbl
    LEFT JOIN me me72
        ON ab_tbl.subject_id = me72.subject_id
       AND (
               (
                   me72.charttime IS NOT NULL
                   AND ab_tbl.antibiotic_time > me72.charttime
                   AND ab_tbl.antibiotic_time <= me72.charttime + INTERVAL '72 hours'
               )
            OR (
                   me72.charttime IS NULL
                   AND ab_tbl.antibiotic_date >= me72.chartdate
                   AND ab_tbl.antibiotic_date <= me72.chartdate + INTERVAL '3 day'
               )
           )
)
, ab_then_me AS
(
    -- Antibiotic first, culture second.
    -- With a charttime: the culture is strictly after the antibiotic and
    -- at most 24 hours later. Without a charttime: the culture date is
    -- the antibiotic date or the day after it.
    -- micro_seq = 1 marks the earliest matching culture for each order.
    SELECT ab_tbl.subject_id
         , ab_tbl.hadm_id
         , ab_tbl.icustay_id
         , ab_tbl.ab_id
         , COALESCE(me24.charttime,
                    CAST(me24.chartdate AS TIMESTAMP)) AS next24_charttime
         , me24.positiveculture AS next24_positiveculture
         , me24.spec_type_desc  AS next24_specimen
         , ROW_NUMBER() OVER (
               PARTITION BY ab_tbl.subject_id, ab_tbl.ab_id
               ORDER BY me24.chartdate, me24.charttime NULLS LAST
           ) AS micro_seq
    FROM ab_tbl
    LEFT JOIN me me24
        ON ab_tbl.subject_id = me24.subject_id
       AND (
               (
                   me24.charttime IS NOT NULL
                   AND ab_tbl.antibiotic_time >= me24.charttime - INTERVAL '24 hours'
                   AND ab_tbl.antibiotic_time < me24.charttime
               )
            OR (
                   me24.charttime IS NULL
                   AND ab_tbl.antibiotic_date >= me24.chartdate - INTERVAL '1 day'
                   AND ab_tbl.antibiotic_date <= me24.chartdate
               )
           )
)
SELECT ab_tbl.subject_id
     , ab_tbl.icustay_id
     , ab_tbl.hadm_id
     , ab_tbl.ab_id
     , ab_tbl.antibiotic
     , ab_tbl.antibiotic_time
     -- 1 when either window matched a culture with a specimen description.
     -- NOTE(review): a matched culture whose spec_type_desc is NULL would
     -- be treated as "no match" here -- confirm that cannot occur.
     , CASE
           WHEN me2ab.last72_specimen IS NULL AND ab2me.next24_specimen IS NULL
           THEN 0 ELSE 1
       END AS suspected_infection
     -- Onset time: the preceding culture's time when there is one,
     -- otherwise the antibiotic time; NULL when nothing matched.
     , CASE
           WHEN me2ab.last72_specimen IS NULL AND ab2me.next24_specimen IS NULL
           THEN NULL
           ELSE COALESCE(me2ab.last72_charttime, ab_tbl.antibiotic_time)
       END AS suspected_infection_time
     -- Culture details prefer the preceding culture over the following one.
     , COALESCE(me2ab.last72_charttime, ab2me.next24_charttime) AS culture_time
     , COALESCE(me2ab.last72_specimen, ab2me.next24_specimen)   AS specimen
     , COALESCE(me2ab.last72_positiveculture,
                ab2me.next24_positiveculture)                   AS positive_culture
FROM ab_tbl
LEFT JOIN ab_then_me ab2me
    ON ab_tbl.subject_id = ab2me.subject_id
   AND ab_tbl.ab_id = ab2me.ab_id
   AND ab2me.micro_seq = 1
LEFT JOIN me_then_ab me2ab
    ON ab_tbl.subject_id = me2ab.subject_id
   AND ab_tbl.ab_id = me2ab.ab_id
   AND me2ab.micro_seq = 1;

-- Lookup by ICU stay and suspected-infection time.
CREATE INDEX IF NOT EXISTS suspicion_of_infection_idx
    ON suspicion_of_infection (icustay_id, suspected_infection_time);