Compare commits

...

2 Commits

2 changed files with 715 additions and 31 deletions

View File

@@ -9,12 +9,25 @@ Benutzt die Full-Pop-Prädiktionen aus dem vorherigen Lauf.
Basilakis 2026 · chicxulub.ai
"""
import json, sys, math, time, random
import json, os, sys, math, time, random
from collections import defaultdict
# Google Cloud project handed to bigquery.Client() in run_bq().
BQ_PROJECT = "goddard-gap"
# Project prefix of the dataset names interpolated into the BigQuery-era
# queries (`{DATA_PROJECT}.mimiciv_3_1_...`).
DATA_PROJECT = "physionet-data"
# Norepinephrine itemid used by the BigQuery-era queries against the single
# `inputevents` table.
NE_ITEMID = 221906
# PostgreSQL connection string (libpq DSN). Override with env var.
# e.g. "host=localhost port=5432 dbname=mimic user=postgres password=..."
PG_DSN = os.environ.get("MIMIC_PG_DSN", "dbname=mimic3")
# Schema holding the stock MIMIC-III v1.3 tables (admissions, icustays,
# labevents, chartevents, inputevents_mv, inputevents_cv, prescriptions,
# diagnoses_icd, d_items, ...).
MIMIC_SCHEMA = os.environ.get("MIMIC_SCHEMA", "mimiciii")
# Schema holding the locally built derived tables (sapsii, sepsis3, ...);
# see sql/schemas.sql. Defaults to the same schema as MIMIC-III itself.
DERIVED_SCHEMA = os.environ.get("DERIVED_SCHEMA", MIMIC_SCHEMA)
# MIMIC-III stores Norepinephrine under different itemids in CareVue
# (inputevents_cv: 30047, 30120) and MetaVision (inputevents_mv: 221906).
NE_ITEMIDS_MV = [221906]
NE_ITEMIDS_CV = [30047, 30120]
# NOTE(review): not referenced in the visible part of the file; presumably a
# SAPS-II matching window -- confirm units and usage.
SAPS_WINDOW = 10
# Per-patient feature names; they match the column aliases of the final
# SELECT in load_all_icu().
PARAM_KEYS = ["lactate","creatinine","ph","troponin","hemoglobin",
"heart_rate","map_bp","spo2","temperature","ne_dose"]
@@ -52,10 +65,24 @@ GALAXY_PRIORITY = ["sepsis","cardiogenic_shock","post_cardiac_arrest","ards",
"acute_mi","aki","liver_failure","gi_bleeding","stroke","pe","dka",
"heart_failure","pneumonia","copd","afib","post_cardiac_surgery"]
def run_bq(sql):
    """Run ``sql`` on BigQuery and return the result rows as a list of dicts.

    A new client bound to ``BQ_PROJECT`` is created on every call.
    """
    # Function-scope import: the module stays importable without the
    # BigQuery SDK installed.
    from google.cloud import bigquery

    query_job = bigquery.Client(project=BQ_PROJECT).query(sql)
    return [dict(row.items()) for row in query_job.result()]
# Process-wide PostgreSQL connection, opened lazily by _pg_conn().
_PG_CONN = None


def _pg_conn():
    """Return the shared read-only, autocommit PostgreSQL connection.

    The connection is opened from ``PG_DSN`` on first use and re-opened when
    the cached one reports itself closed.

    Raises:
        Whatever ``psycopg2.connect`` / ``set_session`` raise; in that case
        nothing is cached, so the next call starts from scratch.
    """
    global _PG_CONN
    if _PG_CONN is None or getattr(_PG_CONN, "closed", 0):
        # Function-scope import keeps the module importable without psycopg2.
        import psycopg2

        conn = psycopg2.connect(PG_DSN)
        try:
            conn.set_session(readonly=True, autocommit=True)
        except Exception:
            # Never cache a connection that is not read-only/autocommit:
            # later calls would silently reuse it.
            conn.close()
            raise
        _PG_CONN = conn
    return _PG_CONN
def run_pg(sql, params=None):
    """Execute a read-only SQL query and return rows as list[dict].

    Args:
        sql: Query text. When ``params`` is given, use psycopg2 placeholders
            (``%s`` / ``%(name)s``) and write literal percent signs as ``%%``.
        params: Optional sequence or mapping of values bound by the driver.
            With the default ``None`` the query text is sent unchanged, so
            existing callers that pass fully built SQL are unaffected.

    Returns:
        One dict per result row keyed by column name; an empty list when the
        statement produces no result set.
    """
    import psycopg2.extras

    conn = _pg_conn()
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        cur.execute(sql, params)
        # description is None for statements that return no rows at all.
        if cur.description is None:
            return []
        return [dict(row) for row in cur.fetchall()]
def auc_fast(preds):
if not preds: return 0.5
@@ -99,33 +126,63 @@ def td(pv,centroid,weights):
def load_all_icu():
    """Load first-24h features for every scored ICU patient from MIMIC-III.

    Builds one PostgreSQL query that selects ICU stays with a SAPS-II score
    between 20 and 90 and left-joins, per ``hadm_id``, the worst first-day
    value of each feature in ``PARAM_KEYS`` (labs from ``labevents``, vitals
    from ``chartevents``, norepinephrine rate from both input-event tables).

    Returns:
        A list of dicts with the keys ``hadm_id``, ``died``, ``sapsii``,
        ``saps_prob`` plus every name in ``PARAM_KEYS``. Rows are kept only
        when ``died`` is known and at least three features are non-null.

    NOTE(review): ``icu_pts`` is distinct per (hadm_id, intime, score), so an
    admission with several scored ICU stays appears to be returned more than
    once -- confirm this is intended.
    NOTE(review): CareVue itemid 30047 is believed to be charted in mcg/min
    while 30120 and 221906 are mcg/kg/min; ``ne_dose`` mixes them unconverted
    -- confirm against d_items / the derived norepinephrine_dose table.
    """
    print(" Loading ALL ICU patients...")
    ne_mv = ",".join(str(i) for i in NE_ITEMIDS_MV)
    ne_cv = ",".join(str(i) for i in NE_ITEMIDS_CV)
    sql = f"""WITH icu_pts AS (
    SELECT DISTINCT a.hadm_id,a.hospital_expire_flag AS died,s.sapsii,icu.intime,
           s.sapsii_prob AS saps_prob
    FROM {MIMIC_SCHEMA}.admissions a
    JOIN {MIMIC_SCHEMA}.icustays icu ON a.hadm_id=icu.hadm_id
    JOIN {DERIVED_SCHEMA}.sapsii s ON icu.icustay_id=s.icustay_id
    WHERE s.sapsii BETWEEN 20 AND 90),
l_lac AS (
    SELECT le.hadm_id,MAX(le.valuenum) AS val
    FROM {MIMIC_SCHEMA}.labevents le
    JOIN icu_pts ip ON le.hadm_id=ip.hadm_id
    WHERE le.itemid=50813 AND le.valuenum IS NOT NULL
      AND le.charttime BETWEEN ip.intime AND ip.intime + INTERVAL '24 hours'
    GROUP BY le.hadm_id),
l_krea AS (
    SELECT le.hadm_id,MAX(le.valuenum) AS val
    FROM {MIMIC_SCHEMA}.labevents le
    JOIN icu_pts ip ON le.hadm_id=ip.hadm_id
    WHERE le.itemid=50912 AND le.valuenum IS NOT NULL
      AND le.charttime BETWEEN ip.intime AND ip.intime + INTERVAL '24 hours'
    GROUP BY le.hadm_id),
l_ph AS (
    SELECT le.hadm_id,MIN(le.valuenum) AS val
    FROM {MIMIC_SCHEMA}.labevents le
    JOIN icu_pts ip ON le.hadm_id=ip.hadm_id
    WHERE le.itemid IN (50820,50831) AND le.valuenum IS NOT NULL
      AND le.charttime BETWEEN ip.intime AND ip.intime + INTERVAL '24 hours'
    GROUP BY le.hadm_id),
l_trop AS (
    SELECT le.hadm_id,MAX(le.valuenum) AS val
    FROM {MIMIC_SCHEMA}.labevents le
    JOIN icu_pts ip ON le.hadm_id=ip.hadm_id
    WHERE le.itemid IN (51002,51003) AND le.valuenum IS NOT NULL
      AND le.charttime BETWEEN ip.intime AND ip.intime + INTERVAL '24 hours'
    GROUP BY le.hadm_id),
l_hb AS (
    SELECT le.hadm_id,MIN(le.valuenum) AS val
    FROM {MIMIC_SCHEMA}.labevents le
    JOIN icu_pts ip ON le.hadm_id=ip.hadm_id
    WHERE le.itemid=51222 AND le.valuenum IS NOT NULL
      AND le.charttime BETWEEN ip.intime AND ip.intime + INTERVAL '24 hours'
    GROUP BY le.hadm_id),
c_hr AS (
    SELECT ce.hadm_id,MAX(ce.valuenum) AS val
    FROM {MIMIC_SCHEMA}.chartevents ce
    JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id
    JOIN {MIMIC_SCHEMA}.icustays icu ON ce.icustay_id=icu.icustay_id
    WHERE ce.itemid IN (211,220045) AND ce.valuenum BETWEEN 20 AND 250
      AND ce.charttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours'
    GROUP BY ce.hadm_id),
c_map AS (
    SELECT ce.hadm_id,MIN(ce.valuenum) AS val
    FROM {MIMIC_SCHEMA}.chartevents ce
    JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id
    JOIN {MIMIC_SCHEMA}.icustays icu ON ce.icustay_id=icu.icustay_id
    WHERE ce.itemid IN (52,456,6702,220052,220181,225312) AND ce.valuenum BETWEEN 20 AND 200
      AND ce.charttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours'
    GROUP BY ce.hadm_id),
c_spo2 AS (
    SELECT ce.hadm_id,MIN(ce.valuenum) AS val
    FROM {MIMIC_SCHEMA}.chartevents ce
    JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id
    JOIN {MIMIC_SCHEMA}.icustays icu ON ce.icustay_id=icu.icustay_id
    WHERE ce.itemid IN (646,220277) AND ce.valuenum BETWEEN 50 AND 100
      AND ce.charttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours'
    GROUP BY ce.hadm_id),
-- Temperature: pull all four MIMIC-III itemids (676/223762 nominally
-- Celsius, 678/223761 nominally Fahrenheit) and decide the unit from
-- the value itself. Plausible body temperature in C is ~28..43 and
-- in F is ~82..110; the two ranges don't overlap, so a value in the
-- F band can be safely converted to C even if it was charted under a
-- "Celsius" itemid (and vice versa). Anything outside both bands is
-- treated as sensor noise and dropped.
c_temp AS (
    SELECT ce.hadm_id,
           MIN(CASE
                 WHEN ce.valuenum BETWEEN 28 AND 43 THEN ce.valuenum
                 WHEN ce.valuenum BETWEEN 82 AND 110 THEN (ce.valuenum - 32.0) / 1.8
               END) AS val
    FROM {MIMIC_SCHEMA}.chartevents ce
    JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id
    JOIN {MIMIC_SCHEMA}.icustays icu ON ce.icustay_id=icu.icustay_id
    WHERE ce.itemid IN (676, 223762, 678, 223761)
      AND ce.valuenum IS NOT NULL
      AND (ce.valuenum BETWEEN 28 AND 43 OR ce.valuenum BETWEEN 82 AND 110)
      AND ce.charttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours'
    GROUP BY ce.hadm_id),
ne_all AS (
    SELECT ie.hadm_id, ie.icustay_id, ie.rate, ie.starttime AS evttime
    FROM {MIMIC_SCHEMA}.inputevents_mv ie
    WHERE ie.itemid IN ({ne_mv}) AND ie.rate>0
    UNION ALL
    SELECT ie.hadm_id, ie.icustay_id, ie.rate, ie.charttime AS evttime
    FROM {MIMIC_SCHEMA}.inputevents_cv ie
    WHERE ie.itemid IN ({ne_cv}) AND ie.rate>0),
ne AS (
    SELECT ie.hadm_id,MAX(ie.rate) AS val
    FROM ne_all ie
    JOIN icu_pts ip ON ie.hadm_id=ip.hadm_id
    JOIN {MIMIC_SCHEMA}.icustays icu ON ie.icustay_id=icu.icustay_id
    WHERE ie.evttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours'
    GROUP BY ie.hadm_id)
SELECT ip.hadm_id,ip.died,ip.sapsii,ip.saps_prob,
       ll.val AS lactate,lk.val AS creatinine,lp.val AS ph,lt.val AS troponin,lh.val AS hemoglobin,
       chr_.val AS heart_rate,cma.val AS map_bp,csp.val AS spo2,cte.val AS temperature,ne.val AS ne_dose
FROM icu_pts ip
LEFT JOIN l_lac ll ON ip.hadm_id=ll.hadm_id
LEFT JOIN l_krea lk ON ip.hadm_id=lk.hadm_id
LEFT JOIN l_ph lp ON ip.hadm_id=lp.hadm_id
LEFT JOIN l_trop lt ON ip.hadm_id=lt.hadm_id
LEFT JOIN l_hb lh ON ip.hadm_id=lh.hadm_id
LEFT JOIN c_hr chr_ ON ip.hadm_id=chr_.hadm_id
LEFT JOIN c_map cma ON ip.hadm_id=cma.hadm_id
LEFT JOIN c_spo2 csp ON ip.hadm_id=csp.hadm_id
LEFT JOIN c_temp cte ON ip.hadm_id=cte.hadm_id
LEFT JOIN ne ON ip.hadm_id=ne.hadm_id"""
    rows = run_pg(sql)
    wanted = ["hadm_id", "died", "sapsii", "saps_prob"] + PARAM_KEYS
    # Keep a patient only if the outcome is known and at least three of the
    # features were actually measured.
    pts = [
        {k: r.get(k) for k in wanted}
        for r in rows
        if r.get("died") is not None
        and sum(1 for k in PARAM_KEYS if r.get(k) is not None) >= 3
    ]
    print(f" -> {len(pts)} patients")
    return pts
@@ -135,10 +192,13 @@ def assign_galaxies(pts):
hids=[p["hadm_id"] for p in pts];ps=defaultdict(set)
for i in range(0,len(hids),10000):
chunk=hids[i:i+10000]
for r in run_bq(f"SELECT hadm_id,icd_code,icd_version FROM `{DATA_PROJECT}.mimiciv_3_1_hosp.diagnoses_icd` WHERE hadm_id IN ({','.join(str(h) for h in chunk)})"):
# MIMIC-III v1.3 only carries ICD-9 codes (column `icd9_code`).
for r in run_pg(f"SELECT hadm_id,icd9_code FROM {MIMIC_SCHEMA}.diagnoses_icd WHERE hadm_id IN ({','.join(str(h) for h in chunk)})"):
code = r.get("icd9_code")
if code is None: continue
for sk,sd in SYNDROME_ICDS.items():
for rc in sd.get(f"icd_{r['icd_version']}",[]):
if r["icd_code"].startswith(rc): ps[r["hadm_id"]].add(sk);break
for rc in sd.get("icd_9",[]):
if code.startswith(rc): ps[r["hadm_id"]].add(sk);break
for p in pts:
p["galaxy"]=None
for g in GALAXY_PRIORITY:
@@ -147,14 +207,33 @@ def assign_galaxies(pts):
def load_therapy_hadmids(tkey):
    """Return the set of hadm_ids that received therapy ``tkey`` on ICU day one.

    Args:
        tkey: Key into ``THERAPIES``. ``"ne_high"`` is special-cased as a
            norepinephrine rate >= 0.5 in either input-event table; every
            other therapy is matched by name via its ``drugs_input``
            (d_items label) and ``drugs_rx`` (prescriptions.drug) lists.

    Returns:
        A set of ``hadm_id`` values; empty when the therapy lists no drugs.

    Raises:
        KeyError: if ``tkey`` is not in ``THERAPIES``.

    NOTE(review): CareVue itemid 30047 is believed to be charted in mcg/min,
    not mcg/kg/min, so the shared 0.5 threshold may not be comparable across
    itemids -- confirm against d_items / the derived norepinephrine_dose table.
    """
    t = THERAPIES[tkey]
    first_day = "BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours'"
    if tkey == "ne_high":
        ne_mv = ",".join(str(i) for i in NE_ITEMIDS_MV)
        ne_cv = ",".join(str(i) for i in NE_ITEMIDS_CV)
        sql = f"""
            SELECT DISTINCT ie.hadm_id
            FROM {MIMIC_SCHEMA}.inputevents_mv ie
            JOIN {MIMIC_SCHEMA}.icustays icu ON ie.icustay_id=icu.icustay_id
            WHERE ie.itemid IN ({ne_mv}) AND ie.rate>=0.5
              AND ie.starttime {first_day}
            UNION
            SELECT DISTINCT ie.hadm_id
            FROM {MIMIC_SCHEMA}.inputevents_cv ie
            JOIN {MIMIC_SCHEMA}.icustays icu ON ie.icustay_id=icu.icustay_id
            WHERE ie.itemid IN ({ne_cv}) AND ie.rate>=0.5
              AND ie.charttime {first_day}
        """
        return {r["hadm_id"] for r in run_pg(sql)}

    clauses = []
    # MIMIC-III splits inputevents across MetaVision (starttime) and CareVue
    # (charttime); we have to query both and UNION the hadm_ids.
    for d in t.get("drugs_input", []):
        # Double single quotes so a name like "Miller's" stays a valid literal.
        pat = d.replace("'", "''")
        for table, timecol in (("inputevents_mv", "starttime"),
                               ("inputevents_cv", "charttime")):
            clauses.append(
                f"SELECT DISTINCT ie.hadm_id FROM {MIMIC_SCHEMA}.{table} ie"
                f" JOIN {MIMIC_SCHEMA}.d_items di ON ie.itemid=di.itemid"
                f" JOIN {MIMIC_SCHEMA}.icustays icu ON ie.icustay_id=icu.icustay_id"
                f" WHERE di.label ILIKE '%{pat}%' AND ie.{timecol} {first_day}"
            )
    # MIMIC-III prescriptions uses DATE-precision `startdate` (always at
    # 00:00:00). Comparing it with icu.intime directly would drop every
    # prescription dated on the admission day itself, so the lower bound is
    # truncated to midnight of that day. Day granularity is the best the
    # table offers; the window is therefore approximate.
    for d in t.get("drugs_rx", []):
        pat = d.replace("'", "''")
        clauses.append(
            f"SELECT DISTINCT p.hadm_id FROM {MIMIC_SCHEMA}.prescriptions p"
            f" JOIN {MIMIC_SCHEMA}.icustays icu ON p.hadm_id=icu.hadm_id"
            f" WHERE p.drug ILIKE '%{pat}%'"
            f" AND p.startdate BETWEEN DATE_TRUNC('day', icu.intime)"
            f" AND icu.intime + INTERVAL '24 hours'"
        )
    if not clauses:
        return set()
    return {r["hadm_id"] for r in run_pg(" UNION ".join(clauses))}
def run_loo(test_pts,ref_pts,therapy_hids,by_gal,label):
"""Returns list of {a, p, g, hadm_id} — includes hadm_id for fair comparison."""

605
sql/schemas.sql Normal file
View File

@@ -0,0 +1,605 @@
-- ------------------------------------------------------------------
-- Reference CREATE TABLE schemas for every derived table produced by
-- sql/build_sapsii.sql
-- sql/build_sepsis3.sql
--
-- This file is documentation only. The actual build scripts use
-- `DROP TABLE IF EXISTS ...; CREATE TABLE ... AS SELECT ...`, so
-- column types are inferred by PostgreSQL at build time from the
-- MIMIC-III v1.3 base schema and from the expressions in the SELECT.
-- The types below match what PostgreSQL infers when the build is run
-- on a stock MIMIC-III v1.3 PostgreSQL restore (where for example
-- chartevents.valuenum is DOUBLE PRECISION, outputevents.value is
-- DOUBLE PRECISION, *.charttime is TIMESTAMP(0), etc.).
--
-- Use this file as:
-- * a quick reference for column names and types of each derived
-- table (handy for downstream consumers that need to know the
-- output schema without grep'ing through the build SQL);
-- * a stub for declaring empty derived tables ahead of time (e.g.
-- in a migration that just `CREATE TABLE IF NOT EXISTS ...`s
-- them, then later runs the build to populate them);
-- * a checklist when porting these scripts to another flavor of
-- MIMIC (e.g. MIMIC-III v1.4 or MIMIC-IV).
-- ------------------------------------------------------------------
-- ==================================================================
-- SAPS-II
-- ==================================================================
-- 1. Helper: all-time urine output (from outputevents).
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS urine_output (
    icustay_id INTEGER,
    charttime TIMESTAMP(0),
    value DOUBLE PRECISION
);
-- 2. Ventilation: classification (per charttime) and durations
-- (per ventilation episode).
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS ventilation_classification (
    icustay_id INTEGER,
    charttime TIMESTAMP(0),
    mechvent INTEGER,
    oxygentherapy INTEGER,
    extubated INTEGER,
    selfextubated INTEGER
);
-- One row per ventilation episode.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS ventilation_durations (
    icustay_id INTEGER,
    ventnum BIGINT,
    starttime TIMESTAMP(0),
    endtime TIMESTAMP(0),
    duration_hours NUMERIC
);
-- 3. First-day pivots feeding SAPS-II.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS blood_gas_first_day (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    charttime TIMESTAMP(0),
    specimen VARCHAR(200),
    aado2 DOUBLE PRECISION,
    baseexcess DOUBLE PRECISION,
    bicarbonate DOUBLE PRECISION,
    totalco2 DOUBLE PRECISION,
    carboxyhemoglobin DOUBLE PRECISION,
    chloride DOUBLE PRECISION,
    calcium DOUBLE PRECISION,
    glucose DOUBLE PRECISION,
    hematocrit DOUBLE PRECISION,
    hemoglobin DOUBLE PRECISION,
    intubated DOUBLE PRECISION,
    lactate DOUBLE PRECISION,
    methemoglobin DOUBLE PRECISION,
    o2flow DOUBLE PRECISION,
    fio2 DOUBLE PRECISION,
    so2 DOUBLE PRECISION,
    pco2 DOUBLE PRECISION,
    peep DOUBLE PRECISION,
    ph DOUBLE PRECISION,
    po2 DOUBLE PRECISION,
    potassium DOUBLE PRECISION,
    requiredo2 DOUBLE PRECISION,
    sodium DOUBLE PRECISION,
    temperature DOUBLE PRECISION,
    tidalvolume DOUBLE PRECISION,
    ventilationrate DOUBLE PRECISION,
    ventilator DOUBLE PRECISION
);
-- First-day blood gases with specimen prediction and derived ratios.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS blood_gas_first_day_arterial (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    charttime TIMESTAMP(0),
    specimen VARCHAR(200),
    specimen_pred VARCHAR(200),
    specimen_prob DOUBLE PRECISION,
    so2 DOUBLE PRECISION,
    spo2 DOUBLE PRECISION,
    po2 DOUBLE PRECISION,
    pco2 DOUBLE PRECISION,
    fio2_chartevents DOUBLE PRECISION,
    fio2 DOUBLE PRECISION,
    aado2 DOUBLE PRECISION,
    aado2_calc DOUBLE PRECISION,
    pao2fio2 DOUBLE PRECISION,
    ph DOUBLE PRECISION,
    baseexcess DOUBLE PRECISION,
    bicarbonate DOUBLE PRECISION,
    totalco2 DOUBLE PRECISION,
    hematocrit DOUBLE PRECISION,
    hemoglobin DOUBLE PRECISION,
    carboxyhemoglobin DOUBLE PRECISION,
    methemoglobin DOUBLE PRECISION,
    chloride DOUBLE PRECISION,
    calcium DOUBLE PRECISION,
    temperature DOUBLE PRECISION,
    potassium DOUBLE PRECISION,
    sodium DOUBLE PRECISION,
    lactate DOUBLE PRECISION,
    glucose DOUBLE PRECISION,
    intubated DOUBLE PRECISION,
    tidalvolume DOUBLE PRECISION,
    ventilationrate DOUBLE PRECISION,
    ventilator DOUBLE PRECISION,
    peep DOUBLE PRECISION,
    o2flow DOUBLE PRECISION,
    requiredo2 DOUBLE PRECISION
);
-- First-day Glasgow Coma Scale summary per ICU stay.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS gcs_first_day (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    mingcs DOUBLE PRECISION,
    gcsmotor DOUBLE PRECISION,
    gcsverbal DOUBLE PRECISION,
    gcseyes DOUBLE PRECISION,
    endotrachflag INTEGER
);
-- First-day min/max of each lab per ICU stay.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS labs_first_day (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    aniongap_min DOUBLE PRECISION,
    aniongap_max DOUBLE PRECISION,
    albumin_min DOUBLE PRECISION,
    albumin_max DOUBLE PRECISION,
    bands_min DOUBLE PRECISION,
    bands_max DOUBLE PRECISION,
    bicarbonate_min DOUBLE PRECISION,
    bicarbonate_max DOUBLE PRECISION,
    bilirubin_min DOUBLE PRECISION,
    bilirubin_max DOUBLE PRECISION,
    creatinine_min DOUBLE PRECISION,
    creatinine_max DOUBLE PRECISION,
    chloride_min DOUBLE PRECISION,
    chloride_max DOUBLE PRECISION,
    glucose_min DOUBLE PRECISION,
    glucose_max DOUBLE PRECISION,
    hematocrit_min DOUBLE PRECISION,
    hematocrit_max DOUBLE PRECISION,
    hemoglobin_min DOUBLE PRECISION,
    hemoglobin_max DOUBLE PRECISION,
    lactate_min DOUBLE PRECISION,
    lactate_max DOUBLE PRECISION,
    platelet_min DOUBLE PRECISION,
    platelet_max DOUBLE PRECISION,
    potassium_min DOUBLE PRECISION,
    potassium_max DOUBLE PRECISION,
    ptt_min DOUBLE PRECISION,
    ptt_max DOUBLE PRECISION,
    inr_min DOUBLE PRECISION,
    inr_max DOUBLE PRECISION,
    pt_min DOUBLE PRECISION,
    pt_max DOUBLE PRECISION,
    sodium_min DOUBLE PRECISION,
    sodium_max DOUBLE PRECISION,
    bun_min DOUBLE PRECISION,
    bun_max DOUBLE PRECISION,
    wbc_min DOUBLE PRECISION,
    wbc_max DOUBLE PRECISION
);
-- First-day urine output total per ICU stay.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS urine_output_first_day (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    urineoutput DOUBLE PRECISION
);
-- First-day min/max/mean of each vital sign per ICU stay.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS vitals_first_day (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    heartrate_min DOUBLE PRECISION,
    heartrate_max DOUBLE PRECISION,
    heartrate_mean DOUBLE PRECISION,
    sysbp_min DOUBLE PRECISION,
    sysbp_max DOUBLE PRECISION,
    sysbp_mean DOUBLE PRECISION,
    diasbp_min DOUBLE PRECISION,
    diasbp_max DOUBLE PRECISION,
    diasbp_mean DOUBLE PRECISION,
    meanbp_min DOUBLE PRECISION,
    meanbp_max DOUBLE PRECISION,
    meanbp_mean DOUBLE PRECISION,
    resprate_min DOUBLE PRECISION,
    resprate_max DOUBLE PRECISION,
    resprate_mean DOUBLE PRECISION,
    tempc_min DOUBLE PRECISION,
    tempc_max DOUBLE PRECISION,
    tempc_mean DOUBLE PRECISION,
    spo2_min DOUBLE PRECISION,
    spo2_max DOUBLE PRECISION,
    spo2_mean DOUBLE PRECISION,
    glucose_min DOUBLE PRECISION,
    glucose_max DOUBLE PRECISION,
    glucose_mean DOUBLE PRECISION
);
-- 4. Final SAPS-II score table (one row per ICU stay).
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sapsii (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    sapsii INTEGER,
    sapsii_prob DOUBLE PRECISION,
    age_score INTEGER,
    hr_score INTEGER,
    sysbp_score INTEGER,
    temp_score INTEGER,
    pao2fio2_score INTEGER,
    uo_score INTEGER,
    bun_score INTEGER,
    wbc_score INTEGER,
    potassium_score INTEGER,
    sodium_score INTEGER,
    bicarbonate_score INTEGER,
    bilirubin_score INTEGER,
    gcs_score INTEGER,
    comorbidity_score INTEGER,
    admissiontype_score INTEGER
);
-- ==================================================================
-- Sepsis-3
-- ==================================================================
--
-- Sepsis-3 reuses these SAPS-II tables:
-- urine_output, ventilation_classification, ventilation_durations
-- (defined above). The tables below are the ones added by
-- build_sepsis3.sql.
-- 1. Echo extraction (used to impute weight when chartevents weight
-- is missing; also keyed by ROW_ID to the noteevents row).
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS echo_data (
    row_id INTEGER,
    subject_id INTEGER,
    hadm_id INTEGER,
    chartdate TIMESTAMP(0),
    charttime TIMESTAMP(3),
    indication TEXT,
    height NUMERIC,
    weight NUMERIC,
    bsa NUMERIC,
    bp TEXT,
    bpsys NUMERIC,
    bpdias NUMERIC,
    hr NUMERIC,
    status TEXT,
    test TEXT,
    doppler TEXT,
    contrast TEXT,
    technicalquality TEXT
);
-- 2. Per-stay weight durations (admit + daily + neonate + echo
-- imputed); used for mcg/kg/min vasopressor unit conversion.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS weight_durations (
    icustay_id INTEGER,
    starttime TIMESTAMP(0),
    endtime TIMESTAMP(0),
    weight DOUBLE PRECISION
);
-- 3. Vasopressor dose tables. All four have the same schema; rates
-- are merged CareVue + MetaVision and converted to mcg/kg/min.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS dobutamine_dose (
    icustay_id INTEGER,
    starttime TIMESTAMP(0),
    endtime TIMESTAMP(0),
    vaso_rate DOUBLE PRECISION,
    vaso_amount DOUBLE PRECISION
);
-- Dopamine dose intervals per ICU stay.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS dopamine_dose (
    icustay_id INTEGER,
    starttime TIMESTAMP(0),
    endtime TIMESTAMP(0),
    vaso_rate DOUBLE PRECISION,
    vaso_amount DOUBLE PRECISION
);
-- Epinephrine dose intervals per ICU stay.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS epinephrine_dose (
    icustay_id INTEGER,
    starttime TIMESTAMP(0),
    endtime TIMESTAMP(0),
    vaso_rate DOUBLE PRECISION,
    vaso_amount DOUBLE PRECISION
);
-- Norepinephrine dose intervals per ICU stay.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS norepinephrine_dose (
    icustay_id INTEGER,
    starttime TIMESTAMP(0),
    endtime TIMESTAMP(0),
    vaso_rate DOUBLE PRECISION,
    vaso_amount DOUBLE PRECISION
);
-- 4. All-time pivots feeding hourly SOFA.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS blood_gas_arterial (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    charttime TIMESTAMP(0),
    specimen VARCHAR(200),
    specimen_pred VARCHAR(200),
    specimen_prob DOUBLE PRECISION,
    so2 DOUBLE PRECISION,
    spo2 DOUBLE PRECISION,
    po2 DOUBLE PRECISION,
    pco2 DOUBLE PRECISION,
    fio2_chartevents DOUBLE PRECISION,
    fio2 DOUBLE PRECISION,
    aado2 DOUBLE PRECISION,
    aado2_calc DOUBLE PRECISION,
    pao2fio2 DOUBLE PRECISION,
    ph DOUBLE PRECISION,
    baseexcess DOUBLE PRECISION,
    bicarbonate DOUBLE PRECISION,
    totalco2 DOUBLE PRECISION,
    hematocrit DOUBLE PRECISION,
    hemoglobin DOUBLE PRECISION,
    carboxyhemoglobin DOUBLE PRECISION,
    methemoglobin DOUBLE PRECISION,
    chloride DOUBLE PRECISION,
    calcium DOUBLE PRECISION,
    temperature DOUBLE PRECISION,
    potassium DOUBLE PRECISION,
    sodium DOUBLE PRECISION,
    lactate DOUBLE PRECISION,
    glucose DOUBLE PRECISION,
    intubated DOUBLE PRECISION,
    tidalvolume DOUBLE PRECISION,
    ventilationrate DOUBLE PRECISION,
    ventilator DOUBLE PRECISION,
    peep DOUBLE PRECISION,
    o2flow DOUBLE PRECISION,
    requiredo2 DOUBLE PRECISION
);
-- All-time GCS per charttime.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS gcs_all (
    icustay_id INTEGER,
    charttime TIMESTAMP(0),
    gcs DOUBLE PRECISION,
    endotrachflag INTEGER
);
-- 5. Hourly SOFA pipeline. Each measurement class is materialised
-- into a narrow staging table keyed by (icustay_id, hr); these
-- are kept (not dropped) so each stage can be inspected with
-- EXPLAIN ANALYZE.
-- 5a. Hourly grid (one row per ICU hour per stay).
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_grid (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    hr INTEGER,
    starttime TIMESTAMP(0),
    endtime TIMESTAMP(0)
);
-- 5b. MAP minimum within each hour.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_vs (
    icustay_id INTEGER,
    hr INTEGER,
    meanbp_min DOUBLE PRECISION
);
-- 5c. GCS minimum within each hour (from gcs_all, which already has
-- the carry-forward and ET-trach=15 rules applied).
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_gcs (
    icustay_id INTEGER,
    hr INTEGER,
    gcs_min DOUBLE PRECISION
);
-- 5d. Bilirubin maximum within each hour.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_bili (
    icustay_id INTEGER,
    hr INTEGER,
    bilirubin_max DOUBLE PRECISION
);
-- 5e. Creatinine maximum within each hour.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_cr (
    icustay_id INTEGER,
    hr INTEGER,
    creatinine_max DOUBLE PRECISION
);
-- 5f. Platelet minimum within each hour.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_plt (
    icustay_id INTEGER,
    hr INTEGER,
    platelet_min DOUBLE PRECISION
);
-- 5g. PaO2/FiO2: split into vent / no-vent based on whether an
-- active ventilation episode covered the blood gas.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_pf (
    icustay_id INTEGER,
    hr INTEGER,
    pao2fio2_novent DOUBLE PRECISION,
    pao2fio2_vent DOUBLE PRECISION
);
-- 5h. Urine output rolling sum + count of distinct charted hours
-- within the past 24 h.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_uo (
    icustay_id INTEGER,
    hr INTEGER,
    uo_24hr DOUBLE PRECISION,
    uo_tm_24hr BIGINT
);
-- 5i. Vasopressor rate snapshot at the hour boundary.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_vaso (
    icustay_id INTEGER,
    hr INTEGER,
    rate_epinephrine DOUBLE PRECISION,
    rate_norepinephrine DOUBLE PRECISION,
    rate_dopamine DOUBLE PRECISION,
    rate_dobutamine DOUBLE PRECISION
);
-- 5j. Wide assembly: grid LEFT JOINed onto every measurement table.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_wide (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    hr INTEGER,
    starttime TIMESTAMP(0),
    endtime TIMESTAMP(0),
    meanbp_min DOUBLE PRECISION,
    gcs_min DOUBLE PRECISION,
    bilirubin_max DOUBLE PRECISION,
    creatinine_max DOUBLE PRECISION,
    platelet_min DOUBLE PRECISION,
    pao2fio2_novent DOUBLE PRECISION,
    pao2fio2_vent DOUBLE PRECISION,
    uo_24hr DOUBLE PRECISION,
    uo_tm_24hr BIGINT,
    rate_epinephrine DOUBLE PRECISION,
    rate_norepinephrine DOUBLE PRECISION,
    rate_dopamine DOUBLE PRECISION,
    rate_dobutamine DOUBLE PRECISION
);
-- 5k. Per-hour component scores (no rolling window yet).
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_components (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    hr INTEGER,
    starttime TIMESTAMP(0),
    endtime TIMESTAMP(0),
    meanbp_min DOUBLE PRECISION,
    gcs_min DOUBLE PRECISION,
    bilirubin_max DOUBLE PRECISION,
    creatinine_max DOUBLE PRECISION,
    platelet_min DOUBLE PRECISION,
    pao2fio2_novent DOUBLE PRECISION,
    pao2fio2_vent DOUBLE PRECISION,
    uo_24hr DOUBLE PRECISION,
    uo_tm_24hr BIGINT,
    rate_epinephrine DOUBLE PRECISION,
    rate_norepinephrine DOUBLE PRECISION,
    rate_dopamine DOUBLE PRECISION,
    rate_dobutamine DOUBLE PRECISION,
    respiration INTEGER,
    coagulation INTEGER,
    liver INTEGER,
    cardiovascular INTEGER,
    cns INTEGER,
    renal INTEGER
);
-- 5l. Final hourly SOFA: 24-hour rolling MAX per component, summed.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sofa_hourly (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    hr INTEGER,
    starttime TIMESTAMP(0),
    endtime TIMESTAMP(0),
    respiration INTEGER,
    coagulation INTEGER,
    liver INTEGER,
    cardiovascular INTEGER,
    cns INTEGER,
    renal INTEGER,
    respiration_24hours INTEGER,
    coagulation_24hours INTEGER,
    liver_24hours INTEGER,
    cardiovascular_24hours INTEGER,
    cns_24hours INTEGER,
    renal_24hours INTEGER,
    sofa_24hours INTEGER
);
-- 6. Suspicion of infection.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS antibiotic (
    subject_id INTEGER,
    hadm_id INTEGER,
    icustay_id INTEGER,
    antibiotic VARCHAR(255),
    route VARCHAR(120),
    -- MIMIC-III prescriptions has DATE-precision startdate / enddate
    -- (stored as TIMESTAMP(0) but always at 00:00:00).
    starttime TIMESTAMP(0),
    stoptime TIMESTAMP(0)
);
-- Antibiotic / culture pairing per ICU stay.
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS suspicion_of_infection (
    subject_id INTEGER,
    icustay_id INTEGER,
    hadm_id INTEGER,
    ab_id BIGINT,
    antibiotic VARCHAR(255),
    antibiotic_time TIMESTAMP,
    suspected_infection INTEGER,
    suspected_infection_time TIMESTAMP,
    culture_time TIMESTAMP,
    specimen VARCHAR(100),
    positive_culture INTEGER
);
-- 7. Final Sepsis-3 onset table (one row per ICU stay).
-- Idempotent stub: an existing (populated) table is left untouched.
CREATE TABLE IF NOT EXISTS sepsis3 (
    subject_id INTEGER,
    icustay_id INTEGER,
    antibiotic_time TIMESTAMP,
    culture_time TIMESTAMP,
    suspected_infection_time TIMESTAMP,
    sofa_time TIMESTAMP(0),
    sofa_score INTEGER,
    respiration INTEGER,
    coagulation INTEGER,
    liver INTEGER,
    cardiovascular INTEGER,
    cns INTEGER,
    renal INTEGER,
    sepsis3 BOOLEAN
);