diff --git a/paper2_festung_teil3.py b/paper2_festung_teil3.py index e585690..d32b8b1 100644 --- a/paper2_festung_teil3.py +++ b/paper2_festung_teil3.py @@ -9,12 +9,25 @@ Benutzt die Full-Pop-Prädiktionen aus dem vorherigen Lauf. Basilakis 2026 · chicxulub.ai """ -import json, sys, math, time, random +import json, os, sys, math, time, random from collections import defaultdict -BQ_PROJECT = "goddard-gap" -DATA_PROJECT = "physionet-data" -NE_ITEMID = 221906 +# PostgreSQL connection string (libpq DSN). Override with env var. +# e.g. "host=localhost port=5432 dbname=mimic user=postgres password=..." +PG_DSN = os.environ.get("MIMIC_PG_DSN", "dbname=mimic3") +# Schema holding the stock MIMIC-III v1.3 tables (admissions, icustays, +# labevents, chartevents, inputevents_mv, inputevents_cv, prescriptions, +# diagnoses_icd, d_items, ...). +MIMIC_SCHEMA = os.environ.get("MIMIC_SCHEMA", "mimiciii") +# Schema holding the locally built derived tables (sapsii, sepsis3, ...); +# see sql/schemas.sql. Defaults to the same schema as MIMIC-III itself. +DERIVED_SCHEMA = os.environ.get("DERIVED_SCHEMA", MIMIC_SCHEMA) + +# MIMIC-III stores Norepinephrine under different itemids in CareVue +# (inputevents_cv: 30047, 30120) and MetaVision (inputevents_mv: 221906). 
+NE_ITEMIDS_MV = [221906] +NE_ITEMIDS_CV = [30047, 30120] + SAPS_WINDOW = 10 PARAM_KEYS = ["lactate","creatinine","ph","troponin","hemoglobin", "heart_rate","map_bp","spo2","temperature","ne_dose"] @@ -52,10 +65,24 @@ GALAXY_PRIORITY = ["sepsis","cardiogenic_shock","post_cardiac_arrest","ards", "acute_mi","aki","liver_failure","gi_bleeding","stroke","pe","dka", "heart_failure","pneumonia","copd","afib","post_cardiac_surgery"] -def run_bq(sql): - from google.cloud import bigquery - client = bigquery.Client(project=BQ_PROJECT) - return [dict(r.items()) for r in client.query(sql).result()] +_PG_CONN = None +def _pg_conn(): + global _PG_CONN + if _PG_CONN is None or getattr(_PG_CONN, "closed", 0): + import psycopg2 + _PG_CONN = psycopg2.connect(PG_DSN) + _PG_CONN.set_session(readonly=True, autocommit=True) + return _PG_CONN + +def run_pg(sql): + """Execute a read-only SQL query and return rows as list[dict].""" + import psycopg2.extras + conn = _pg_conn() + with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute(sql) + if cur.description is None: + return [] + return [dict(r) for r in cur.fetchall()] def auc_fast(preds): if not preds: return 0.5 @@ -99,33 +126,63 @@ def td(pv,centroid,weights): def load_all_icu(): print(" Loading ALL ICU patients...") + ne_mv = ",".join(str(i) for i in NE_ITEMIDS_MV) + ne_cv = ",".join(str(i) for i in NE_ITEMIDS_CV) sql=f"""WITH icu_pts AS ( SELECT DISTINCT a.hadm_id,a.hospital_expire_flag AS died,s.sapsii,icu.intime, s.sapsii_prob AS saps_prob - FROM `{DATA_PROJECT}.mimiciv_3_1_hosp.admissions` a - JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON a.hadm_id=icu.hadm_id - JOIN `{DATA_PROJECT}.mimiciv_3_1_derived.sapsii` s ON icu.stay_id=s.stay_id + FROM {MIMIC_SCHEMA}.admissions a + JOIN {MIMIC_SCHEMA}.icustays icu ON a.hadm_id=icu.hadm_id + JOIN {DERIVED_SCHEMA}.sapsii s ON icu.icustay_id=s.icustay_id WHERE s.sapsii BETWEEN 20 AND 90), - l_lac AS (SELECT le.hadm_id,MAX(le.valuenum) AS val FROM 
`{DATA_PROJECT}.mimiciv_3_1_hosp.labevents` le JOIN icu_pts ip ON le.hadm_id=ip.hadm_id WHERE le.itemid=50813 AND le.valuenum IS NOT NULL AND le.charttime BETWEEN ip.intime AND TIMESTAMP_ADD(ip.intime,INTERVAL 24 HOUR) GROUP BY le.hadm_id), - l_krea AS (SELECT le.hadm_id,MAX(le.valuenum) AS val FROM `{DATA_PROJECT}.mimiciv_3_1_hosp.labevents` le JOIN icu_pts ip ON le.hadm_id=ip.hadm_id WHERE le.itemid=50912 AND le.valuenum IS NOT NULL AND le.charttime BETWEEN ip.intime AND TIMESTAMP_ADD(ip.intime,INTERVAL 24 HOUR) GROUP BY le.hadm_id), - l_ph AS (SELECT le.hadm_id,MIN(le.valuenum) AS val FROM `{DATA_PROJECT}.mimiciv_3_1_hosp.labevents` le JOIN icu_pts ip ON le.hadm_id=ip.hadm_id WHERE le.itemid IN (50820,50831) AND le.valuenum IS NOT NULL AND le.charttime BETWEEN ip.intime AND TIMESTAMP_ADD(ip.intime,INTERVAL 24 HOUR) GROUP BY le.hadm_id), - l_trop AS (SELECT le.hadm_id,MAX(le.valuenum) AS val FROM `{DATA_PROJECT}.mimiciv_3_1_hosp.labevents` le JOIN icu_pts ip ON le.hadm_id=ip.hadm_id WHERE le.itemid IN (51002,51003) AND le.valuenum IS NOT NULL AND le.charttime BETWEEN ip.intime AND TIMESTAMP_ADD(ip.intime,INTERVAL 24 HOUR) GROUP BY le.hadm_id), - l_hb AS (SELECT le.hadm_id,MIN(le.valuenum) AS val FROM `{DATA_PROJECT}.mimiciv_3_1_hosp.labevents` le JOIN icu_pts ip ON le.hadm_id=ip.hadm_id WHERE le.itemid=51222 AND le.valuenum IS NOT NULL AND le.charttime BETWEEN ip.intime AND TIMESTAMP_ADD(ip.intime,INTERVAL 24 HOUR) GROUP BY le.hadm_id), - c_hr AS (SELECT ce.hadm_id,MAX(ce.valuenum) AS val FROM `{DATA_PROJECT}.mimiciv_3_1_icu.chartevents` ce JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON ce.stay_id=icu.stay_id WHERE ce.itemid=220045 AND ce.valuenum BETWEEN 20 AND 250 AND ce.charttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime,INTERVAL 24 HOUR) GROUP BY ce.hadm_id), - c_map AS (SELECT ce.hadm_id,MIN(ce.valuenum) AS val FROM `{DATA_PROJECT}.mimiciv_3_1_icu.chartevents` ce JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id 
JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON ce.stay_id=icu.stay_id WHERE ce.itemid IN (220052,220181,225312) AND ce.valuenum BETWEEN 20 AND 200 AND ce.charttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime,INTERVAL 24 HOUR) GROUP BY ce.hadm_id), - c_spo2 AS (SELECT ce.hadm_id,MIN(ce.valuenum) AS val FROM `{DATA_PROJECT}.mimiciv_3_1_icu.chartevents` ce JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON ce.stay_id=icu.stay_id WHERE ce.itemid=220277 AND ce.valuenum BETWEEN 50 AND 100 AND ce.charttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime,INTERVAL 24 HOUR) GROUP BY ce.hadm_id), - c_temp AS (SELECT ce.hadm_id,MIN(ce.valuenum) AS val FROM `{DATA_PROJECT}.mimiciv_3_1_icu.chartevents` ce JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON ce.stay_id=icu.stay_id WHERE ce.itemid=223762 AND ce.valuenum BETWEEN 28 AND 43 AND ce.charttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime,INTERVAL 24 HOUR) GROUP BY ce.hadm_id), - ne AS (SELECT ie.hadm_id,MAX(ie.rate) AS val FROM `{DATA_PROJECT}.mimiciv_3_1_icu.inputevents` ie JOIN icu_pts ip ON ie.hadm_id=ip.hadm_id JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON ie.stay_id=icu.stay_id WHERE ie.itemid={NE_ITEMID} AND ie.rate>0 AND ie.starttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime,INTERVAL 24 HOUR) GROUP BY ie.hadm_id) + l_lac AS (SELECT le.hadm_id,MAX(le.valuenum) AS val FROM {MIMIC_SCHEMA}.labevents le JOIN icu_pts ip ON le.hadm_id=ip.hadm_id WHERE le.itemid=50813 AND le.valuenum IS NOT NULL AND le.charttime BETWEEN ip.intime AND ip.intime + INTERVAL '24 hours' GROUP BY le.hadm_id), + l_krea AS (SELECT le.hadm_id,MAX(le.valuenum) AS val FROM {MIMIC_SCHEMA}.labevents le JOIN icu_pts ip ON le.hadm_id=ip.hadm_id WHERE le.itemid=50912 AND le.valuenum IS NOT NULL AND le.charttime BETWEEN ip.intime AND ip.intime + INTERVAL '24 hours' GROUP BY le.hadm_id), + l_ph AS (SELECT le.hadm_id,MIN(le.valuenum) AS val 
FROM {MIMIC_SCHEMA}.labevents le JOIN icu_pts ip ON le.hadm_id=ip.hadm_id WHERE le.itemid IN (50820,50831) AND le.valuenum IS NOT NULL AND le.charttime BETWEEN ip.intime AND ip.intime + INTERVAL '24 hours' GROUP BY le.hadm_id), + l_trop AS (SELECT le.hadm_id,MAX(le.valuenum) AS val FROM {MIMIC_SCHEMA}.labevents le JOIN icu_pts ip ON le.hadm_id=ip.hadm_id WHERE le.itemid IN (51002,51003) AND le.valuenum IS NOT NULL AND le.charttime BETWEEN ip.intime AND ip.intime + INTERVAL '24 hours' GROUP BY le.hadm_id), + l_hb AS (SELECT le.hadm_id,MIN(le.valuenum) AS val FROM {MIMIC_SCHEMA}.labevents le JOIN icu_pts ip ON le.hadm_id=ip.hadm_id WHERE le.itemid=51222 AND le.valuenum IS NOT NULL AND le.charttime BETWEEN ip.intime AND ip.intime + INTERVAL '24 hours' GROUP BY le.hadm_id), + c_hr AS (SELECT ce.hadm_id,MAX(ce.valuenum) AS val FROM {MIMIC_SCHEMA}.chartevents ce JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id JOIN {MIMIC_SCHEMA}.icustays icu ON ce.icustay_id=icu.icustay_id WHERE ce.itemid IN (211,220045) AND ce.valuenum BETWEEN 20 AND 250 AND ce.charttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours' GROUP BY ce.hadm_id), + c_map AS (SELECT ce.hadm_id,MIN(ce.valuenum) AS val FROM {MIMIC_SCHEMA}.chartevents ce JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id JOIN {MIMIC_SCHEMA}.icustays icu ON ce.icustay_id=icu.icustay_id WHERE ce.itemid IN (52,456,6702,220052,220181,225312) AND ce.valuenum BETWEEN 20 AND 200 AND ce.charttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours' GROUP BY ce.hadm_id), + c_spo2 AS (SELECT ce.hadm_id,MIN(ce.valuenum) AS val FROM {MIMIC_SCHEMA}.chartevents ce JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id JOIN {MIMIC_SCHEMA}.icustays icu ON ce.icustay_id=icu.icustay_id WHERE ce.itemid IN (646,220277) AND ce.valuenum BETWEEN 50 AND 100 AND ce.charttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours' GROUP BY ce.hadm_id), + -- Temperature: pull all four MIMIC-III itemids (676/223762 nominally + -- Celsius, 678/223761 nominally Fahrenheit) and 
decide the unit from + -- the value itself. Plausible body temperature in C is ~28..43 and + -- in F is ~82..110; the two ranges don't overlap, so a value in the + -- F band can be safely converted to C even if it was charted under a + -- "Celsius" itemid (and vice versa). Anything outside both bands is + -- treated as sensor noise and dropped. + c_temp AS ( + SELECT ce.hadm_id, + MIN(CASE + WHEN ce.valuenum BETWEEN 28 AND 43 THEN ce.valuenum + WHEN ce.valuenum BETWEEN 82 AND 110 THEN (ce.valuenum - 32.0) / 1.8 + END) AS val + FROM {MIMIC_SCHEMA}.chartevents ce + JOIN icu_pts ip ON ce.hadm_id=ip.hadm_id + JOIN {MIMIC_SCHEMA}.icustays icu ON ce.icustay_id=icu.icustay_id + WHERE ce.itemid IN (676, 223762, 678, 223761) + AND ce.valuenum IS NOT NULL + AND (ce.valuenum BETWEEN 28 AND 43 OR ce.valuenum BETWEEN 82 AND 110) + AND ce.charttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours' + GROUP BY ce.hadm_id), + ne_all AS ( + SELECT ie.hadm_id, ie.icustay_id, ie.rate, ie.starttime AS evttime + FROM {MIMIC_SCHEMA}.inputevents_mv ie + WHERE ie.itemid IN ({ne_mv}) AND ie.rate>0 + UNION ALL + SELECT ie.hadm_id, ie.icustay_id, ie.rate, ie.charttime AS evttime + FROM {MIMIC_SCHEMA}.inputevents_cv ie + WHERE ie.itemid IN ({ne_cv}) AND ie.rate>0), + ne AS (SELECT ie.hadm_id,MAX(ie.rate) AS val FROM ne_all ie JOIN icu_pts ip ON ie.hadm_id=ip.hadm_id JOIN {MIMIC_SCHEMA}.icustays icu ON ie.icustay_id=icu.icustay_id WHERE ie.evttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours' GROUP BY ie.hadm_id) SELECT ip.hadm_id,ip.died,ip.sapsii,ip.saps_prob, ll.val AS lactate,lk.val AS creatinine,lp.val AS ph,lt.val AS troponin,lh.val AS hemoglobin, - chr.val AS heart_rate,cma.val AS map_bp,csp.val AS spo2,cte.val AS temperature,ne.val AS ne_dose + chr_.val AS heart_rate,cma.val AS map_bp,csp.val AS spo2,cte.val AS temperature,ne.val AS ne_dose FROM icu_pts ip LEFT JOIN l_lac ll ON ip.hadm_id=ll.hadm_id LEFT JOIN l_krea lk ON ip.hadm_id=lk.hadm_id LEFT JOIN l_ph lp ON 
ip.hadm_id=lp.hadm_id LEFT JOIN l_trop lt ON ip.hadm_id=lt.hadm_id - LEFT JOIN l_hb lh ON ip.hadm_id=lh.hadm_id LEFT JOIN c_hr chr ON ip.hadm_id=chr.hadm_id + LEFT JOIN l_hb lh ON ip.hadm_id=lh.hadm_id LEFT JOIN c_hr chr_ ON ip.hadm_id=chr_.hadm_id LEFT JOIN c_map cma ON ip.hadm_id=cma.hadm_id LEFT JOIN c_spo2 csp ON ip.hadm_id=csp.hadm_id LEFT JOIN c_temp cte ON ip.hadm_id=cte.hadm_id LEFT JOIN ne ON ip.hadm_id=ne.hadm_id""" - rows=run_bq(sql) + rows=run_pg(sql) pts=[{k:r.get(k) for k in ["hadm_id","died","sapsii","saps_prob"]+PARAM_KEYS} for r in rows if sum(1 for k in PARAM_KEYS if r.get(k) is not None)>=3 and r.get("died") is not None] print(f" -> {len(pts)} patients"); return pts @@ -135,10 +192,13 @@ def assign_galaxies(pts): hids=[p["hadm_id"] for p in pts];ps=defaultdict(set) for i in range(0,len(hids),10000): chunk=hids[i:i+10000] - for r in run_bq(f"SELECT hadm_id,icd_code,icd_version FROM `{DATA_PROJECT}.mimiciv_3_1_hosp.diagnoses_icd` WHERE hadm_id IN ({','.join(str(h) for h in chunk)})"): + # MIMIC-III v1.3 only carries ICD-9 codes (column `icd9_code`). 
+ for r in run_pg(f"SELECT hadm_id,icd9_code FROM {MIMIC_SCHEMA}.diagnoses_icd WHERE hadm_id IN ({','.join(str(h) for h in chunk)})"): + code = r.get("icd9_code") + if code is None: continue for sk,sd in SYNDROME_ICDS.items(): - for rc in sd.get(f"icd_{r['icd_version']}",[]): - if r["icd_code"].startswith(rc): ps[r["hadm_id"]].add(sk);break + for rc in sd.get("icd_9",[]): + if code.startswith(rc): ps[r["hadm_id"]].add(sk);break for p in pts: p["galaxy"]=None for g in GALAXY_PRIORITY: @@ -147,14 +207,33 @@ def assign_galaxies(pts): def load_therapy_hadmids(tkey): t=THERAPIES[tkey] if tkey=="ne_high": - return set(r["hadm_id"] for r in run_bq(f"SELECT DISTINCT ie.hadm_id FROM `{DATA_PROJECT}.mimiciv_3_1_icu.inputevents` ie JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON ie.stay_id=icu.stay_id WHERE ie.itemid={NE_ITEMID} AND ie.rate>=0.5 AND ie.starttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime,INTERVAL 24 HOUR)")) + ne_mv = ",".join(str(i) for i in NE_ITEMIDS_MV) + ne_cv = ",".join(str(i) for i in NE_ITEMIDS_CV) + sql = f""" + SELECT DISTINCT ie.hadm_id + FROM {MIMIC_SCHEMA}.inputevents_mv ie + JOIN {MIMIC_SCHEMA}.icustays icu ON ie.icustay_id=icu.icustay_id + WHERE ie.itemid IN ({ne_mv}) AND ie.rate>=0.5 + AND ie.starttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours' + UNION + SELECT DISTINCT ie.hadm_id + FROM {MIMIC_SCHEMA}.inputevents_cv ie + JOIN {MIMIC_SCHEMA}.icustays icu ON ie.icustay_id=icu.icustay_id + WHERE ie.itemid IN ({ne_cv}) AND ie.rate>=0.5 + AND ie.charttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours' + """ + return set(r["hadm_id"] for r in run_pg(sql)) clauses=[] + # MIMIC-III splits inputevents across MetaVision (starttime) and CareVue + # (charttime); we have to query both and UNION the hadm_ids. 
for d in t.get("drugs_input",[]): - clauses.append(f"SELECT DISTINCT ie.hadm_id FROM `{DATA_PROJECT}.mimiciv_3_1_icu.inputevents` ie JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.d_items` di ON ie.itemid=di.itemid JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON ie.stay_id=icu.stay_id WHERE di.label LIKE '%{d}%' AND ie.starttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime,INTERVAL 24 HOUR)") + clauses.append(f"SELECT DISTINCT ie.hadm_id FROM {MIMIC_SCHEMA}.inputevents_mv ie JOIN {MIMIC_SCHEMA}.d_items di ON ie.itemid=di.itemid JOIN {MIMIC_SCHEMA}.icustays icu ON ie.icustay_id=icu.icustay_id WHERE di.label ILIKE '%{d}%' AND ie.starttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours'") + clauses.append(f"SELECT DISTINCT ie.hadm_id FROM {MIMIC_SCHEMA}.inputevents_cv ie JOIN {MIMIC_SCHEMA}.d_items di ON ie.itemid=di.itemid JOIN {MIMIC_SCHEMA}.icustays icu ON ie.icustay_id=icu.icustay_id WHERE di.label ILIKE '%{d}%' AND ie.charttime BETWEEN icu.intime AND icu.intime + INTERVAL '24 hours'") + # MIMIC-III prescriptions uses DATE-precision `startdate` (not `starttime`): values sit at midnight, so the window must open at midnight of the ICU admission day or every same-day prescription falls before `intime` and is dropped. for d in t.get("drugs_rx",[]): - clauses.append(f"SELECT DISTINCT p.hadm_id FROM `{DATA_PROJECT}.mimiciv_3_1_hosp.prescriptions` p JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON p.hadm_id=icu.hadm_id WHERE p.drug LIKE '%{d}%' AND p.starttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime,INTERVAL 24 HOUR)") + clauses.append(f"SELECT DISTINCT p.hadm_id FROM {MIMIC_SCHEMA}.prescriptions p JOIN {MIMIC_SCHEMA}.icustays icu ON p.hadm_id=icu.hadm_id WHERE p.drug ILIKE '%{d}%' AND p.startdate BETWEEN date_trunc('day', icu.intime) AND icu.intime + INTERVAL '24 hours'") if not clauses: return set() - return set(r["hadm_id"] for r in run_bq(" UNION DISTINCT ".join(clauses))) + return set(r["hadm_id"] for r in run_pg(" UNION ".join(clauses))) def run_loo(test_pts,ref_pts,therapy_hids,by_gal,label): """Returns list of {a, p, g, hadm_id} — includes hadm_id for fair comparison."""