From e51cb777fd34c620e691e5eb6e1f20ba17154e93 Mon Sep 17 00:00:00 2001 From: David Madl Date: Tue, 5 May 2026 10:20:07 +0200 Subject: [PATCH] add: sql schemas for sapsii and sepsis3 scores (as ported to MIMIC-III, with caveats) --- sql/schemas.sql | 605 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 605 insertions(+) create mode 100644 sql/schemas.sql diff --git a/sql/schemas.sql b/sql/schemas.sql new file mode 100644 index 0000000..197bcc6 --- /dev/null +++ b/sql/schemas.sql @@ -0,0 +1,605 @@ +-- ------------------------------------------------------------------ +-- Reference CREATE TABLE schemas for every derived table produced by +-- sql/build_sapsii.sql +-- sql/build_sepsis3.sql +-- +-- This file is documentation only. The actual build scripts use +-- `DROP TABLE IF EXISTS ...; CREATE TABLE ... AS SELECT ...`, so +-- column types are inferred by PostgreSQL at build time from the +-- MIMIC-III v1.3 base schema and from the expressions in the SELECT. +-- The types below match what PostgreSQL infers when the build is run +-- on a stock MIMIC-III v1.3 PostgreSQL restore (where for example +-- chartevents.valuenum is DOUBLE PRECISION, outputevents.value is +-- DOUBLE PRECISION, *.charttime is TIMESTAMP(0), etc.). +-- +-- Use this file as: +-- * a quick reference for column names and types of each derived +-- table (handy for downstream consumers that need to know the +-- output schema without grep'ing through the build SQL); +-- * a stub for declaring empty derived tables ahead of time (e.g. +-- in a migration that just `CREATE TABLE IF NOT EXISTS ...`s +-- them, then later runs the build to populate them); +-- * a checklist when porting these scripts to another flavor of +-- MIMIC (e.g. MIMIC-III v1.4 or MIMIC-IV). +-- ------------------------------------------------------------------ + + +-- ================================================================== +-- SAPS-II +-- ================================================================== + +-- 1. Helper: all-time urine output (from outputevents). +DROP TABLE IF EXISTS urine_output; +CREATE TABLE urine_output ( + icustay_id INTEGER, + charttime TIMESTAMP(0), + value DOUBLE PRECISION +); + +-- 2. Ventilation: classification (per charttime) and durations +-- (per ventilation episode). +DROP TABLE IF EXISTS ventilation_classification; +CREATE TABLE ventilation_classification ( + icustay_id INTEGER, + charttime TIMESTAMP(0), + mechvent INTEGER, + oxygentherapy INTEGER, + extubated INTEGER, + selfextubated INTEGER +); + +DROP TABLE IF EXISTS ventilation_durations; +CREATE TABLE ventilation_durations ( + icustay_id INTEGER, + ventnum BIGINT, + starttime TIMESTAMP(0), + endtime TIMESTAMP(0), + duration_hours NUMERIC +); + +-- 3. First-day pivots feeding SAPS-II. +DROP TABLE IF EXISTS blood_gas_first_day; +CREATE TABLE blood_gas_first_day ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + charttime TIMESTAMP(0), + specimen VARCHAR(200), + aado2 DOUBLE PRECISION, + baseexcess DOUBLE PRECISION, + bicarbonate DOUBLE PRECISION, + totalco2 DOUBLE PRECISION, + carboxyhemoglobin DOUBLE PRECISION, + chloride DOUBLE PRECISION, + calcium DOUBLE PRECISION, + glucose DOUBLE PRECISION, + hematocrit DOUBLE PRECISION, + hemoglobin DOUBLE PRECISION, + intubated DOUBLE PRECISION, + lactate DOUBLE PRECISION, + methemoglobin DOUBLE PRECISION, + o2flow DOUBLE PRECISION, + fio2 DOUBLE PRECISION, + so2 DOUBLE PRECISION, + pco2 DOUBLE PRECISION, + peep DOUBLE PRECISION, + ph DOUBLE PRECISION, + po2 DOUBLE PRECISION, + potassium DOUBLE PRECISION, + requiredo2 DOUBLE PRECISION, + sodium DOUBLE PRECISION, + temperature DOUBLE PRECISION, + tidalvolume DOUBLE PRECISION, + ventilationrate DOUBLE PRECISION, + ventilator DOUBLE PRECISION +); + +DROP TABLE IF EXISTS blood_gas_first_day_arterial; +CREATE TABLE blood_gas_first_day_arterial ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + charttime TIMESTAMP(0), + specimen VARCHAR(200), + specimen_pred VARCHAR(200), + specimen_prob DOUBLE PRECISION, + so2 DOUBLE PRECISION, + spo2 DOUBLE PRECISION, + po2 DOUBLE PRECISION, + pco2 DOUBLE PRECISION, + fio2_chartevents DOUBLE PRECISION, + fio2 DOUBLE PRECISION, + aado2 DOUBLE PRECISION, + aado2_calc DOUBLE PRECISION, + pao2fio2 DOUBLE PRECISION, + ph DOUBLE PRECISION, + baseexcess DOUBLE PRECISION, + bicarbonate DOUBLE PRECISION, + totalco2 DOUBLE PRECISION, + hematocrit DOUBLE PRECISION, + hemoglobin DOUBLE PRECISION, + carboxyhemoglobin DOUBLE PRECISION, + methemoglobin DOUBLE PRECISION, + chloride DOUBLE PRECISION, + calcium DOUBLE PRECISION, + temperature DOUBLE PRECISION, + potassium DOUBLE PRECISION, + sodium DOUBLE PRECISION, + lactate DOUBLE PRECISION, + glucose DOUBLE PRECISION, + intubated DOUBLE PRECISION, + tidalvolume DOUBLE PRECISION, + ventilationrate DOUBLE PRECISION, + ventilator DOUBLE PRECISION, + peep DOUBLE PRECISION, + o2flow DOUBLE PRECISION, + requiredo2 DOUBLE PRECISION +); + +DROP TABLE IF EXISTS gcs_first_day; +CREATE TABLE gcs_first_day ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + mingcs DOUBLE PRECISION, + gcsmotor DOUBLE PRECISION, + gcsverbal DOUBLE PRECISION, + gcseyes DOUBLE PRECISION, + endotrachflag INTEGER +); + +DROP TABLE IF EXISTS labs_first_day; +CREATE TABLE labs_first_day ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + aniongap_min DOUBLE PRECISION, + aniongap_max DOUBLE PRECISION, + albumin_min DOUBLE PRECISION, + albumin_max DOUBLE PRECISION, + bands_min DOUBLE PRECISION, + bands_max DOUBLE PRECISION, + bicarbonate_min DOUBLE PRECISION, + bicarbonate_max DOUBLE PRECISION, + bilirubin_min DOUBLE PRECISION, + bilirubin_max DOUBLE PRECISION, + creatinine_min DOUBLE PRECISION, + creatinine_max DOUBLE PRECISION, + chloride_min DOUBLE PRECISION, + chloride_max DOUBLE PRECISION, + glucose_min DOUBLE PRECISION, + glucose_max DOUBLE PRECISION, + hematocrit_min DOUBLE PRECISION, + hematocrit_max DOUBLE PRECISION, + hemoglobin_min DOUBLE PRECISION, + hemoglobin_max DOUBLE PRECISION, + lactate_min DOUBLE PRECISION, + lactate_max DOUBLE PRECISION, + platelet_min DOUBLE PRECISION, + platelet_max DOUBLE PRECISION, + potassium_min DOUBLE PRECISION, + potassium_max DOUBLE PRECISION, + ptt_min DOUBLE PRECISION, + ptt_max DOUBLE PRECISION, + inr_min DOUBLE PRECISION, + inr_max DOUBLE PRECISION, + pt_min DOUBLE PRECISION, + pt_max DOUBLE PRECISION, + sodium_min DOUBLE PRECISION, + sodium_max DOUBLE PRECISION, + bun_min DOUBLE PRECISION, + bun_max DOUBLE PRECISION, + wbc_min DOUBLE PRECISION, + wbc_max DOUBLE PRECISION +); + +DROP TABLE IF EXISTS urine_output_first_day; +CREATE TABLE urine_output_first_day ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + urineoutput DOUBLE PRECISION +); + +DROP TABLE IF EXISTS vitals_first_day; +CREATE TABLE vitals_first_day ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + heartrate_min DOUBLE PRECISION, + heartrate_max DOUBLE PRECISION, + heartrate_mean DOUBLE PRECISION, + sysbp_min DOUBLE PRECISION, + sysbp_max DOUBLE PRECISION, + sysbp_mean DOUBLE PRECISION, + diasbp_min DOUBLE PRECISION, + diasbp_max DOUBLE PRECISION, + diasbp_mean DOUBLE PRECISION, + meanbp_min DOUBLE PRECISION, + meanbp_max DOUBLE PRECISION, + meanbp_mean DOUBLE PRECISION, + resprate_min DOUBLE PRECISION, + resprate_max DOUBLE PRECISION, + resprate_mean DOUBLE PRECISION, + tempc_min DOUBLE PRECISION, + tempc_max DOUBLE PRECISION, + tempc_mean DOUBLE PRECISION, + spo2_min DOUBLE PRECISION, + spo2_max DOUBLE PRECISION, + spo2_mean DOUBLE PRECISION, + glucose_min DOUBLE PRECISION, + glucose_max DOUBLE PRECISION, + glucose_mean DOUBLE PRECISION +); + +-- 4. Final SAPS-II score table (one row per ICU stay). +DROP TABLE IF EXISTS sapsii; +CREATE TABLE sapsii ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + sapsii INTEGER, + sapsii_prob DOUBLE PRECISION, + age_score INTEGER, + hr_score INTEGER, + sysbp_score INTEGER, + temp_score INTEGER, + pao2fio2_score INTEGER, + uo_score INTEGER, + bun_score INTEGER, + wbc_score INTEGER, + potassium_score INTEGER, + sodium_score INTEGER, + bicarbonate_score INTEGER, + bilirubin_score INTEGER, + gcs_score INTEGER, + comorbidity_score INTEGER, + admissiontype_score INTEGER +); + + +-- ================================================================== +-- Sepsis-3 +-- ================================================================== +-- +-- Sepsis-3 reuses these SAPS-II tables: +-- urine_output, ventilation_classification, ventilation_durations +-- (defined above). The tables below are the ones added by +-- build_sepsis3.sql. + +-- 1. Echo extraction (used to impute weight when chartevents weight +-- is missing; also keyed by ROW_ID to the noteevents row). +DROP TABLE IF EXISTS echo_data; +CREATE TABLE echo_data ( + row_id INTEGER, + subject_id INTEGER, + hadm_id INTEGER, + chartdate TIMESTAMP(0), + charttime TIMESTAMP(3), + indication TEXT, + height NUMERIC, + weight NUMERIC, + bsa NUMERIC, + bp TEXT, + bpsys NUMERIC, + bpdias NUMERIC, + hr NUMERIC, + status TEXT, + test TEXT, + doppler TEXT, + contrast TEXT, + technicalquality TEXT +); + +-- 2. Per-stay weight durations (admit + daily + neonate + echo +-- imputed); used for mcg/kg/min vasopressor unit conversion. +DROP TABLE IF EXISTS weight_durations; +CREATE TABLE weight_durations ( + icustay_id INTEGER, + starttime TIMESTAMP(0), + endtime TIMESTAMP(0), + weight DOUBLE PRECISION +); + +-- 3. Vasopressor dose tables. All four have the same schema; rates +-- are merged CareVue + MetaVision and converted to mcg/kg/min. +DROP TABLE IF EXISTS dobutamine_dose; +CREATE TABLE dobutamine_dose ( + icustay_id INTEGER, + starttime TIMESTAMP(0), + endtime TIMESTAMP(0), + vaso_rate DOUBLE PRECISION, + vaso_amount DOUBLE PRECISION +); + +DROP TABLE IF EXISTS dopamine_dose; +CREATE TABLE dopamine_dose ( + icustay_id INTEGER, + starttime TIMESTAMP(0), + endtime TIMESTAMP(0), + vaso_rate DOUBLE PRECISION, + vaso_amount DOUBLE PRECISION +); + +DROP TABLE IF EXISTS epinephrine_dose; +CREATE TABLE epinephrine_dose ( + icustay_id INTEGER, + starttime TIMESTAMP(0), + endtime TIMESTAMP(0), + vaso_rate DOUBLE PRECISION, + vaso_amount DOUBLE PRECISION +); + +DROP TABLE IF EXISTS norepinephrine_dose; +CREATE TABLE norepinephrine_dose ( + icustay_id INTEGER, + starttime TIMESTAMP(0), + endtime TIMESTAMP(0), + vaso_rate DOUBLE PRECISION, + vaso_amount DOUBLE PRECISION +); + +-- 4. All-time pivots feeding hourly SOFA. +DROP TABLE IF EXISTS blood_gas_arterial; +CREATE TABLE blood_gas_arterial ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + charttime TIMESTAMP(0), + specimen VARCHAR(200), + specimen_pred VARCHAR(200), + specimen_prob DOUBLE PRECISION, + so2 DOUBLE PRECISION, + spo2 DOUBLE PRECISION, + po2 DOUBLE PRECISION, + pco2 DOUBLE PRECISION, + fio2_chartevents DOUBLE PRECISION, + fio2 DOUBLE PRECISION, + aado2 DOUBLE PRECISION, + aado2_calc DOUBLE PRECISION, + pao2fio2 DOUBLE PRECISION, + ph DOUBLE PRECISION, + baseexcess DOUBLE PRECISION, + bicarbonate DOUBLE PRECISION, + totalco2 DOUBLE PRECISION, + hematocrit DOUBLE PRECISION, + hemoglobin DOUBLE PRECISION, + carboxyhemoglobin DOUBLE PRECISION, + methemoglobin DOUBLE PRECISION, + chloride DOUBLE PRECISION, + calcium DOUBLE PRECISION, + temperature DOUBLE PRECISION, + potassium DOUBLE PRECISION, + sodium DOUBLE PRECISION, + lactate DOUBLE PRECISION, + glucose DOUBLE PRECISION, + intubated DOUBLE PRECISION, + tidalvolume DOUBLE PRECISION, + ventilationrate DOUBLE PRECISION, + ventilator DOUBLE PRECISION, + peep DOUBLE PRECISION, + o2flow DOUBLE PRECISION, + requiredo2 DOUBLE PRECISION +); + +DROP TABLE IF EXISTS gcs_all; +CREATE TABLE gcs_all ( + icustay_id INTEGER, + charttime TIMESTAMP(0), + gcs DOUBLE PRECISION, + endotrachflag INTEGER +); + +-- 5. Hourly SOFA pipeline. Each measurement class is materialised +-- into a narrow staging table keyed by (icustay_id, hr); these +-- are kept (not dropped) so each stage can be inspected with +-- EXPLAIN ANALYZE. + +-- 5a. Hourly grid (one row per ICU hour per stay). +DROP TABLE IF EXISTS sofa_grid; +CREATE TABLE sofa_grid ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + hr INTEGER, + starttime TIMESTAMP(0), + endtime TIMESTAMP(0) +); + +-- 5b. MAP minimum within each hour. +DROP TABLE IF EXISTS sofa_vs; +CREATE TABLE sofa_vs ( + icustay_id INTEGER, + hr INTEGER, + meanbp_min DOUBLE PRECISION +); + +-- 5c. GCS minimum within each hour (from gcs_all, which already has +-- the carry-forward and ET-trach=15 rules applied). +DROP TABLE IF EXISTS sofa_gcs; +CREATE TABLE sofa_gcs ( + icustay_id INTEGER, + hr INTEGER, + gcs_min DOUBLE PRECISION +); + +-- 5d. Bilirubin maximum within each hour. +DROP TABLE IF EXISTS sofa_bili; +CREATE TABLE sofa_bili ( + icustay_id INTEGER, + hr INTEGER, + bilirubin_max DOUBLE PRECISION +); + +-- 5e. Creatinine maximum within each hour. +DROP TABLE IF EXISTS sofa_cr; +CREATE TABLE sofa_cr ( + icustay_id INTEGER, + hr INTEGER, + creatinine_max DOUBLE PRECISION +); + +-- 5f. Platelet minimum within each hour. +DROP TABLE IF EXISTS sofa_plt; +CREATE TABLE sofa_plt ( + icustay_id INTEGER, + hr INTEGER, + platelet_min DOUBLE PRECISION +); + +-- 5g. PaO2/FiO2: split into vent / no-vent based on whether an +-- active ventilation episode covered the blood gas. +DROP TABLE IF EXISTS sofa_pf; +CREATE TABLE sofa_pf ( + icustay_id INTEGER, + hr INTEGER, + pao2fio2_novent DOUBLE PRECISION, + pao2fio2_vent DOUBLE PRECISION +); + +-- 5h. Urine output rolling sum + count of distinct charted hours +-- within the past 24 h. +DROP TABLE IF EXISTS sofa_uo; +CREATE TABLE sofa_uo ( + icustay_id INTEGER, + hr INTEGER, + uo_24hr DOUBLE PRECISION, + uo_tm_24hr BIGINT +); + +-- 5i. Vasopressor rate snapshot at the hour boundary. +DROP TABLE IF EXISTS sofa_vaso; +CREATE TABLE sofa_vaso ( + icustay_id INTEGER, + hr INTEGER, + rate_epinephrine DOUBLE PRECISION, + rate_norepinephrine DOUBLE PRECISION, + rate_dopamine DOUBLE PRECISION, + rate_dobutamine DOUBLE PRECISION +); + +-- 5j. Wide assembly: grid LEFT JOINed onto every measurement table. +DROP TABLE IF EXISTS sofa_wide; +CREATE TABLE sofa_wide ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + hr INTEGER, + starttime TIMESTAMP(0), + endtime TIMESTAMP(0), + meanbp_min DOUBLE PRECISION, + gcs_min DOUBLE PRECISION, + bilirubin_max DOUBLE PRECISION, + creatinine_max DOUBLE PRECISION, + platelet_min DOUBLE PRECISION, + pao2fio2_novent DOUBLE PRECISION, + pao2fio2_vent DOUBLE PRECISION, + uo_24hr DOUBLE PRECISION, + uo_tm_24hr BIGINT, + rate_epinephrine DOUBLE PRECISION, + rate_norepinephrine DOUBLE PRECISION, + rate_dopamine DOUBLE PRECISION, + rate_dobutamine DOUBLE PRECISION +); + +-- 5k. Per-hour component scores (no rolling window yet). +DROP TABLE IF EXISTS sofa_components; +CREATE TABLE sofa_components ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + hr INTEGER, + starttime TIMESTAMP(0), + endtime TIMESTAMP(0), + meanbp_min DOUBLE PRECISION, + gcs_min DOUBLE PRECISION, + bilirubin_max DOUBLE PRECISION, + creatinine_max DOUBLE PRECISION, + platelet_min DOUBLE PRECISION, + pao2fio2_novent DOUBLE PRECISION, + pao2fio2_vent DOUBLE PRECISION, + uo_24hr DOUBLE PRECISION, + uo_tm_24hr BIGINT, + rate_epinephrine DOUBLE PRECISION, + rate_norepinephrine DOUBLE PRECISION, + rate_dopamine DOUBLE PRECISION, + rate_dobutamine DOUBLE PRECISION, + respiration INTEGER, + coagulation INTEGER, + liver INTEGER, + cardiovascular INTEGER, + cns INTEGER, + renal INTEGER +); + +-- 5l. Final hourly SOFA: 24-hour rolling MAX per component, summed. +DROP TABLE IF EXISTS sofa_hourly; +CREATE TABLE sofa_hourly ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + hr INTEGER, + starttime TIMESTAMP(0), + endtime TIMESTAMP(0), + respiration INTEGER, + coagulation INTEGER, + liver INTEGER, + cardiovascular INTEGER, + cns INTEGER, + renal INTEGER, + respiration_24hours INTEGER, + coagulation_24hours INTEGER, + liver_24hours INTEGER, + cardiovascular_24hours INTEGER, + cns_24hours INTEGER, + renal_24hours INTEGER, + sofa_24hours INTEGER +); + +-- 6. Suspicion of infection. +DROP TABLE IF EXISTS antibiotic; +CREATE TABLE antibiotic ( + subject_id INTEGER, + hadm_id INTEGER, + icustay_id INTEGER, + antibiotic VARCHAR(255), + route VARCHAR(120), + -- MIMIC-III prescriptions has DATE-precision startdate / enddate + -- (stored as TIMESTAMP(0) but always at 00:00:00). + starttime TIMESTAMP(0), + stoptime TIMESTAMP(0) +); + +DROP TABLE IF EXISTS suspicion_of_infection; +CREATE TABLE suspicion_of_infection ( + subject_id INTEGER, + icustay_id INTEGER, + hadm_id INTEGER, + ab_id BIGINT, + antibiotic VARCHAR(255), + antibiotic_time TIMESTAMP, + suspected_infection INTEGER, + suspected_infection_time TIMESTAMP, + culture_time TIMESTAMP, + specimen VARCHAR(100), + positive_culture INTEGER +); + +-- 7. Final Sepsis-3 onset table (one row per ICU stay). +DROP TABLE IF EXISTS sepsis3; +CREATE TABLE sepsis3 ( + subject_id INTEGER, + icustay_id INTEGER, + antibiotic_time TIMESTAMP, + culture_time TIMESTAMP, + suspected_infection_time TIMESTAMP, + sofa_time TIMESTAMP(0), + sofa_score INTEGER, + respiration INTEGER, + coagulation INTEGER, + liver INTEGER, + cardiovascular INTEGER, + cns INTEGER, + renal INTEGER, + sepsis3 BOOLEAN +);