From 9a248650162accc39c29657e652ec6e2bc518c6e Mon Sep 17 00:00:00 2001 From: David Madl Date: Tue, 5 May 2026 10:22:17 +0200 Subject: [PATCH] initial --- README.md | 246 +++++++ fetch.ps1 | 36 ++ sql/build_sapsii.sql | 48 ++ sql/build_sepsis3.sql | 74 +++ sql/durations/dobutamine_dose.sql | 259 ++++++++ sql/durations/dopamine_dose.sql | 262 ++++++++ sql/durations/epinephrine_dose.sql | 273 ++++++++ sql/durations/norepinephrine_dose.sql | 270 ++++++++ sql/durations/ventilation_classification.sql | 142 ++++ sql/durations/ventilation_durations.sql | 112 ++++ sql/durations/weight_durations.sql | 207 ++++++ sql/echo_data.sql | 48 ++ sql/firstday/blood_gas_first_day.sql | 108 ++++ sql/firstday/blood_gas_first_day_arterial.sql | 156 +++++ sql/firstday/gcs_first_day.sql | 143 +++++ sql/firstday/labs_first_day.sql | 155 +++++ sql/firstday/urine_output_first_day.sql | 58 ++ sql/firstday/vitals_first_day.sql | 120 ++++ sql/fluid_balance/urine_output.sql | 45 ++ sql/postgres-functions.sql | 163 +++++ sql/schemas.sql | 605 ++++++++++++++++++ sql/sepsis/antibiotic.sql | 215 +++++++ sql/sepsis/blood_gas_arterial.sql | 230 +++++++ sql/sepsis/gcs_all.sql | 78 +++ sql/sepsis/mortality_checks.sql | 265 ++++++++ sql/sepsis/sanity_checks.sql | 393 ++++++++++++ sql/sepsis/sepsis3.sql | 90 +++ sql/sepsis/sofa_hourly.sql | 397 ++++++++++++ sql/sepsis/suspicion_of_infection.sql | 153 +++++ sql/severityscores/sapsii.sql | 384 +++++++++++ 30 files changed, 5735 insertions(+) create mode 100644 README.md create mode 100644 fetch.ps1 create mode 100644 sql/build_sapsii.sql create mode 100644 sql/build_sepsis3.sql create mode 100644 sql/durations/dobutamine_dose.sql create mode 100644 sql/durations/dopamine_dose.sql create mode 100644 sql/durations/epinephrine_dose.sql create mode 100644 sql/durations/norepinephrine_dose.sql create mode 100644 sql/durations/ventilation_classification.sql create mode 100644 sql/durations/ventilation_durations.sql create mode 100644 
sql/durations/weight_durations.sql create mode 100644 sql/echo_data.sql create mode 100644 sql/firstday/blood_gas_first_day.sql create mode 100644 sql/firstday/blood_gas_first_day_arterial.sql create mode 100644 sql/firstday/gcs_first_day.sql create mode 100644 sql/firstday/labs_first_day.sql create mode 100644 sql/firstday/urine_output_first_day.sql create mode 100644 sql/firstday/vitals_first_day.sql create mode 100644 sql/fluid_balance/urine_output.sql create mode 100644 sql/postgres-functions.sql create mode 100644 sql/schemas.sql create mode 100644 sql/sepsis/antibiotic.sql create mode 100644 sql/sepsis/blood_gas_arterial.sql create mode 100644 sql/sepsis/gcs_all.sql create mode 100644 sql/sepsis/mortality_checks.sql create mode 100644 sql/sepsis/sanity_checks.sql create mode 100644 sql/sepsis/sepsis3.sql create mode 100644 sql/sepsis/sofa_hourly.sql create mode 100644 sql/sepsis/suspicion_of_infection.sql create mode 100644 sql/severityscores/sapsii.sql diff --git a/README.md b/README.md new file mode 100644 index 0000000..f618e67 --- /dev/null +++ b/README.md @@ -0,0 +1,246 @@ +# SAPS-II and Sepsis-3 on vanilla PostgreSQL MIMIC-III v1.3 + +This folder contains the SQL scripts needed to compute, on a stock +PostgreSQL restore of the MIMIC-III v1.3 dump: + +- the **Simplified Acute Physiology Score II (SAPS-II)** — a + one-row-per-ICU-stay severity score computed on day 1; and +- the **Sepsis-3 onset time** (Singer et al., JAMA 2016) — the + earliest hour at which a patient has SOFA ≥ 2 within ±48 h + of suspicion of infection. + +The SAPS-II scripts are mirrored verbatim from the upstream repository +[`MIT-LCP/mimic-code`](https://github.com/MIT-LCP/mimic-code) (specifically +the `mimic-iii/concepts_postgres/` tree, the auto-generated PostgreSQL +port of the BigQuery concepts). 
The Sepsis-3 pipeline is bespoke: +upstream ships only an empty stub for MIMIC-III, so we adapted the +MIMIC-IV scripts under `mimic-iv/concepts/{score,sepsis,medication}/` +to MIMIC-III's schema (see "Sepsis-3 port notes" below). + +## Files + +``` +sql/ +├── build_sapsii.sql <-- master runner for SAPS-II +├── build_sepsis3.sql <-- master runner for Sepsis-3 +├── postgres-functions.sql <-- PL/pgSQL shims for DATETIME_DIFF / _ADD / _SUB +├── echo_data.sql <-- weight imputation helper +├── durations/ +│ ├── ventilation_classification.sql +│ ├── ventilation_durations.sql +│ ├── weight_durations.sql +│ ├── dobutamine_dose.sql +│ ├── dopamine_dose.sql +│ ├── epinephrine_dose.sql +│ └── norepinephrine_dose.sql +├── firstday/ +│ ├── blood_gas_first_day.sql +│ ├── blood_gas_first_day_arterial.sql +│ ├── gcs_first_day.sql +│ ├── labs_first_day.sql +│ ├── urine_output_first_day.sql +│ └── vitals_first_day.sql +├── fluid_balance/ +│ └── urine_output.sql <-- all-time UO; used by Sepsis-3 +├── severityscores/ +│ └── sapsii.sql +└── sepsis/ <-- bespoke Sepsis-3 pipeline + ├── blood_gas_arterial.sql <-- all-time arterial BG (PaO2/FiO2) + ├── gcs_all.sql <-- all-time GCS + ├── sofa_hourly.sql <-- staged hourly SOFA pipeline + ├── antibiotic.sql <-- filtered antibiotic prescriptions + ├── suspicion_of_infection.sql <-- abx <-> culture pairing + └── sepsis3.sql <-- final onset table +``` + +## Dependency graph (SAPS-II) + +``` +postgres-functions.sql (DATETIME_* shims, used everywhere below) +│ +├── durations/ventilation_classification.sql +│ └── durations/ventilation_durations.sql +│ +├── firstday/blood_gas_first_day.sql +│ └── firstday/blood_gas_first_day_arterial.sql +│ +├── firstday/gcs_first_day.sql +├── firstday/labs_first_day.sql +├── firstday/urine_output_first_day.sql +└── firstday/vitals_first_day.sql + │ + └── severityscores/sapsii.sql <-- final table: `sapsii` +``` + +## Dependency graph (Sepsis-3) + +``` +postgres-functions.sql +│ +├── echo_data.sql +│ └── 
durations/weight_durations.sql +│ ├── durations/dobutamine_dose.sql +│ ├── durations/dopamine_dose.sql +│ ├── durations/epinephrine_dose.sql +│ └── durations/norepinephrine_dose.sql +│ +├── fluid_balance/urine_output.sql (all-time UO; consumed by sofa_hourly) +├── durations/ventilation_classification.sql +│ └── durations/ventilation_durations.sql +│ +├── sepsis/blood_gas_arterial.sql (all-time arterial PaO2/FiO2) +├── sepsis/gcs_all.sql (all-time GCS) +│ │ +│ └── sepsis/sofa_hourly.sql <-- table: `sofa_hourly` +│ (one row per ICU hour) +│ +├── sepsis/antibiotic.sql +│ └── sepsis/suspicion_of_infection.sql +│ +└── sepsis/sepsis3.sql <-- final table: `sepsis3` + (one row per ICU stay) +``` + +`fluid_balance/urine_output.sql` is included because it appears in the +upstream `postgres-make-concepts.sql` and is harmless to build for +SAPS-II; for Sepsis-3 it is required (the renal-SOFA branch reads it). + +## Required base MIMIC-III tables + +The scripts assume the standard MIMIC-III v1.3 schema with these tables +already present and accessible via `search_path`: + +`admissions`, `chartevents`, `diagnoses_icd`, `icustays`, +`inputevents_cv`, `inputevents_mv`, `labevents`, `microbiologyevents`, +`noteevents` (for `echo_data`), `outputevents`, `patients`, +`prescriptions`, `procedureevents_mv`, `services`. + +## Running SAPS-II + +1. Restore the MIMIC-III v1.3 dump into a PostgreSQL database (the usual + `postgres_create_tables.sql` / `postgres_load_data.sql` flow from the + `mimic-iii/buildmimic/postgres/` directory of the upstream repo). +2. Make sure the schema containing those tables is on your `search_path` + (commonly `mimiciii`). +3. Run the master script from this directory: + + ```bash + psql -d mimic -v ON_ERROR_STOP=1 \ + -c 'SET search_path TO mimiciii, public;' \ + -f sql/build_sapsii.sql + ``` + +4. 
Query the result: + + ```sql + SELECT subject_id, hadm_id, icustay_id, sapsii, sapsii_prob + FROM sapsii + ORDER BY icustay_id + LIMIT 10; + ``` + +## Running Sepsis-3 + +1. Same prerequisites as SAPS-II. +2. Run: + + ```bash + psql -d mimic -v ON_ERROR_STOP=1 \ + -c 'SET search_path TO mimiciii, public;' \ + -f sql/build_sepsis3.sql + ``` + + Expect a few hours of runtime on stock PostgreSQL with default + indexes. Most of that is the per-measurement scans of + `chartevents` (~330 M rows), `labevents` (~30 M rows), and + `outputevents` (~4 M rows) that drive the hourly SOFA pipeline. + +3. Query the result: + + ```sql + SELECT icustay_id, suspected_infection_time, + sofa_time, sofa_score, sepsis3 + FROM sepsis3 + WHERE sepsis3 = TRUE + ORDER BY icustay_id + LIMIT 10; + ``` + + For per-hour SOFA across the whole stay (e.g. for time-series + modelling), query `sofa_hourly` directly. + +### Sepsis-3 port notes + +The Sepsis-3 onset definition is *time-stamped*: it requires a SOFA +score at any moment during the ICU stay, not just on day 1, because +suspicion of infection can fall anywhere in the stay. MIMIC-IV +provides an hourly SOFA via a stack of pre-built `mimic_derived.*` +tables (`icustay_hourly`, `bg`, `vitalsign`, `gcs`, +`urine_output_rate`, `chemistry`, `enzyme`, `complete_blood_count`, +`epinephrine`, `norepinephrine`, `dopamine`, `dobutamine`, +`ventilation` with `ventilation_status='InvasiveVent'`). Almost none +of those exist in `mimic-iii/concepts_postgres/`, so we ported the +needed bits inline. Notable differences from MIMIC-IV behaviour: + +1. **Hourly grid** is built inline with `generate_series` on + `icustays.intime` / `outtime` (no `icustay_hourly` table). +2. **Ventilation status granularity** is missing. 
MIMIC-IV's + `InvasiveVent` filter (used to split `pao2fio2ratio_vent` from + `pao2fio2ratio_novent` so a ventilated patient doesn't get a + stratospheric SOFA respiratory score from a noisy unventilated + PaO2:FiO2) doesn't have a clean MIMIC-III equivalent. We use the + lumped `ventilation_durations`, treating any active ventilation + row as invasive ventilation. +3. **Urine output adjustment** is replicated inline in `sofa_uo`. + We materialise both `uo_24hr` (sum) and `uo_tm_24hr` (count of + distinct hours within the rolling 24 h window that actually had + a UO observation), and the renal CASE in `sofa_components` uses + `GREATEST(uo_24hr, 0) * 24.0 / uo_tm_24hr` only when + `uo_tm_24hr BETWEEN 22 AND 30`, falling back to creatinine alone + otherwise. The `GREATEST(_, 0)` clip prevents patients on + continuous bladder irrigation (which `fluid_balance/urine_output.sql` + subtracts as a negative volume) from being mis-scored as + oliguric. Effect: at hours where the stay has lasted less than + ~22 h or where charting is sparse, renal SOFA is computed from + creatinine only, instead of being over-scored from a partial UO + sum. +4. **Vasopressor rates** come from upstream + `durations/{epinephrine,norepinephrine,dopamine,dobutamine}_dose.sql`, + which already merge CareVue + MetaVision and convert to + mcg/kg/min. Weight-based unit conversion uses + `weight_durations.sql`, which itself depends on `echo_data.sql` + for weight imputation. +5. **Antibiotic time precision is one day.** MIMIC-III + `prescriptions.startdate` is DATE-precision only, so + `antibiotic_time` (and any sepsis-3 onset time driven by it) + lands on midnight of the prescription start date. Suspicion of + infection driven by a culture with charttime is still + timestamped to the minute. 
+ +The hourly SOFA pipeline is built **column-by-column into staged +tables** (`sofa_grid`, `sofa_vs`, `sofa_gcs`, `sofa_bili`, `sofa_cr`, +`sofa_plt`, `sofa_pf`, `sofa_uo`, `sofa_vaso`, then `sofa_wide`, +`sofa_components`, `sofa_hourly`). Each stage scans exactly one +giant raw table, so each stage can be `EXPLAIN ANALYZE`d +independently and re-run in isolation if you crash partway through. +The intermediates are kept (not dropped) so you can inspect them. + +## Re-fetching from upstream + +Re-run `fetch.ps1` (PowerShell, Windows) to pull the latest versions of +all upstream-mirrored scripts from the `main` branch of +`MIT-LCP/mimic-code`. Files under `sql/sepsis/` are bespoke and not +fetched. + +## Caveats + +- Severity scores (SAPS-II, SOFA) and sepsis-3 are computed for + **every** ICU stay, including neonates, re-admissions, and short + stays. Filter `icustay_id` yourself to match your study population + (the canonical adult-ICU sepsis-3 cohort excludes patients < 18 + and ICU re-admissions, but we do not enforce that here). +- The PostgreSQL scripts in `concepts_postgres/` are auto-generated from + the BigQuery-flavored originals in `concepts/`; the `DATETIME_*` + function calls you'll see in the SQL are resolved by the wrappers in + `postgres-functions.sql`, so it must be sourced first (the master + scripts do this for you). 
diff --git a/fetch.ps1 b/fetch.ps1 new file mode 100644 index 0000000..9095a43 --- /dev/null +++ b/fetch.ps1 @@ -0,0 +1,36 @@ +$ErrorActionPreference = "Stop" +$base = "https://raw.githubusercontent.com/MIT-LCP/mimic-code/main/mimic-iii/concepts_postgres" + +$files = @( + "postgres-functions.sql", + "echo_data.sql", + "fluid_balance/urine_output.sql", + "durations/ventilation_classification.sql", + "durations/ventilation_durations.sql", + "durations/weight_durations.sql", + "durations/dobutamine_dose.sql", + "durations/dopamine_dose.sql", + "durations/epinephrine_dose.sql", + "durations/norepinephrine_dose.sql", + "firstday/blood_gas_first_day.sql", + "firstday/blood_gas_first_day_arterial.sql", + "firstday/gcs_first_day.sql", + "firstday/labs_first_day.sql", + "firstday/urine_output_first_day.sql", + "firstday/vitals_first_day.sql", + "severityscores/sapsii.sql" +) + +foreach ($rel in $files) { + $url = "$base/$rel" + $dest = Join-Path "sql" $rel + $dir = Split-Path $dest -Parent + if (-not (Test-Path $dir)) { + New-Item -ItemType Directory -Force -Path $dir | Out-Null + } + Write-Host "Fetching $rel" + Invoke-WebRequest -Uri $url -OutFile $dest -UseBasicParsing +} + +Write-Host "" +Write-Host "Done. Files saved under .\sql\" diff --git a/sql/build_sapsii.sql b/sql/build_sapsii.sql new file mode 100644 index 0000000..6e35c04 --- /dev/null +++ b/sql/build_sapsii.sql @@ -0,0 +1,48 @@ +-- ------------------------------------------------------------------ +-- Build the SAPS-II severity score on a vanilla PostgreSQL MIMIC-III v1.3 DB. 
+-- +-- Usage (assuming you have already restored the MIMIC-III dump into a +-- database called `mimic` and have the base tables in the `mimiciii` schema): +-- +-- psql -d mimic -v ON_ERROR_STOP=1 \ +-- -c 'SET search_path TO mimiciii, public;' \ +-- -f sql/build_sapsii.sql +-- +-- Resulting tables created in the current search_path: +-- urine_output (not used by SAPS-II directly, +-- included for completeness) +-- ventilation_classification +-- ventilation_durations +-- blood_gas_first_day +-- blood_gas_first_day_arterial +-- gcs_first_day +-- labs_first_day +-- urine_output_first_day +-- vitals_first_day +-- sapsii <-- final score table +-- ------------------------------------------------------------------ + +\set ON_ERROR_STOP on + +-- 0. PL/pgSQL shims for BigQuery-style DATETIME_DIFF / DATETIME_ADD / DATETIME_SUB (all includes use the include-relative form so paths resolve against this script's directory, not the caller's working directory) +\ir postgres-functions.sql + +-- 1. Optional helper view (not required by SAPS-II, but useful and harmless) +\ir fluid_balance/urine_output.sql + +-- 2. Ventilation: classification first, then durations +\ir durations/ventilation_classification.sql +\ir durations/ventilation_durations.sql + +-- 3. First-day derived views (blood_gas_first_day must precede the arterial one) +\ir firstday/blood_gas_first_day.sql +\ir firstday/blood_gas_first_day_arterial.sql +\ir firstday/gcs_first_day.sql +\ir firstday/labs_first_day.sql +\ir firstday/urine_output_first_day.sql +\ir firstday/vitals_first_day.sql + +-- 4. The score itself +\ir severityscores/sapsii.sql + +\echo 'SAPS-II build complete. Query results with: SELECT * FROM sapsii LIMIT 10;' diff --git a/sql/build_sepsis3.sql b/sql/build_sepsis3.sql new file mode 100644 index 0000000..83dda0f --- /dev/null +++ b/sql/build_sepsis3.sql @@ -0,0 +1,74 @@ +-- ------------------------------------------------------------------ +-- Build the Sepsis-3 onset table on a vanilla PostgreSQL MIMIC-III +-- v1.3 DB. 
+-- +-- Usage (assuming you have already restored the MIMIC-III dump into a +-- database called `mimic` and have the base tables in the `mimiciii` +-- schema): +-- +-- psql -d mimic -v ON_ERROR_STOP=1 \ +-- -c 'SET search_path TO mimiciii, public;' \ +-- -f sql/build_sepsis3.sql +-- +-- Resulting tables created in the current search_path: +-- echo_data +-- urine_output +-- ventilation_classification +-- ventilation_durations +-- weight_durations +-- {dobutamine,dopamine,epinephrine,norepinephrine}_dose +-- blood_gas_arterial (all-time PaO2/FiO2) +-- gcs_all (all-time GCS) +-- sofa_grid, sofa_vs, sofa_gcs, sofa_bili, sofa_cr, sofa_plt, +-- sofa_pf, sofa_uo, sofa_vaso, sofa_wide, sofa_components +-- (intermediate hourly stages, +-- retained for inspection) +-- sofa_hourly (final hourly SOFA, one row per +-- ICU hour, with 24-h rolling MAX) +-- antibiotic (filtered antibiotic prescriptions) +-- suspicion_of_infection (Seymour 2016 abx<>culture pairing) +-- sepsis3 (final sepsis-3 onset, one row per +-- ICU stay) +-- +-- Runtime: expect a few hours on a stock single-node PostgreSQL with +-- the default `chartevents` and `labevents` indexes. Most of the +-- cost is the eight raw-table scans driving the sofa_* staging +-- tables; each stage prints its progress via psql's default ECHO. +-- ------------------------------------------------------------------ + +\set ON_ERROR_STOP on + +-- 0. PL/pgSQL shims for BigQuery-style DATETIME_DIFF / _ADD / _SUB (all includes use the include-relative form so paths resolve against this script's directory, not the caller's working directory) +\ir postgres-functions.sql + +-- 1. Helpers shared with SAPS-II +\ir echo_data.sql +\ir fluid_balance/urine_output.sql +\ir durations/ventilation_classification.sql +\ir durations/ventilation_durations.sql +\ir durations/weight_durations.sql + +-- 2. Vasopressor dose tables (each merges CareVue + MetaVision) +\ir durations/dobutamine_dose.sql +\ir durations/dopamine_dose.sql +\ir durations/epinephrine_dose.sql +\ir durations/norepinephrine_dose.sql + +-- 3. 
All-time pivots feeding hourly SOFA +\ir sepsis/blood_gas_arterial.sql +\ir sepsis/gcs_all.sql + +-- 4. Hourly SOFA pipeline (staged tables, retained for inspection -> sofa_hourly) +\ir sepsis/sofa_hourly.sql + +-- 5. Suspicion of infection +\ir sepsis/antibiotic.sql +\ir sepsis/suspicion_of_infection.sql + +-- 6. Final onset table +\ir sepsis/sepsis3.sql + +\echo 'Sepsis-3 build complete.' +\echo 'Query results with:' +\echo ' SELECT icustay_id, suspected_infection_time, sofa_time, sofa_score, sepsis3' +\echo ' FROM sepsis3 WHERE sepsis3 = TRUE LIMIT 10;' diff --git a/sql/durations/dobutamine_dose.sql b/sql/durations/dobutamine_dose.sql new file mode 100644 index 0000000..bfd3f70 --- /dev/null +++ b/sql/durations/dobutamine_dose.sql @@ -0,0 +1,259 @@ +-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY. +DROP TABLE IF EXISTS dobutamine_dose; CREATE TABLE dobutamine_dose AS +-- This query extracts dose+durations of dopamine administration + +-- Get drug administration data from CareVue first +with vasocv1 as +( + select + icustay_id, charttime + -- case statement determining whether the ITEMID is an instance of vasopressor usage + , max(case when itemid in (30042,30306) then 1 else 0 end) as vaso -- dobutamine + + -- the 'stopped' column indicates if a vasopressor has been disconnected + , max(case when itemid in (30042,30306) and (stopped = 'Stopped' OR stopped like 'D/C%') then 1 + else 0 end) as vaso_stopped + + , max(case when itemid in (30042,30306) and rate is not null then 1 else 0 end) as vaso_null + , max(case when itemid in (30042,30306) then rate else null end) as vaso_rate + , max(case when itemid in (30042,30306) then amount else null end) as vaso_amount + + FROM inputevents_cv + where itemid in (30042,30306) -- dobutamine + group by icustay_id, charttime +) +, vasocv2 as +( + select v.* + , sum(vaso_null) over (partition by icustay_id order by charttime) as vaso_partition + from + vasocv1 v +) +, vasocv3 as +( + select v.* + , first_value(vaso_rate) over 
(partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull + from + vasocv2 v +) +, vasocv4 as +( +select + icustay_id + , charttime + -- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta + + , vaso + , vaso_rate + , vaso_amount + , vaso_stopped + , vaso_prevrate_ifnull + + -- We define start time here + , case + when vaso = 0 then null + + -- if this is the first instance of the vasoactive drug + when vaso_rate > 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso, vaso_null + order by charttime + ) + is null + then 1 + + -- you often get a string of 0s + -- we decide not to set these as 1, just because it makes vasonum sequential + when vaso_rate = 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 0 + then 0 + + -- sometimes you get a string of NULL, associated with 0 volumes + -- same reason as before, we decide not to set these as 1 + -- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null + when vaso_prevrate_ifnull = 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 0 + then 0 + + -- If the last recorded rate was 0, newvaso = 1 + when LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) = 0 + then 1 + + -- If the last recorded vaso was D/C'd, newvaso = 1 + when + LAG(vaso_stopped,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 1 then 1 + + -- ** not sure if the below is needed + --when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1 + else null + end as vaso_start + +FROM + vasocv3 +) +-- propagate start/stop flags forward in time +, vasocv5 as +( + select v.* + , SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first +FROM + vasocv4 v +) +, 
vasocv6 as +( + select v.* + -- We define end time here + , case + when vaso = 0 + then null + + -- If the recorded vaso was D/C'd, this is an end time + when vaso_stopped = 1 + then vaso_first + + -- If the rate is zero, this is the end time + when vaso_rate = 0 + then vaso_first + + -- the last row in the table is always a potential end time + -- this captures patients who die/are discharged while on vasopressors + -- in principle, this could add an extra end time for the vasopressor + -- however, since we later group on vaso_start, any extra end times are ignored + when LEAD(CHARTTIME,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) is null + then vaso_first + + else null + end as vaso_stop + from vasocv5 v +) + +-- -- if you want to look at the results of the table before grouping: +-- select +-- icustay_id, charttime, vaso, vaso_rate, vaso_amount +-- , vaso_stopped +-- , vaso_start +-- , vaso_first +-- , vaso_stop +-- from vasocv6 order by icustay_id, charttime; + +, vasocv7 as +( +select + icustay_id + , charttime as starttime + , lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first +from vasocv6 +where + vaso_first is not null -- bogus data +and + vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. 
the drug is never actually delivered +and + icustay_id is not null -- there are data for "floating" admissions, we don't worry about these +) +-- table of start/stop times for event +, vasocv8 as +( + select + icustay_id + , starttime, endtime + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv7 + where endtime is not null + and vaso_rate > 0 + and starttime != endtime +) +-- collapse these start/stop times down if the rate doesn't change +, vasocv9 as +( + select + icustay_id + , starttime, endtime + , case + when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime + AND LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate + THEN 0 + else 1 + end as vaso_groups + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv8 + where endtime is not null + and vaso_rate > 0 + and starttime != endtime +) +, vasocv10 as +( + select + icustay_id + , starttime, endtime + , vaso_groups + , SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv9 +) +, vasocv as +( + select icustay_id + , min(starttime) as starttime + , max(endtime) as endtime + , vaso_groups_sum + , vaso_rate + , sum(vaso_amount) as vaso_amount + from vasocv10 + group by icustay_id, vaso_groups_sum, vaso_rate +) +-- now we extract the associated data for metavision patients +, vasomv as +( + select + icustay_id, linkorderid + , rate as vaso_rate + , amount as vaso_amount + , starttime + , endtime + from inputevents_mv + where itemid = 221653 -- dobutamine + and statusdescription != 'Rewritten' -- only valid orders +) +-- now assign this data to every hour of the patient's stay +-- vaso_amount for carevue is not accurate +SELECT icustay_id + , starttime, endtime + , vaso_rate, vaso_amount +from vasocv +UNION ALL +SELECT icustay_id + , starttime, endtime + , vaso_rate, 
vaso_amount +from vasomv +order by icustay_id, starttime; diff --git a/sql/durations/dopamine_dose.sql b/sql/durations/dopamine_dose.sql new file mode 100644 index 0000000..5b25e6a --- /dev/null +++ b/sql/durations/dopamine_dose.sql @@ -0,0 +1,262 @@ +-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY. +DROP TABLE IF EXISTS dopamine_dose; CREATE TABLE dopamine_dose AS +-- This query extracts dose+durations of dopamine administration + +-- Get drug administration data from CareVue first +with vasocv1 as +( + select + icustay_id, charttime + -- case statement determining whether the ITEMID is an instance of vasopressor usage + , max(case when itemid in (30043,30307) then 1 else 0 end) as vaso -- dopamine + + -- the 'stopped' column indicates if a vasopressor has been disconnected + , max(case when itemid in (30043,30307) and (stopped = 'Stopped' OR stopped like 'D/C%') then 1 + else 0 end) as vaso_stopped + + , max(case when itemid in (30043,30307) and rate is not null then 1 else 0 end) as vaso_null + , max(case when itemid in (30043,30307) then rate else null end) as vaso_rate + , max(case when itemid in (30043,30307) then amount else null end) as vaso_amount + + FROM inputevents_cv + where itemid in + ( + 30043,30307 -- dopamine + ) + group by icustay_id, charttime +) +, vasocv2 as +( + select v.* + , sum(vaso_null) over (partition by icustay_id order by charttime) as vaso_partition + from + vasocv1 v +) +, vasocv3 as +( + select v.* + , first_value(vaso_rate) over (partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull + from + vasocv2 v +) +, vasocv4 as +( +select + icustay_id + , charttime + -- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta + + , vaso + , vaso_rate + , vaso_amount + , vaso_stopped + , vaso_prevrate_ifnull + + -- We define start time here + , case + when vaso = 0 then null + + -- if this is the first instance of the vasoactive drug + when vaso_rate 
> 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso, vaso_null + order by charttime + ) + is null + then 1 + + -- you often get a string of 0s + -- we decide not to set these as 1, just because it makes vasonum sequential + when vaso_rate = 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 0 + then 0 + + -- sometimes you get a string of NULL, associated with 0 volumes + -- same reason as before, we decide not to set these as 1 + -- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null + when vaso_prevrate_ifnull = 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 0 + then 0 + + -- If the last recorded rate was 0, newvaso = 1 + when LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) = 0 + then 1 + + -- If the last recorded vaso was D/C'd, newvaso = 1 + when + LAG(vaso_stopped,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 1 then 1 + + -- ** not sure if the below is needed + --when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1 + else null + end as vaso_start + +FROM + vasocv3 +) +-- propagate start/stop flags forward in time +, vasocv5 as +( + select v.* + , SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first +FROM + vasocv4 v +) +, vasocv6 as +( + select v.* + -- We define end time here + , case + when vaso = 0 + then null + + -- If the recorded vaso was D/C'd, this is an end time + when vaso_stopped = 1 + then vaso_first + + -- If the rate is zero, this is the end time + when vaso_rate = 0 + then vaso_first + + -- the last row in the table is always a potential end time + -- this captures patients who die/are discharged while on vasopressors + -- in principle, this could add an extra end time for the vasopressor + 
-- however, since we later group on vaso_start, any extra end times are ignored + when LEAD(CHARTTIME,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) is null + then vaso_first + + else null + end as vaso_stop + from vasocv5 v +) + +-- -- if you want to look at the results of the table before grouping: +-- select +-- icustay_id, charttime, vaso, vaso_rate, vaso_amount +-- , vaso_stopped +-- , vaso_start +-- , vaso_first +-- , vaso_stop +-- from vasocv6 order by icustay_id, charttime; + +, vasocv7 as +( +select + icustay_id + , charttime as starttime + , lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first +from vasocv6 +where + vaso_first is not null -- bogus data +and + vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. the drug is never actually delivered +and + icustay_id is not null -- there are data for "floating" admissions, we don't worry about these +) +-- table of start/stop times for event +, vasocv8 as +( + select + icustay_id + , starttime, endtime + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv7 + where endtime is not null + and vaso_rate > 0 + and starttime != endtime +) +-- collapse these start/stop times down if the rate doesn't change +, vasocv9 as +( + select + icustay_id + , starttime, endtime + , case + when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime + AND LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate + THEN 0 + else 1 + end as vaso_groups + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv8 + where endtime is not null + and vaso_rate > 0 + and starttime != endtime +) +, vasocv10 as +( + select + icustay_id + , starttime, endtime + , vaso_groups + , SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum + , vaso, vaso_rate, 
vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv9 +) +, vasocv as +( + select icustay_id + , min(starttime) as starttime + , max(endtime) as endtime + , vaso_groups_sum + , vaso_rate + , sum(vaso_amount) as vaso_amount + from vasocv10 + group by icustay_id, vaso_groups_sum, vaso_rate +) +-- now we extract the associated data for metavision patients +, vasomv as +( + select + icustay_id, linkorderid + , rate as vaso_rate + , amount as vaso_amount + , starttime + , endtime + from inputevents_mv + where itemid = 221662 -- dopamine + and statusdescription != 'Rewritten' -- only valid orders +) +-- now assign this data to every hour of the patient's stay +-- vaso_amount for carevue is not accurate +SELECT icustay_id + , starttime, endtime + , vaso_rate, vaso_amount +from vasocv +UNION ALL +SELECT icustay_id + , starttime, endtime + , vaso_rate, vaso_amount +from vasomv +order by icustay_id, starttime; diff --git a/sql/durations/epinephrine_dose.sql b/sql/durations/epinephrine_dose.sql new file mode 100644 index 0000000..29b556d --- /dev/null +++ b/sql/durations/epinephrine_dose.sql @@ -0,0 +1,273 @@ +-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY. 
+DROP TABLE IF EXISTS epinephrine_dose; CREATE TABLE epinephrine_dose AS +-- This query extracts dose+durations of epinephrine administration + +-- Requires the weight_durations table + +-- Get drug administration data from CareVue first +with vasocv1 as +( + select + cv.icustay_id, cv.charttime + -- case statement determining whether the ITEMID is an instance of vasopressor usage + , max(case when itemid in (30044,30119,30309) then 1 else 0 end) as vaso -- epinephrine + + -- the 'stopped' column indicates if a vasopressor has been disconnected + , max(case when itemid in (30044,30119,30309) and (stopped = 'Stopped' OR stopped like 'D/C%') then 1 + else 0 end) as vaso_stopped + + , max(case when itemid in (30044,30119,30309) and rate is not null then 1 else 0 end) as vaso_null -- despite the name: 1 when a rate IS recorded + , max(case + when itemid = 30044 and wd.weight is null then rate / 80.0 -- super rare to be missing weight... affects 2 patients for 14 rows + when itemid = 30044 then rate / wd.weight -- measured in mcgmin + when itemid in (30119,30309) then rate -- measured in mcgkgmin + else null + end) as vaso_rate + , max(case when itemid in (30044,30119,30309) then amount else null end) as vaso_amount + + FROM inputevents_cv cv + left join weight_durations wd + on cv.icustay_id = wd.icustay_id + and cv.charttime between wd.starttime and wd.endtime -- inclusive on both ends: a charttime equal to a shared weight_durations boundary joins two weight rows (collapsed by the GROUP BY) + where itemid in + ( + 30044,30119,30309 -- epinephrine + ) + and cv.icustay_id is not null + group by cv.icustay_id, charttime +) +, vasocv2 as +( + select v.* + , sum(vaso_null) over (partition by icustay_id order by charttime) as vaso_partition -- running count of recorded rates; each recorded rate opens a new partition + from + vasocv1 v +) +, vasocv3 as +( + select v.* + , first_value(vaso_rate) over (partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull -- rate of the row that opened the partition, i.e. the latest recorded rate (NULL before any rate is recorded) + from + vasocv2 v +) +, vasocv4 as +( +select + icustay_id + , charttime + -- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta + + , vaso + , vaso_rate + , vaso_amount + , vaso_stopped + , 
vaso_prevrate_ifnull + + -- We define start time here + , case + when vaso = 0 then null + + -- if this is the first instance of the vasoactive drug + when vaso_rate > 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso, vaso_null + order by charttime + ) + is null + then 1 + + -- you often get a string of 0s + -- we decide not to set these as 1, just because it makes vasonum sequential + when vaso_rate = 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 0 + then 0 + + -- sometimes you get a string of NULL, associated with 0 volumes + -- same reason as before, we decide not to set these as 1 + -- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null + when vaso_prevrate_ifnull = 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 0 + then 0 + + -- If the last recorded rate was 0, newvaso = 1 + when LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) = 0 + then 1 + + -- If the last recorded vaso was D/C'd, newvaso = 1 + when + LAG(vaso_stopped,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 1 then 1 + + -- ** not sure if the below is needed + --when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1 + else null + end as vaso_start + +FROM + vasocv3 +) +-- propagate start/stop flags forward in time +, vasocv5 as +( + select v.* + , SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first +FROM + vasocv4 v +) +, vasocv6 as +( + select v.* + -- We define end time here + , case + when vaso = 0 + then null + + -- If the recorded vaso was D/C'd, this is an end time + when vaso_stopped = 1 + then vaso_first + + -- If the rate is zero, this is the end time + when vaso_rate = 0 + then vaso_first + + -- the last row in the table is always a 
potential end time + -- this captures patients who die/are discharged while on vasopressors + -- in principle, this could add an extra end time for the vasopressor + -- however, since we later group on vaso_start, any extra end times are ignored + when LEAD(CHARTTIME,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) is null + then vaso_first + + else null + end as vaso_stop + from vasocv5 v +) + +-- -- if you want to look at the results of the table before grouping: +-- select +-- icustay_id, charttime, vaso, vaso_rate, vaso_amount +-- , vaso_stopped +-- , vaso_start +-- , vaso_first +-- , vaso_stop +-- from vasocv6 order by icustay_id, charttime; + +, vasocv7 as +( +select + icustay_id + , charttime as starttime + , lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first +from vasocv6 +where + vaso_first is not null -- bogus data +and + vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. 
the drug is never actually delivered +and + icustay_id is not null -- there are data for "floating" admissions, we don't worry about these +) +-- table of start/stop times for event +, vasocv8 as +( + select + icustay_id + , starttime, endtime + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv7 + where endtime is not null + and vaso_rate > 0 + and starttime != endtime +) +-- collapse these start/stop times down if the rate doesn't change +, vasocv9 as +( + select + icustay_id + , starttime, endtime + , case + when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime + AND LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate + THEN 0 + else 1 + end as vaso_groups + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv8 + where endtime is not null + and vaso_rate > 0 + and starttime != endtime +) +, vasocv10 as +( + select + icustay_id + , starttime, endtime + , vaso_groups + , SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv9 +) +, vasocv as +( + select icustay_id + , min(starttime) as starttime + , max(endtime) as endtime + , vaso_groups_sum + , vaso_rate + , sum(vaso_amount) as vaso_amount + from vasocv10 + group by icustay_id, vaso_groups_sum, vaso_rate +) +-- now we extract the associated data for metavision patients +, vasomv as +( + select + icustay_id, linkorderid + , rate as vaso_rate + , amount as vaso_amount + , starttime + , endtime + from inputevents_mv + where itemid = 221289 -- epinephrine + and statusdescription != 'Rewritten' -- only valid orders +) +-- now assign this data to every hour of the patient's stay +-- vaso_amount for carevue is not accurate +SELECT icustay_id + , starttime, endtime + , vaso_rate, vaso_amount +from vasocv +UNION ALL +SELECT icustay_id + , starttime, endtime + , vaso_rate, 
vaso_amount +from vasomv +order by icustay_id, starttime; diff --git a/sql/durations/norepinephrine_dose.sql b/sql/durations/norepinephrine_dose.sql new file mode 100644 index 0000000..9f2f97b --- /dev/null +++ b/sql/durations/norepinephrine_dose.sql @@ -0,0 +1,270 @@ +-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY. +DROP TABLE IF EXISTS norepinephrine_dose; CREATE TABLE norepinephrine_dose AS +-- This query extracts dose+durations of norepinephrine administration +-- Total time on the drug can be calculated from this table by grouping using ICUSTAY_ID + +-- Get drug administration data from CareVue first +with vasocv1 as +( + select + cv.icustay_id, cv.charttime + -- case statement determining whether the ITEMID is an instance of vasopressor usage + , max(case when itemid in (30047,30120) then 1 else 0 end) as vaso -- norepinephrine + + -- the 'stopped' column indicates if a vasopressor has been disconnected + , max(case when itemid in (30047,30120) and (stopped = 'Stopped' OR stopped like 'D/C%') then 1 + else 0 end) as vaso_stopped + + -- despite its name, vaso_null is 1 when a rate IS recorded (rate is not null); vasocv2 sums it to partition rows by their latest recorded rate + + , max(case when itemid in (30047,30120) and rate is not null then 1 else 0 end) as vaso_null + , max(case + when itemid = 30047 and wd.weight is null then rate / 80.0 -- this is rare, only affects a total of ~400 rows + when itemid = 30047 then rate / wd.weight -- measured in mcgmin + when itemid = 30120 then rate -- measured in mcgkgmin ** there are clear errors, perhaps actually mcgmin + else null end) as vaso_rate + , max(case when itemid in (30047,30120) then amount else null end) as vaso_amount + + FROM inputevents_cv cv + left join weight_durations wd + on cv.icustay_id = wd.icustay_id + and cv.charttime between wd.starttime and wd.endtime + where itemid in (30047,30120) -- norepinephrine + and cv.icustay_id is not null + group by cv.icustay_id, cv.charttime +) +, vasocv2 as +( + select v.* + , sum(vaso_null) over 
(partition by icustay_id order by charttime) as vaso_partition + from + vasocv1 v +) +, vasocv3 as +( + select v.* + , first_value(vaso_rate) over (partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull + from + vasocv2 v +) +, vasocv4 as +( +select + icustay_id + , charttime + -- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta + + , vaso + , vaso_rate + , vaso_amount + , vaso_stopped + , vaso_prevrate_ifnull + + -- We define start time here + , case + when vaso = 0 then null + + -- if this is the first instance of the vasoactive drug + when vaso_rate > 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso, vaso_null + order by charttime + ) + is null + then 1 + + -- you often get a string of 0s + -- we decide not to set these as 1, just because it makes vasonum sequential + when vaso_rate = 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 0 + then 0 + + -- sometimes you get a string of NULL, associated with 0 volumes + -- same reason as before, we decide not to set these as 1 + -- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null + when vaso_prevrate_ifnull = 0 and + LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 0 + then 0 + + -- If the last recorded rate was 0, newvaso = 1 + when LAG(vaso_prevrate_ifnull,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) = 0 + then 1 + + -- If the last recorded vaso was D/C'd, newvaso = 1 + when + LAG(vaso_stopped,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) + = 1 then 1 + + -- ** not sure if the below is needed + --when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1 + else null + end as vaso_start + +FROM + vasocv3 +) +-- propagate start/stop flags forward in 
time +, vasocv5 as +( + select v.* + , SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first +FROM + vasocv4 v +) +, vasocv6 as +( + select v.* + -- We define end time here + , case + when vaso = 0 + then null + + -- If the recorded vaso was D/C'd, this is an end time + when vaso_stopped = 1 + then vaso_first + + -- If the rate is zero, this is the end time + when vaso_rate = 0 + then vaso_first + + -- the last row in the table is always a potential end time + -- this captures patients who die/are discharged while on vasopressors + -- in principle, this could add an extra end time for the vasopressor + -- however, since we later group on vaso_start, any extra end times are ignored + when LEAD(CHARTTIME,1) + OVER + ( + partition by icustay_id, vaso + order by charttime + ) is null + then vaso_first + + else null + end as vaso_stop + from vasocv5 v +) + +-- -- if you want to look at the results of the table before grouping: +-- select +-- icustay_id, charttime, vaso, vaso_rate, vaso_amount +-- , vaso_stopped +-- , vaso_start +-- , vaso_first +-- , vaso_stop +-- from vasocv6 order by icustay_id, charttime; + +, vasocv7 as +( +select + icustay_id + , charttime as starttime + , lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first +from vasocv6 +where + vaso_first is not null -- bogus data +and + vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. 
the drug is never actually delivered +and + icustay_id is not null -- there are data for "floating" admissions, we don't worry about these +) +-- table of start/stop times for event +, vasocv8 as +( + select + icustay_id + , starttime, endtime + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv7 + where endtime is not null + and vaso_rate > 0 + and starttime != endtime +) +-- collapse these start/stop times down if the rate doesn't change +, vasocv9 as +( + select + icustay_id + , starttime, endtime + , case + when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime + AND LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate + THEN 0 + else 1 + end as vaso_groups + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv8 + where endtime is not null + and vaso_rate > 0 + and starttime != endtime +) +, vasocv10 as +( + select + icustay_id + , starttime, endtime + , vaso_groups + , SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum + , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first + from vasocv9 +) +, vasocv as +( + select icustay_id + , min(starttime) as starttime + , max(endtime) as endtime + , vaso_groups_sum + , vaso_rate + , sum(vaso_amount) as vaso_amount + from vasocv10 + group by icustay_id, vaso_groups_sum, vaso_rate +) +-- now we extract the associated data for metavision patients +, vasomv as +( + select + icustay_id, linkorderid + , rate as vaso_rate + , amount as vaso_amount + , starttime + , endtime + from inputevents_mv + where itemid = 221906 -- norepinephrine + and statusdescription != 'Rewritten' -- only valid orders +) +-- now assign this data to every hour of the patient's stay +-- vaso_amount for carevue is not accurate +SELECT icustay_id + , starttime, endtime + , vaso_rate, vaso_amount +from vasocv +UNION ALL +SELECT icustay_id + , starttime, endtime + , vaso_rate, 
vaso_amount +from vasomv +order by icustay_id, starttime; diff --git a/sql/durations/ventilation_classification.sql b/sql/durations/ventilation_classification.sql new file mode 100644 index 0000000..c7ab3cc --- /dev/null +++ b/sql/durations/ventilation_classification.sql @@ -0,0 +1,142 @@ +-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY. +DROP TABLE IF EXISTS ventilation_classification; CREATE TABLE ventilation_classification AS +-- Identify The presence of a mechanical ventilation using settings +select + icustay_id, charttime + -- case statement determining whether it is an instance of mech vent + , max( + case + when itemid is null or value is null then 0 -- can't have null values + when itemid = 720 and value != 'Other/Remarks' THEN 1 -- VentTypeRecorded + when itemid = 223848 and value != 'Other' THEN 1 + when itemid = 223849 then 1 -- ventilator mode + when itemid = 467 and value = 'Ventilator' THEN 1 -- O2 delivery device == ventilator + when itemid in + ( + 445, 448, 449, 450, 1340, 1486, 1600, 224687 -- minute volume + , 639, 654, 681, 682, 683, 684,224685,224684,224686 -- tidal volume + , 218,436,535,444,459,224697,224695,224696,224746,224747 -- High/Low/Peak/Mean/Neg insp force ("RespPressure") + , 221,1,1211,1655,2000,226873,224738,224419,224750,227187 -- Insp pressure + , 543 -- PlateauPressure + , 5865,5866,224707,224709,224705,224706 -- APRV pressure + , 60,437,505,506,686,220339,224700 -- PEEP + , 3459 -- high pressure relief + , 501,502,503,224702 -- PCV + , 223,667,668,669,670,671,672 -- TCPCV + , 224701 -- PSVlevel + ) + THEN 1 + else 0 + end + ) as MechVent + , max( + case + -- initiation of oxygen therapy indicates the ventilation has ended + when itemid = 226732 and value in + ( + 'Nasal cannula', -- 153714 observations + 'Face tent', -- 24601 observations + 'Aerosol-cool', -- 24560 observations + 'Trach mask ', -- 16435 observations + 'High flow neb', -- 10785 observations + 'Non-rebreather', -- 5182 observations + 'Venti 
mask ', -- 1947 observations + 'Medium conc mask ', -- 1888 observations + 'T-piece', -- 1135 observations + 'High flow nasal cannula', -- 925 observations + 'Ultrasonic neb', -- 9 observations + 'Vapomist' -- 3 observations + ) then 1 + when itemid = 467 and value in + ( + 'Cannula', -- 278252 observations + 'Nasal Cannula', -- 248299 observations + -- 'None', -- 95498 observations + 'Face Tent', -- 35766 observations + 'Aerosol-Cool', -- 33919 observations + 'Trach Mask', -- 32655 observations + 'Hi Flow Neb', -- 14070 observations + 'Non-Rebreather', -- 10856 observations + 'Venti Mask', -- 4279 observations + 'Medium Conc Mask', -- 2114 observations + 'Vapotherm', -- 1655 observations + 'T-Piece', -- 779 observations + 'Hood', -- 670 observations + 'Hut', -- 150 observations + 'TranstrachealCat', -- 78 observations + 'Heated Neb', -- 37 observations + 'Ultrasonic Neb' -- 2 observations + ) then 1 + else 0 + end + ) as OxygenTherapy + , max( + case when itemid is null or value is null then 0 + -- extubated indicates ventilation event has ended + when itemid = 640 and value = 'Extubated' then 1 + when itemid = 640 and value = 'Self Extubation' then 1 + else 0 + end + ) + as Extubated + , max( + case when itemid is null or value is null then 0 + when itemid = 640 and value = 'Self Extubation' then 1 + else 0 + end + ) + as SelfExtubated +from chartevents ce +where ce.value is not null +-- exclude rows marked as error +and (ce.error != 1 or ce.error IS NULL) +and itemid in +( + -- the below are settings used to indicate ventilation + 720, 223849 -- vent mode + , 223848 -- vent type + , 445, 448, 449, 450, 1340, 1486, 1600, 224687 -- minute volume + , 639, 654, 681, 682, 683, 684,224685,224684,224686 -- tidal volume + , 218,436,535,444,224697,224695,224696,224746,224747 -- High/Low/Peak/Mean ("RespPressure"); NOTE(review): the MechVent CASE also lists 459 in this group, but 459 is absent from this filter so those rows are never read - confirm intended + , 221,1,1211,1655,2000,226873,224738,224419,224750,227187 -- Insp pressure + , 543 -- PlateauPressure + , 5865,5866,224707,224709,224705,224706 -- APRV 
pressure + , 60,437,505,506,686,220339,224700 -- PEEP + , 3459 -- high pressure relief + , 501,502,503,224702 -- PCV + , 223,667,668,669,670,671,672 -- TCPCV + , 224701 -- PSVlevel + + -- the below are settings used to indicate extubation + , 640 -- extubated + + -- the below indicate oxygen/NIV, i.e. the end of a mechanical vent event + , 468 -- O2 Delivery Device#2 + , 469 -- O2 Delivery Mode + , 470 -- O2 Flow (lpm) + , 471 -- O2 Flow (lpm) #2 + , 227287 -- O2 Flow (additional cannula) + , 226732 -- O2 Delivery Device(s) + , 223834 -- O2 Flow + + -- used in both oxygen + vent calculation + , 467 -- O2 Delivery Device +) +group by icustay_id, charttime +UNION DISTINCT +-- add in the extubation flags from procedureevents_mv +-- note that we only need the start time for the extubation +-- (extubation is always charted as ending 1 minute after it started) +select + icustay_id, starttime as charttime + , 0 as MechVent + , 0 as OxygenTherapy + , 1 as Extubated + , case when itemid = 225468 then 1 else 0 end as SelfExtubated +from procedureevents_mv +where itemid in +( + 227194 -- "Extubation" +, 225468 -- "Unplanned Extubation (patient-initiated)" +, 225477 -- "Unplanned Extubation (non-patient initiated)" +); diff --git a/sql/durations/ventilation_durations.sql b/sql/durations/ventilation_durations.sql new file mode 100644 index 0000000..8e5aa3b --- /dev/null +++ b/sql/durations/ventilation_durations.sql @@ -0,0 +1,112 @@ +-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY. +DROP TABLE IF EXISTS ventilation_durations; CREATE TABLE ventilation_durations AS +-- This query extracts the duration of mechanical ventilation +-- The main goal of the query is to aggregate sequential ventilator settings +-- into single mechanical ventilation "events". The start and end time of these +-- events can then be used for various purposes: calculating the total duration +-- of mechanical ventilation, cross-checking values (e.g. 
PaO2:FiO2 on vent), etc + +-- The query's logic is roughly: +-- 1) The presence of a mechanical ventilation setting starts a new ventilation event +-- 2) Any instance of a setting in the next 8 hours continues the event +-- 3) Certain elements end the current ventilation event +-- a) documented extubation ends the current ventilation +-- b) initiation of non-invasive vent and/or oxygen ends the current vent + +-- See the ventilation_classification.sql query for step 1 of the above. +-- This query has the logic for converting events into durations. +with vd0 as +( + select + icustay_id + -- this carries over the previous charttime which had a mechanical ventilation event + , case + when MechVent=1 then + LAG(CHARTTIME, 1) OVER (partition by icustay_id, MechVent order by charttime) + else null + end as charttime_lag + , charttime + , MechVent + , OxygenTherapy + , Extubated + , SelfExtubated + from ventilation_classification +) +, vd1 as +( + select + icustay_id + , charttime_lag + , charttime + , MechVent + , OxygenTherapy + , Extubated + , SelfExtubated + + -- if this is a mechanical ventilation event, we calculate the time since the last event + , case + -- if the current observation indicates mechanical ventilation is present + -- calculate the time since the last vent event + when MechVent=1 then + DATETIME_DIFF(CHARTTIME, charttime_lag, 'MINUTE')/60 + else null + end as ventduration + + , LAG(Extubated,1) + OVER + ( + partition by icustay_id, case when MechVent=1 or Extubated=1 then 1 else 0 end + order by charttime + ) as ExtubatedLag + + -- now we determine if the current mech vent event is a "new", i.e. 
they've just been intubated + , case + -- if there is an extubation flag, we mark any subsequent ventilation as a new ventilation event + --when Extubated = 1 then 0 -- extubation is *not* a new ventilation event, the *subsequent* row is + when + LAG(Extubated,1) + OVER + ( + partition by icustay_id, case when MechVent=1 or Extubated=1 then 1 else 0 end + order by charttime + ) + = 1 then 1 + -- if patient has initiated oxygen therapy, and is not currently vented, start a newvent + when MechVent = 0 and OxygenTherapy = 1 then 1 + -- if there is less than 8 hours between vent settings, we do not treat this as a new ventilation event + when CHARTTIME > DATETIME_ADD(charttime_lag, INTERVAL '8' HOUR) + then 1 + else 0 + end as newvent + -- use the staging table with only vent settings from chart events + FROM vd0 ventsettings +) +, vd2 as +( + select vd1.* + -- create a cumulative sum of the instances of new ventilation + -- this results in a monotonic integer assigned to each instance of ventilation + , case when MechVent=1 or Extubated = 1 then + SUM( newvent ) + OVER ( partition by icustay_id order by charttime ) + else null end + as ventnum + --- now we convert CHARTTIME of ventilator settings into durations + from vd1 +) +-- create the durations for each mechanical ventilation instance +select icustay_id + -- regenerate ventnum so it's sequential + , ROW_NUMBER() over (partition by icustay_id order by ventnum) as ventnum + , min(charttime) as starttime + , max(charttime) as endtime + , DATETIME_DIFF(max(charttime), min(charttime), 'MINUTE')/60 AS duration_hours +from vd2 +group by icustay_id, vd2.ventnum +having min(charttime) != max(charttime) +-- patient had to be mechanically ventilated at least once +-- i.e. 
max(mechvent) should be 1 +-- this excludes a frequent situation of NIV/oxygen before intub +-- in these cases, ventnum=0 and max(mechvent)=0, so they are ignored +and max(mechvent) = 1 +order by icustay_id, ventnum; \ No newline at end of file diff --git a/sql/durations/weight_durations.sql b/sql/durations/weight_durations.sql new file mode 100644 index 0000000..9a5a61f --- /dev/null +++ b/sql/durations/weight_durations.sql @@ -0,0 +1,207 @@ +-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY. +DROP TABLE IF EXISTS weight_durations; CREATE TABLE weight_durations AS +-- This query extracts weights for adult ICU patients with start/stop times +-- if an admission weight is given, then this is assigned from intime to outtime + +-- Neonatal weights (itemid 3580, 3581, 3582) and birth weights (itemid 3723, 4183) are included as well +-- Echo-derived weights are only used for ICU stays that have no row in wt_stg +WITH wt_neonate AS +( + SELECT c.icustay_id, c.charttime + , MAX(CASE WHEN c.itemid = 3580 THEN c.valuenum END) as wt_kg + , MAX(CASE WHEN c.itemid = 3581 THEN c.valuenum END) as wt_lb + , MAX(CASE WHEN c.itemid = 3582 THEN c.valuenum END) as wt_oz + FROM chartevents c + WHERE c.itemid in (3580, 3581, 3582) + AND c.icustay_id IS NOT NULL + AND COALESCE(c.error, 0) = 0 + -- wt_oz/wt_lb/wt_kg are only 0 erroneously, so drop these rows + AND c.valuenum > 0 + -- a separate query was run to manually verify only 1 value exists per + -- icustay_id/charttime/itemid grouping + -- therefore, we can use max() across itemid to collapse these values to 1 row per group + GROUP BY c.icustay_id, c.charttime +) +, birth_wt AS +( + SELECT c.icustay_id, c.charttime + , MAX( + CASE + WHEN c.itemid = 4183 THEN + -- clean free-text birth weight data + CASE + -- ignore value if there are any non-numeric characters + WHEN REGEXP_CONTAINS(c.value, '[^0-9\\.]') THEN NULL + -- convert grams to kg + WHEN CAST(c.value AS NUMERIC) > 100 THEN CAST(c.value AS NUMERIC)/1000 + -- keep kg as is, filtering 
bad values (largest baby ever born was conveniently 9.98kg) + WHEN CAST(c.value AS NUMERIC) < 10 THEN CAST(c.value AS NUMERIC) + -- ignore other values (those between 10-100) - junk data + ELSE NULL END + -- itemid 3723 happily has all numeric data - also doesn't store any grams data + WHEN c.itemid = 3723 AND c.valuenum < 10 THEN c.valuenum + ELSE NULL END) as wt_kg + FROM chartevents c + WHERE c.itemid in (3723, 4183) + AND c.icustay_id IS NOT NULL + AND COALESCE(c.error, 0) = 0 + -- a separate query was run to manually verify only 1 value exists per + -- icustay_id/charttime/itemid grouping + -- therefore, we can use max() across itemid to collapse these values to 1 row per group + GROUP BY c.icustay_id, c.charttime +) +, wt_stg as +( + SELECT + c.icustay_id + , c.charttime + , case when c.itemid in (762,226512) then 'admit' + else 'daily' end as weight_type + -- TODO: eliminate obvious outliers if there is a reasonable weight + , c.valuenum as weight + FROM chartevents c + WHERE c.valuenum IS NOT NULL + AND c.itemid in + ( + 762,226512 -- Admit Wt + , 763,224639 -- Daily Weight + ) + AND c.icustay_id IS NOT NULL + AND c.valuenum > 0 + -- exclude rows marked as error + AND COALESCE(c.error, 0) = 0 + UNION ALL + SELECT + n.icustay_id + , n.charttime + , 'daily' AS weight_type + , CASE + WHEN wt_kg IS NOT NULL THEN wt_kg + WHEN wt_lb IS NOT NULL THEN wt_lb*0.45359237 + wt_oz*0.0283495231 -- NULL when wt_oz is missing; such rows are dropped later by the weight IS NOT NULL filter in wt_stg1 + ELSE NULL END AS weight + FROM wt_neonate n + UNION ALL + SELECT + b.icustay_id + , b.charttime + -- birth weight of neonates is treated as admission weight + , 'admit' AS weight_type + , wt_kg as weight + FROM birth_wt b +) +-- get more weights from echo - completes data for ~2500 patients +-- we only use echo data if there is *no* charted data +-- NOTE: no median is computed here; each echo weight keeps its own charttime and is chained like a charted weight +, echo as +( + select + ie.icustay_id + , ec.charttime + , 'echo' AS weight_type + , 0.453592*ec.weight as weight -- echo_data.Weight is parsed from the "Weight (lb)" field; convert lb to kg + from icustays ie + inner join echo_data ec + on 
ie.hadm_id = ec.hadm_id + where ec.weight is not null + and ie.icustay_id not in (select distinct icustay_id from wt_stg) +) +, wt_stg0 AS +( + SELECT icustay_id, charttime, weight_type, weight + FROM wt_stg + UNION ALL + SELECT icustay_id, charttime, weight_type, weight + FROM echo +) +-- assign ascending row number +, wt_stg1 as +( + select + icustay_id + , charttime + , weight_type + , weight + , ROW_NUMBER() OVER (partition by icustay_id, weight_type order by charttime) as rn + from wt_stg0 + WHERE weight IS NOT NULL +) +-- change charttime to intime for the first admission weight recorded +, wt_stg2 AS +( + SELECT + wt_stg1.icustay_id + , ie.intime, ie.outtime + , case when wt_stg1.weight_type = 'admit' and wt_stg1.rn = 1 + then DATETIME_SUB(ie.intime, INTERVAL '2' HOUR) + else wt_stg1.charttime end as starttime + , wt_stg1.weight + from wt_stg1 + INNER JOIN icustays ie + on ie.icustay_id = wt_stg1.icustay_id +) +, wt_stg3 as +( + select + icustay_id + , intime, outtime + , starttime + , coalesce( + LEAD(starttime) OVER (PARTITION BY icustay_id ORDER BY starttime), + DATETIME_ADD(GREATEST(outtime, starttime), INTERVAL '2' HOUR) + ) as endtime + , weight + from wt_stg2 +) +-- this table is the start/stop times from admit/daily weight in charted data +, wt1 as +( + select + icustay_id + , starttime + , coalesce(endtime, + LEAD(starttime) OVER (partition by icustay_id order by starttime), + -- impute ICU discharge as the end of the final weight measurement + -- plus a 2 hour "fuzziness" window + DATETIME_ADD(outtime, INTERVAL '2' HOUR) + ) as endtime + , weight + from wt_stg3 +) +-- if the intime for the patient is < the first charted daily weight +-- then we will have a "gap" at the start of their stay +-- to prevent this, we look for these gaps and backfill the first weight +-- this adds (153255-149657)=3598 rows, meaning this fix helps for up to 3598 icustay_id +, wt_fix as +( + select ie.icustay_id + -- we add a 2 hour "fuzziness" window + , 
DATETIME_SUB(ie.intime, INTERVAL '2' HOUR) as starttime + , wt.starttime as endtime + , wt.weight + from icustays ie + inner join + -- the below subquery returns one row for each unique icustay_id + -- the row contains: the first starttime and the corresponding weight + ( + SELECT wt1.icustay_id, wt1.starttime, wt1.weight + , ROW_NUMBER() OVER (PARTITION BY wt1.icustay_id ORDER BY wt1.starttime) as rn + FROM wt1 + ) wt + ON ie.icustay_id = wt.icustay_id + AND wt.rn = 1 + and ie.intime < wt.starttime +) +-- add the backfill rows to the main weight table +select + wt1.icustay_id + , wt1.starttime + , wt1.endtime + , wt1.weight +from wt1 +UNION ALL +SELECT + wt_fix.icustay_id + , wt_fix.starttime + , wt_fix.endtime + , wt_fix.weight +from wt_fix; diff --git a/sql/echo_data.sql b/sql/echo_data.sql new file mode 100644 index 0000000..99e888a --- /dev/null +++ b/sql/echo_data.sql @@ -0,0 +1,48 @@ +-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY. +DROP TABLE IF EXISTS echo_data; CREATE TABLE echo_data AS +-- This code extracts structured data from echocardiographies +-- You can join it to the text notes using ROW_ID +-- Just note that ROW_ID will differ across versions of MIMIC-III. + +select ROW_ID + , subject_id, hadm_id + , chartdate + + -- charttime is always null for echoes.. + -- however, the time is available in the echo text, e.g.: + -- , substring(ne.text, 'Date/Time: [\[\]0-9*-]+ at ([0-9:]+)') as TIMESTAMP + -- we can therefore impute it and re-create charttime + , PARSE_DATETIME + ( + '%Y-%m-%d%H:%M:%S', + FORMAT_DATE('%Y-%m-%d', chartdate) + || REGEXP_EXTRACT(ne.text, 'Date/Time: .+? at ([0-9]+:[0-9]{2})') + || ':00' + ) AS charttime + + -- explanation of below substring: + -- 'Indication: ' - matched verbatim + -- (.*?) - match any character + -- \n - the end of the line + -- substring only returns the item in ()s + -- note: the '?' makes it non-greedy. 
if you exclude it, it matches until it reaches the *last* \n + + , REGEXP_EXTRACT(ne.text, 'Indication: (.*?)\n') as Indication + + -- sometimes numeric values contain de-id text, e.g. [** Numeric Identifier **] + -- this removes that text + , cast(REGEXP_EXTRACT(ne.text, 'Height: \\x28in\\x29 ([0-9]+)') as numeric) as Height -- NOTE(review): if standard_conforming_strings is on, \\x28 reaches the regex engine as an escaped backslash followed by x28, not as '(' - confirm REGEXP_EXTRACT (defined outside this file) compensates, else Height/Weight/BSA/BP/HR are NULL + , cast(REGEXP_EXTRACT(ne.text, 'Weight \\x28lb\\x29: ([0-9]+)\n') as numeric) as Weight + , cast(REGEXP_EXTRACT(ne.text, 'BSA \\x28m2\\x29: ([0-9]+) m2\n') as numeric) as BSA -- ends in 'm2'; NOTE(review): [0-9]+ stops at a decimal point, so a value like 1.85 would not match - confirm BSA is populated + , REGEXP_EXTRACT(ne.text, 'BP \\x28mm Hg\\x29: (.+)\n') as BP -- Sys/Dias + , cast(REGEXP_EXTRACT(ne.text, 'BP \\x28mm Hg\\x29: ([0-9]+)/[0-9]+?\n') as numeric) as BPSys -- first part of fraction + , cast(REGEXP_EXTRACT(ne.text, 'BP \\x28mm Hg\\x29: [0-9]+/([0-9]+?)\n') as numeric) as BPDias -- second part of fraction + , cast(REGEXP_EXTRACT(ne.text, 'HR \\x28bpm\\x29: ([0-9]+?)\n') as numeric) as HR + + , REGEXP_EXTRACT(ne.text, 'Status: (.*?)\n') as Status + , REGEXP_EXTRACT(ne.text, 'Test: (.*?)\n') as Test + , REGEXP_EXTRACT(ne.text, 'Doppler: (.*?)\n') as Doppler + , REGEXP_EXTRACT(ne.text, 'Contrast: (.*?)\n') as Contrast + , REGEXP_EXTRACT(ne.text, 'Technical Quality: (.*?)\n') as TechnicalQuality +FROM noteevents ne +where category = 'Echo'; diff --git a/sql/firstday/blood_gas_first_day.sql b/sql/firstday/blood_gas_first_day.sql new file mode 100644 index 0000000..f56046a --- /dev/null +++ b/sql/firstday/blood_gas_first_day.sql @@ -0,0 +1,108 @@ +-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY. +DROP TABLE IF EXISTS blood_gas_first_day; CREATE TABLE blood_gas_first_day AS +-- The aim of this query is to pivot entries related to blood gases and +-- chemistry values which were found in LABEVENTS + +-- things to check: +-- when a mixed venous/arterial blood sample are taken at the same time, is the store time different? 
+
+with pvt as
+( -- begin query that extracts the data
+  select ie.subject_id, ie.hadm_id, ie.icustay_id
+  -- here we assign labels to ITEMIDs
+  -- this also fuses together multiple ITEMIDs containing the same data
+      , case
+        when itemid = 50800 then 'SPECIMEN'
+        when itemid = 50801 then 'AADO2'
+        when itemid = 50802 then 'BASEEXCESS'
+        when itemid = 50803 then 'BICARBONATE'
+        when itemid = 50804 then 'TOTALCO2'
+        when itemid = 50805 then 'CARBOXYHEMOGLOBIN'
+        when itemid = 50806 then 'CHLORIDE'
+        when itemid = 50808 then 'CALCIUM'
+        when itemid = 50809 then 'GLUCOSE'
+        when itemid = 50810 then 'HEMATOCRIT'
+        when itemid = 50811 then 'HEMOGLOBIN'
+        when itemid = 50812 then 'INTUBATED'
+        when itemid = 50813 then 'LACTATE'
+        when itemid = 50814 then 'METHEMOGLOBIN'
+        when itemid = 50815 then 'O2FLOW'
+        when itemid = 50816 then 'FIO2'
+        when itemid = 50817 then 'SO2' -- OXYGENSATURATION
+        when itemid = 50818 then 'PCO2'
+        when itemid = 50819 then 'PEEP'
+        when itemid = 50820 then 'PH'
+        when itemid = 50821 then 'PO2'
+        when itemid = 50822 then 'POTASSIUM'
+        when itemid = 50823 then 'REQUIREDO2'
+        when itemid = 50824 then 'SODIUM'
+        when itemid = 50825 then 'TEMPERATURE'
+        when itemid = 50826 then 'TIDALVOLUME'
+        when itemid = 50827 then 'VENTILATIONRATE'
+        when itemid = 50828 then 'VENTILATOR'
+        else null -- 50807 and 51545 pass the ITEMID filter below but get no label, so no pivot column picks them up
+        end as label
+        , charttime
+        , value
+        -- add in some sanity checks on the values
+        , case
+          when valuenum <= 0 and itemid != 50802 then null -- allow negative baseexcess
+          when itemid = 50810 and valuenum > 100 then null -- hematocrit
+          -- keep FiO2 only when it is within 20-100 (nominally 21-100; 20 is tolerated, see below)
+          -- mistakes are rare (<100 obs out of ~100,000)
+          -- there are 862 obs of valuenum == 20 - some people round down!
+          -- rather than risk imputing garbage data for FiO2, we simply NULL invalid values
+          when itemid = 50816 and valuenum < 20 then null
+          when itemid = 50816 and valuenum > 100 then null
+          when itemid = 50817 and valuenum > 100 then null -- O2 sat
+          when itemid = 50815 and valuenum > 70 then null -- O2 flow
+          when itemid = 50821 and valuenum > 800 then null -- PO2
+          -- conservative upper limit
+        else valuenum
+        end as valuenum
+
+    FROM icustays ie -- left join below: a stay without matching labs still yields one row with NULL charttime
+    left join labevents le
+      on le.subject_id = ie.subject_id and le.hadm_id = ie.hadm_id
+      and le.charttime between (DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)) and (DATETIME_ADD(ie.intime, INTERVAL '1' DAY))
+      and le.ITEMID in
+      -- blood gases
+      (
+        50800, 50801, 50802, 50803, 50804, 50805, 50806, 50807, 50808, 50809
+        , 50810, 50811, 50812, 50813, 50814, 50815, 50816, 50817, 50818, 50819
+        , 50820, 50821, 50822, 50823, 50824, 50825, 50826, 50827, 50828
+        , 51545
+      )
+)
+select pvt.SUBJECT_ID, pvt.HADM_ID, pvt.ICUSTAY_ID, pvt.CHARTTIME -- one output row per stay and charttime; max() collapses each label's rows
+, max(case when label = 'SPECIMEN' then value else null end) as specimen
+, max(case when label = 'AADO2' then valuenum else null end) as aado2
+, max(case when label = 'BASEEXCESS' then valuenum else null end) as baseexcess
+, max(case when label = 'BICARBONATE' then valuenum else null end) as bicarbonate
+, max(case when label = 'TOTALCO2' then valuenum else null end) as totalco2
+, max(case when label = 'CARBOXYHEMOGLOBIN' then valuenum else null end) as carboxyhemoglobin
+, max(case when label = 'CHLORIDE' then valuenum else null end) as chloride
+, max(case when label = 'CALCIUM' then valuenum else null end) as calcium
+, max(case when label = 'GLUCOSE' then valuenum else null end) as glucose
+, max(case when label = 'HEMATOCRIT' then valuenum else null end) as hematocrit
+, max(case when label = 'HEMOGLOBIN' then valuenum else null end) as hemoglobin
+, max(case when label = 'INTUBATED' then valuenum else null end) as intubated
+, max(case when label = 'LACTATE' then valuenum else null end) as lactate
+, max(case when label = 'METHEMOGLOBIN' then valuenum else null end) as methemoglobin
+, max(case when label = 'O2FLOW' then valuenum else null end) as o2flow
+, max(case when label = 'FIO2' then valuenum else null end) as fio2
+, max(case when label = 'SO2' then valuenum else null end) as so2 -- OXYGENSATURATION
+, max(case when label = 'PCO2' then valuenum else null end) as pco2
+, max(case when label = 'PEEP' then valuenum else null end) as peep
+, max(case when label = 'PH' then valuenum else null end) as ph
+, max(case when label = 'PO2' then valuenum else null end) as po2
+, max(case when label = 'POTASSIUM' then valuenum else null end) as potassium
+, max(case when label = 'REQUIREDO2' then valuenum else null end) as requiredo2
+, max(case when label = 'SODIUM' then valuenum else null end) as sodium
+, max(case when label = 'TEMPERATURE' then valuenum else null end) as temperature
+, max(case when label = 'TIDALVOLUME' then valuenum else null end) as tidalvolume
+, max(case when label = 'VENTILATIONRATE' then valuenum else null end) as ventilationrate
+, max(case when label = 'VENTILATOR' then valuenum else null end) as ventilator
+from pvt
+group by pvt.subject_id, pvt.hadm_id, pvt.icustay_id, pvt.CHARTTIME
+order by pvt.subject_id, pvt.hadm_id, pvt.icustay_id, pvt.CHARTTIME;
diff --git a/sql/firstday/blood_gas_first_day_arterial.sql b/sql/firstday/blood_gas_first_day_arterial.sql
new file mode 100644
index 0000000..d2e7504
--- /dev/null
+++ b/sql/firstday/blood_gas_first_day_arterial.sql
@@ -0,0 +1,156 @@
+-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
+DROP TABLE IF EXISTS blood_gas_first_day_arterial; CREATE TABLE blood_gas_first_day_arterial AS
+
+with stg_spo2 as
+(
+  select subject_id, hadm_id, icustay_id, charttime
+    -- max here is just used to group SpO2 by charttime
+    , max(case when valuenum <= 0 or valuenum > 100 then null else valuenum end) as SpO2
+  FROM chartevents
+  -- o2 sat; NOTE(review): unlike stg_fio2, rows flagged as error are not excluded here - confirm this is intended
+  where ITEMID in
+  (
+    646 -- SpO2
+  , 220277 -- O2 saturation pulseoxymetry
+  )
+  group by subject_id, hadm_id, icustay_id, charttime
+)
+, stg_fio2 as
+(
+  select subject_id, hadm_id, icustay_id, charttime
+    -- normalise FiO2 to a percentage; only itemid 223835 is range-checked to 21-100, 3420/3422 are taken as-is
+    , max(
+        case
+          when itemid = 223835
+            then case
+              when valuenum > 0 and valuenum <= 1
+                then valuenum * 100
+              -- improperly input data - looks like O2 flow in litres
+              when valuenum > 1 and valuenum < 21
+                then null
+              when valuenum >= 21 and valuenum <= 100
+                then valuenum
+              else null end -- unphysiological
+        when itemid in (3420, 3422)
+        -- all these values are well formatted
+          then valuenum
+        when itemid = 190 and valuenum > 0.20 and valuenum < 1
+        -- well formatted but not in %
+          then valuenum * 100
+      else null end
+    ) as fio2_chartevents
+  FROM chartevents
+  where ITEMID in
+  (
+    3420 -- FiO2
+  , 190 -- FiO2 set
+  , 223835 -- Inspired O2 Fraction (FiO2)
+  , 3422 -- FiO2 [measured]
+  )
+  -- exclude rows marked as error
+  AND (error IS NULL OR error = 0)
+  group by subject_id, hadm_id, icustay_id, charttime
+)
+, stg2 as
+(
+select bg.*
+  , ROW_NUMBER() OVER (partition by bg.icustay_id, bg.charttime order by s1.charttime DESC) as lastRowSpO2
+  , s1.spo2
+from blood_gas_first_day bg
+left join stg_spo2 s1
+  -- same patient
+  on  bg.icustay_id = s1.icustay_id
+  -- spo2 occurred at most 2 hours before this blood gas
+  and s1.charttime >= DATETIME_SUB(bg.charttime, INTERVAL '2' HOUR)
+  and s1.charttime <= bg.charttime
+where bg.po2 is not null
+)
+, stg3 as
+(
+select bg.*
+  , ROW_NUMBER() OVER (partition by bg.icustay_id, bg.charttime order by s2.charttime DESC) as lastRowFiO2
+  , s2.fio2_chartevents
+
+  -- create our specimen prediction: logistic function of a linear score; each coalesce() falls back to a constant when its input is NULL
+  ,  1/(1+exp(-(-0.02544
+  +    0.04598 * po2
+  + coalesce(-0.15356 * spo2             , -0.15356 *   97.49420 +    0.13429)
+  + coalesce( 0.00621 * fio2_chartevents ,  0.00621 *   51.49550 +   -0.24958)
+  + coalesce( 0.10559 * hemoglobin       ,  0.10559 *   10.32307 +    0.05954)
+  + coalesce( 0.13251 * so2              ,  0.13251 *   93.66539 +   -0.23172)
+  + coalesce(-0.01511 * pco2             , -0.01511 *   42.08866 +   -0.01630)
+  + coalesce( 0.01480 * fio2             ,  0.01480 *   63.97836 +   -0.31142)
+  + coalesce(-0.00200 * aado2            , -0.00200 *  442.21186 +   -0.01328)
+  + coalesce(-0.03220 * bicarbonate      , -0.03220 *   22.96894 +   -0.06535)
+  + coalesce( 0.05384 * totalco2         ,  0.05384 *   24.72632 +   -0.01405)
+  + coalesce( 0.08202 * lactate          ,  0.08202 *    3.06436 +    0.06038)
+  + coalesce( 0.10956 * ph               ,  0.10956 *    7.36233 +   -0.00617)
+  + coalesce( 0.00848 * o2flow           ,  0.00848 *    7.59362 +   -0.35803)
+  ))) as SPECIMEN_PROB
+from stg2 bg
+left join stg_fio2 s2
+  -- same patient
+  on  bg.icustay_id = s2.icustay_id
+  -- fio2 occurred at most 4 hours before this blood gas
+  and s2.charttime between DATETIME_SUB(bg.charttime, INTERVAL '4' HOUR) and bg.charttime
+where bg.lastRowSpO2 = 1 -- only the row with the most recent SpO2 (if no SpO2 found lastRowSpO2 = 1)
+)
+
+select subject_id, hadm_id,
+icustay_id, charttime
+, specimen -- raw data indicating sample type, only present 80% of the time
+
+-- prediction of specimen for missing data
+, case
+      when SPECIMEN is not null then SPECIMEN
+      when SPECIMEN_PROB > 0.75 then 'ART'
+    else null end as SPECIMEN_PRED
+, specimen_prob
+
+-- oxygen related parameters
+, so2, spo2 -- note spo2 is FROM chartevents
+, po2, pco2
+, fio2_chartevents, fio2
+, aado2
+-- also calculate AADO2
+, case
+    when  PO2 is not null
+      and pco2 is not null
+      and coalesce(fio2, fio2_chartevents) is not null
+     -- divide by 100 because FiO2 is in a % but should be a fraction; lab fio2 is preferred over the charted value
+      then (coalesce(fio2, fio2_chartevents)/100) * (760 - 47) - (pco2/0.8) - po2
+    else null
+  end as AADO2_calc
+, case
+    when PO2 is not null and coalesce(fio2, fio2_chartevents) is not null
+     -- multiply by 100 because FiO2 is in a % but should be a fraction
+      then 100*PO2/(coalesce(fio2, fio2_chartevents))
+    else null
+  end as PaO2FiO2
+-- acid-base parameters
+, ph, baseexcess
+, bicarbonate, totalco2
+
+-- blood count parameters
+, hematocrit
+, hemoglobin
+, carboxyhemoglobin
+, methemoglobin
+
+-- chemistry
+, chloride, calcium
+, temperature
+, potassium, sodium
+, lactate
+, glucose
+
+-- ventilation stuff that's sometimes input
+, intubated, tidalvolume, ventilationrate, ventilator
+, peep, o2flow
+, requiredo2
+
+from stg3
+where lastRowFiO2 = 1 -- only the most recent FiO2
+-- restrict it to *only* arterial samples
+and (specimen = 'ART' or specimen_prob > 0.75)
+order by icustay_id, charttime;
diff --git a/sql/firstday/gcs_first_day.sql b/sql/firstday/gcs_first_day.sql
new file mode 100644
index 0000000..6654617
--- /dev/null
+++ b/sql/firstday/gcs_first_day.sql
@@ -0,0 +1,143 @@
+-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
+DROP TABLE IF EXISTS gcs_first_day; CREATE TABLE gcs_first_day AS
+-- ITEMIDs used:
+
+-- CAREVUE
+--    723 as GCSVerbal
+--    454 as GCSMotor
+--    184 as GCSEyes
+
+-- METAVISION
+--    223900 GCS - Verbal Response
+--    223901 GCS - Motor Response
+--    220739 GCS - Eye Opening
+
+-- The code combines the ITEMIDs into the carevue itemids, then pivots those
+-- So 223900 is changed to 723, then the ITEMID 723 is pivoted to form GCSVerbal
+
+-- Note:
+--  The GCS for sedated patients is defaulted to 15 in this code.
+--  This is in line with how the data is meant to be collected.
+--  e.g., from the SAPS II publication:
+--    For sedated patients, the Glasgow Coma Score before sedation was used.
+--    This was ascertained either from interviewing the physician who ordered the sedation,
+--    or by reviewing the patient's medical record.
+
+with base as
+(
+  SELECT pvt.ICUSTAY_ID
+  , pvt.charttime
+
+  -- Easier names - note we coalesced Metavision and CareVue IDs below
+  , max(case when pvt.itemid = 454 then pvt.valuenum else null end) as GCSMotor
+  , max(case when pvt.itemid = 723 then pvt.valuenum else null end) as GCSVerbal
+  , max(case when pvt.itemid = 184 then pvt.valuenum else null end) as GCSEyes
+
+  -- If verbal was set to 0 in the below select, then this is an intubated patient
+  , case
+      when max(case when pvt.itemid = 723 then pvt.valuenum else null end) = 0
+    then 1
+    else 0
+    end as EndoTrachFlag
+
+  , ROW_NUMBER ()
+          OVER (PARTITION BY pvt.ICUSTAY_ID ORDER BY pvt.charttime ASC) as rn
+
+  FROM  (
+  select l.ICUSTAY_ID
+  -- merge the ITEMIDs so that the pivot applies to both metavision/carevue data
+  , case
+      when l.ITEMID in (723,223900) then 723
+      when l.ITEMID in (454,223901) then 454
+      when l.ITEMID in (184,220739) then 184
+      else l.ITEMID end
+    as ITEMID
+
+  -- convert the data into a number, reserving a value of 0 for ET/Trach
+  , case
+      -- endotrach/vent is assigned a value of 0, later parsed specially
+      when l.ITEMID = 723 and l.VALUE = '1.0 ET/Trach' then 0 -- carevue
+      when l.ITEMID = 223900 and l.VALUE = 'No Response-ETT' then 0 -- metavision
+
+      else VALUENUM
+      end
+    as VALUENUM
+  , l.CHARTTIME
+  FROM chartevents l
+
+  -- get intime for charttime subselection
+  inner join icustays b
+    on l.icustay_id = b.icustay_id
+
+  -- Isolate the desired GCS variables
+  where l.ITEMID in
+  (
+    -- 198 -- GCS
+    -- GCS components, CareVue
+    184, 454, 723
+    -- GCS components, Metavision
+    , 223900, 223901, 220739
+  )
+  -- Only get data for the first 24 hours
+  and l.charttime between b.intime and DATETIME_ADD(b.intime, INTERVAL '1' DAY)
+  -- exclude rows marked as error
+  AND (l.error IS NULL OR l.error = 0)
+  ) pvt
+  group by pvt.ICUSTAY_ID, pvt.charttime
+)
+, gcs as (
+  select b.*
+  , b2.GCSVerbal as GCSVerbalPrev
+  , b2.GCSMotor as GCSMotorPrev
+  , b2.GCSEyes as GCSEyesPrev
+  -- Calculate GCS, factoring in special case when they are intubated and prev vals
+  -- note that the coalesce are used to implement the following if:
+  --  if current value exists, use it
+  --  if previous value exists, use it
+  --  otherwise, default to normal
+  , case
+      -- replace GCS during sedation with 15
+      when b.GCSVerbal = 0
+        then 15
+      when b.GCSVerbal is null and b2.GCSVerbal = 0
+        then 15
+      -- if previously they were intub, but they aren't now, do not use previous GCS values
+      when b2.GCSVerbal = 0
+        then
+            coalesce(b.GCSMotor,6)
+          + coalesce(b.GCSVerbal,5)
+          + coalesce(b.GCSEyes,4)
+      -- otherwise, add up score normally, imputing previous value if none available at current time
+      else
+            coalesce(b.GCSMotor,coalesce(b2.GCSMotor,6))
+          + coalesce(b.GCSVerbal,coalesce(b2.GCSVerbal,5))
+          + coalesce(b.GCSEyes,coalesce(b2.GCSEyes,4))
+      end as GCS
+
+  from base b
+  -- join to the immediately preceding observation (rn - 1), but only if it is less than 6 hours older
+  left join base b2
+    on b.ICUSTAY_ID = b2.ICUSTAY_ID and b.rn = b2.rn+1 and b2.charttime > DATETIME_SUB(b.charttime, INTERVAL '6' HOUR)
+)
+, gcs_final as (
+  select gcs.*
+  -- This sorts the data by GCS, so IsMinGCS=1 is the row with the lowest GCS (ties are broken arbitrarily)
+  , ROW_NUMBER ()
+          OVER (PARTITION BY gcs.ICUSTAY_ID
+                ORDER BY gcs.GCS
+               ) as IsMinGCS
+  from gcs
+)
+select ie.subject_id, ie.hadm_id, ie.icustay_id
+-- The minimum GCS is determined by the above row partition, we only join if IsMinGCS=1
+, GCS as mingcs
+, coalesce(GCSMotor,GCSMotorPrev) as gcsmotor
+, coalesce(GCSVerbal,GCSVerbalPrev) as gcsverbal
+, coalesce(GCSEyes,GCSEyesPrev) as gcseyes
+, EndoTrachFlag as endotrachflag
+
+-- left join from icustays: every ICU stay gets a row, with NULLs when no GCS was charted
+FROM icustays ie
+left join gcs_final gs
+  on ie.icustay_id = gs.icustay_id and gs.IsMinGCS = 1
+ORDER BY ie.icustay_id;
diff --git a/sql/firstday/labs_first_day.sql b/sql/firstday/labs_first_day.sql
new file mode 100644
index 0000000..eaa4f0b
--- /dev/null
+++ b/sql/firstday/labs_first_day.sql
@@ -0,0 +1,155 @@
+-- THIS
SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
+DROP TABLE IF EXISTS labs_first_day; CREATE TABLE labs_first_day AS
+-- This query pivots lab values charted from 6 hours before ICU admission to 24 hours after it (one row per ICU stay)
+
+-- Have already confirmed that the unit of measurement is always the same: null or the correct unit
+
+SELECT
+  pvt.subject_id, pvt.hadm_id, pvt.icustay_id
+
+  , min(CASE WHEN label = 'ANION GAP' THEN valuenum ELSE NULL END) AS aniongap_min
+  , max(CASE WHEN label = 'ANION GAP' THEN valuenum ELSE NULL END) AS aniongap_max
+  , min(CASE WHEN label = 'ALBUMIN' THEN valuenum ELSE NULL END) AS albumin_min
+  , max(CASE WHEN label = 'ALBUMIN' THEN valuenum ELSE NULL END) AS albumin_max
+  , min(CASE WHEN label = 'BANDS' THEN valuenum ELSE NULL END) AS bands_min
+  , max(CASE WHEN label = 'BANDS' THEN valuenum ELSE NULL END) AS bands_max
+  , min(CASE WHEN label = 'BICARBONATE' THEN valuenum ELSE NULL END) AS bicarbonate_min
+  , max(CASE WHEN label = 'BICARBONATE' THEN valuenum ELSE NULL END) AS bicarbonate_max
+  , min(CASE WHEN label = 'BILIRUBIN' THEN valuenum ELSE NULL END) AS bilirubin_min
+  , max(CASE WHEN label = 'BILIRUBIN' THEN valuenum ELSE NULL END) AS bilirubin_max
+  , min(CASE WHEN label = 'CREATININE' THEN valuenum ELSE NULL END) AS creatinine_min
+  , max(CASE WHEN label = 'CREATININE' THEN valuenum ELSE NULL END) AS creatinine_max
+  , min(CASE WHEN label = 'CHLORIDE' THEN valuenum ELSE NULL END) AS chloride_min
+  , max(CASE WHEN label = 'CHLORIDE' THEN valuenum ELSE NULL END) AS chloride_max
+  , min(CASE WHEN label = 'GLUCOSE' THEN valuenum ELSE NULL END) AS glucose_min
+  , max(CASE WHEN label = 'GLUCOSE' THEN valuenum ELSE NULL END) AS glucose_max
+  , min(CASE WHEN label = 'HEMATOCRIT' THEN valuenum ELSE NULL END) AS hematocrit_min
+  , max(CASE WHEN label = 'HEMATOCRIT' THEN valuenum ELSE NULL END) AS hematocrit_max
+  , min(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum ELSE NULL END) AS hemoglobin_min
+  , max(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum ELSE NULL END) AS hemoglobin_max
+  , min(CASE WHEN label = 'LACTATE' THEN valuenum ELSE NULL END) AS lactate_min
+  , max(CASE WHEN label = 'LACTATE' THEN valuenum ELSE NULL END) AS lactate_max
+  , min(CASE WHEN label = 'PLATELET' THEN valuenum ELSE NULL END) AS platelet_min
+  , max(CASE WHEN label = 'PLATELET' THEN valuenum ELSE NULL END) AS platelet_max
+  , min(CASE WHEN label = 'POTASSIUM' THEN valuenum ELSE NULL END) AS potassium_min
+  , max(CASE WHEN label = 'POTASSIUM' THEN valuenum ELSE NULL END) AS potassium_max
+  , min(CASE WHEN label = 'PTT' THEN valuenum ELSE NULL END) AS ptt_min
+  , max(CASE WHEN label = 'PTT' THEN valuenum ELSE NULL END) AS ptt_max
+  , min(CASE WHEN label = 'INR' THEN valuenum ELSE NULL END) AS inr_min
+  , max(CASE WHEN label = 'INR' THEN valuenum ELSE NULL END) AS inr_max
+  , min(CASE WHEN label = 'PT' THEN valuenum ELSE NULL END) AS pt_min
+  , max(CASE WHEN label = 'PT' THEN valuenum ELSE NULL END) AS pt_max
+  , min(CASE WHEN label = 'SODIUM' THEN valuenum ELSE NULL END) AS sodium_min
+  , max(CASE WHEN label = 'SODIUM' THEN valuenum ELSE NULL END) AS sodium_max
+  , min(CASE WHEN label = 'BUN' THEN valuenum ELSE NULL END) AS bun_min
+  , max(CASE WHEN label = 'BUN' THEN valuenum ELSE NULL END) AS bun_max
+  , min(CASE WHEN label = 'WBC' THEN valuenum ELSE NULL END) AS wbc_min
+  , max(CASE WHEN label = 'WBC' THEN valuenum ELSE NULL END) AS wbc_max
+
+
+FROM
+( -- begin query that extracts the data
+  SELECT ie.subject_id, ie.hadm_id, ie.icustay_id
+  -- here we assign labels to ITEMIDs
+  -- this also fuses together multiple ITEMIDs containing the same data
+  , CASE
+        WHEN itemid = 50868 THEN 'ANION GAP'
+        WHEN itemid = 50862 THEN 'ALBUMIN'
+        WHEN itemid = 51144 THEN 'BANDS'
+        WHEN itemid = 50882 THEN 'BICARBONATE'
+        WHEN itemid = 50885 THEN 'BILIRUBIN'
+        WHEN itemid = 50912 THEN 'CREATININE'
+        WHEN itemid = 50806 THEN 'CHLORIDE'
+        WHEN itemid = 50902 THEN 'CHLORIDE'
+        WHEN itemid = 50809 THEN 'GLUCOSE'
+        WHEN itemid = 50931 THEN 'GLUCOSE'
+        WHEN itemid = 50810 THEN 'HEMATOCRIT'
+        WHEN itemid = 51221 THEN 'HEMATOCRIT'
+        WHEN itemid = 50811 THEN 'HEMOGLOBIN'
+        WHEN itemid = 51222 THEN 'HEMOGLOBIN'
+        WHEN itemid = 50813 THEN 'LACTATE'
+        WHEN itemid = 51265 THEN 'PLATELET'
+        WHEN itemid = 50822 THEN 'POTASSIUM'
+        WHEN itemid = 50971 THEN 'POTASSIUM'
+        WHEN itemid = 51275 THEN 'PTT'
+        WHEN itemid = 51237 THEN 'INR'
+        WHEN itemid = 51274 THEN 'PT'
+        WHEN itemid = 50824 THEN 'SODIUM'
+        WHEN itemid = 50983 THEN 'SODIUM'
+        WHEN itemid = 51006 THEN 'BUN'
+        WHEN itemid = 51300 THEN 'WBC'
+        WHEN itemid = 51301 THEN 'WBC'
+      ELSE null
+    END as label
+  , -- add in some sanity checks on the values
+  -- the join condition below requires all valuenum to be > 0, so these are only upper limit checks
+    CASE
+      WHEN itemid = 50862 and valuenum >    10 THEN null -- g/dL 'ALBUMIN'
+      WHEN itemid = 50868 and valuenum > 10000 THEN null -- mEq/L 'ANION GAP'
+      WHEN itemid = 51144 and valuenum <     0 THEN null -- immature band forms, % (unreachable: the join already requires valuenum > 0)
+      WHEN itemid = 51144 and valuenum >   100 THEN null -- immature band forms, %
+      WHEN itemid = 50882 and valuenum > 10000 THEN null -- mEq/L 'BICARBONATE'
+      WHEN itemid = 50885 and valuenum >   150 THEN null -- mg/dL 'BILIRUBIN'
+      WHEN itemid = 50806 and valuenum > 10000 THEN null -- mEq/L 'CHLORIDE'
+      WHEN itemid = 50902 and valuenum > 10000 THEN null -- mEq/L 'CHLORIDE'
+      WHEN itemid = 50912 and valuenum >   150 THEN null -- mg/dL 'CREATININE'
+      WHEN itemid = 50809 and valuenum > 10000 THEN null -- mg/dL 'GLUCOSE'
+      WHEN itemid = 50931 and valuenum > 10000 THEN null -- mg/dL 'GLUCOSE'
+      WHEN itemid = 50810 and valuenum >   100 THEN null -- % 'HEMATOCRIT'
+      WHEN itemid = 51221 and valuenum >   100 THEN null -- % 'HEMATOCRIT'
+      WHEN itemid = 50811 and valuenum >    50 THEN null -- g/dL 'HEMOGLOBIN'
+      WHEN itemid = 51222 and valuenum >    50 THEN null -- g/dL 'HEMOGLOBIN'
+      WHEN itemid = 50813 and valuenum >    50 THEN null -- mmol/L 'LACTATE'
+      WHEN itemid = 51265 and valuenum > 10000 THEN null -- K/uL 'PLATELET'
+      WHEN itemid = 50822 and valuenum >    30 THEN null -- mEq/L 'POTASSIUM'
+      WHEN itemid = 50971 and valuenum >    30 THEN null -- mEq/L 'POTASSIUM'
+      WHEN itemid = 51275 and valuenum >   150 THEN null -- sec 'PTT'
+      WHEN itemid = 51237 and valuenum >    50 THEN null -- 'INR'
+      WHEN itemid = 51274 and valuenum >   150 THEN null -- sec 'PT'
+      WHEN itemid = 50824 and valuenum >   200 THEN null -- mEq/L == mmol/L 'SODIUM'
+      WHEN itemid = 50983 and valuenum >   200 THEN null -- mEq/L == mmol/L 'SODIUM'
+      WHEN itemid = 51006 and valuenum >   300 THEN null -- 'BUN'
+      WHEN itemid = 51300 and valuenum >  1000 THEN null -- 'WBC'
+      WHEN itemid = 51301 and valuenum >  1000 THEN null -- 'WBC'
+    ELSE le.valuenum
+    END as valuenum
+
+  FROM icustays ie
+
+  LEFT JOIN labevents le
+    ON le.subject_id = ie.subject_id AND le.hadm_id = ie.hadm_id
+    AND le.charttime BETWEEN (DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)) AND (DATETIME_ADD(ie.intime, INTERVAL '1' DAY))
+    AND le.ITEMID in
+    (
+      -- comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS
+      50868, -- ANION GAP | CHEMISTRY | BLOOD | 769895
+      50862, -- ALBUMIN | CHEMISTRY | BLOOD | 146697
+      51144, -- BANDS - hematology
+      50882, -- BICARBONATE | CHEMISTRY | BLOOD | 780733
+      50885, -- BILIRUBIN, TOTAL | CHEMISTRY | BLOOD | 238277
+      50912, -- CREATININE | CHEMISTRY | BLOOD | 797476
+      50902, -- CHLORIDE | CHEMISTRY | BLOOD | 795568
+      50806, -- CHLORIDE, WHOLE BLOOD | BLOOD GAS | BLOOD | 48187
+      50931, -- GLUCOSE | CHEMISTRY | BLOOD | 748981
+      50809, -- GLUCOSE | BLOOD GAS | BLOOD | 196734
+      51221, -- HEMATOCRIT | HEMATOLOGY | BLOOD | 881846
+      50810, -- HEMATOCRIT, CALCULATED | BLOOD GAS | BLOOD | 89715
+      51222, -- HEMOGLOBIN | HEMATOLOGY | BLOOD | 752523
+      50811, -- HEMOGLOBIN | BLOOD GAS | BLOOD | 89712
+      50813, -- LACTATE | BLOOD GAS | BLOOD | 187124
+      51265, -- PLATELET COUNT | HEMATOLOGY | BLOOD | 778444
+      50971, -- POTASSIUM | CHEMISTRY | BLOOD | 845825
+      50822, -- POTASSIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 192946
+      51275, -- PTT | HEMATOLOGY | BLOOD | 474937
+      51237, -- INR(PT) | HEMATOLOGY | BLOOD | 471183
+      51274, -- PT | HEMATOLOGY | BLOOD | 469090
+      50983, -- SODIUM | CHEMISTRY | BLOOD | 808489
+      50824, -- SODIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 71503
+      51006, -- UREA NITROGEN | CHEMISTRY | BLOOD | 791925
+      51301, -- WHITE BLOOD CELLS | HEMATOLOGY | BLOOD | 753301
+      51300  -- WBC COUNT | HEMATOLOGY | BLOOD | 2371
+    )
+    AND valuenum IS NOT null AND valuenum > 0 -- lab values cannot be 0 and cannot be negative
+) pvt
+GROUP BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id
+ORDER BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id;
diff --git a/sql/firstday/urine_output_first_day.sql b/sql/firstday/urine_output_first_day.sql
new file mode 100644
index 0000000..c7a94ff
--- /dev/null
+++ b/sql/firstday/urine_output_first_day.sql
@@ -0,0 +1,58 @@
+-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
+DROP TABLE IF EXISTS urine_output_first_day; CREATE TABLE urine_output_first_day AS
+-- ------------------------------------------------------------------
+-- Purpose: Create a view of the urine output for each ICUSTAY_ID over the first 24 hours.
+-- ------------------------------------------------------------------
+
+select
+  -- patient identifiers
+  ie.subject_id, ie.hadm_id, ie.icustay_id
+
+  -- volumes associated with urine output ITEMIDs
+  , sum(
+      -- we consider input of GU irrigant as a negative volume
+      case
+        when oe.itemid = 227488 and oe.value > 0 then -1*oe.value
+        else oe.value
+    end) as urineoutput
+FROM icustays ie
+-- Join to the outputevents table to get urine output
+left join outputevents oe
+-- join on all patient identifiers
+on ie.subject_id = oe.subject_id and ie.hadm_id = oe.hadm_id and ie.icustay_id = oe.icustay_id
+-- and ensure the data occurs during the first day
+and oe.charttime between ie.intime and (DATETIME_ADD(ie.intime, INTERVAL '1' DAY)) -- first ICU day
+and oe.itemid in -- kept in the ON clause: a WHERE filter would drop stays without urine rows (urineoutput is NULL for them)
+(
+-- these are the most frequently occurring urine output observations in CareVue
+40055, -- "Urine Out Foley"
+43175, -- "Urine ."
+40069, -- "Urine Out Void"
+40094, -- "Urine Out Condom Cath"
+40715, -- "Urine Out Suprapubic"
+40473, -- "Urine Out IleoConduit"
+40085, -- "Urine Out Incontinent"
+40057, -- "Urine Out Rt Nephrostomy"
+40056, -- "Urine Out Lt Nephrostomy"
+40405, -- "Urine Out Other"
+40428, -- "Urine Out Straight Cath"
+40086,--	Urine Out Incontinent
+40096, -- "Urine Out Ureteral Stent #1"
+40651, -- "Urine Out Ureteral Stent #2"
+
+-- these are the most frequently occurring urine output observations in MetaVision
+226559, -- "Foley"
+226560, -- "Void"
+226561, -- "Condom Cath"
+226584, -- "Ileoconduit"
+226563, -- "Suprapubic"
+226564, -- "R Nephrostomy"
+226565, -- "L Nephrostomy"
+226567, --	Straight Cath
+226557, -- R Ureteral Stent
+226558, -- L Ureteral Stent
+227488, -- GU Irrigant Volume In
+227489  -- GU Irrigant/Urine Volume Out
+)
+group by ie.subject_id, ie.hadm_id, ie.icustay_id
+order by ie.subject_id, ie.hadm_id, ie.icustay_id;
diff --git a/sql/firstday/vitals_first_day.sql b/sql/firstday/vitals_first_day.sql
new file mode 100644
index 0000000..3a47f3c
--- /dev/null
+++
b/sql/firstday/vitals_first_day.sql
@@ -0,0 +1,120 @@
+-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
+DROP TABLE IF EXISTS vitals_first_day; CREATE TABLE vitals_first_day AS
+-- This query pivots the vital signs for the first 24 hours of a patient's stay (one row per ICU stay)
+-- Vital signs include heart rate, blood pressure, respiration rate, temperature, SpO2 and glucose
+
+SELECT pvt.subject_id, pvt.hadm_id, pvt.icustay_id
+
+-- Easier names
+, min(case when VitalID = 1 then valuenum ELSE NULL END) AS heartrate_min
+, max(case when VitalID = 1 then valuenum ELSE NULL END) AS heartrate_max
+, avg(case when VitalID = 1 then valuenum ELSE NULL END) AS heartrate_mean
+, min(case when VitalID = 2 then valuenum ELSE NULL END) AS sysbp_min
+, max(case when VitalID = 2 then valuenum ELSE NULL END) AS sysbp_max
+, avg(case when VitalID = 2 then valuenum ELSE NULL END) AS sysbp_mean
+, min(case when VitalID = 3 then valuenum ELSE NULL END) AS diasbp_min
+, max(case when VitalID = 3 then valuenum ELSE NULL END) AS diasbp_max
+, avg(case when VitalID = 3 then valuenum ELSE NULL END) AS diasbp_mean
+, min(case when VitalID = 4 then valuenum ELSE NULL END) AS meanbp_min
+, max(case when VitalID = 4 then valuenum ELSE NULL END) AS meanbp_max
+, avg(case when VitalID = 4 then valuenum ELSE NULL END) AS meanbp_mean
+, min(case when VitalID = 5 then valuenum ELSE NULL END) AS resprate_min
+, max(case when VitalID = 5 then valuenum ELSE NULL END) AS resprate_max
+, avg(case when VitalID = 5 then valuenum ELSE NULL END) AS resprate_mean
+, min(case when VitalID = 6 then valuenum ELSE NULL END) AS tempc_min
+, max(case when VitalID = 6 then valuenum ELSE NULL END) AS tempc_max
+, avg(case when VitalID = 6 then valuenum ELSE NULL END) AS tempc_mean
+, min(case when VitalID = 7 then valuenum ELSE NULL END) AS spo2_min
+, max(case when VitalID = 7 then valuenum ELSE NULL END) AS spo2_max
+, avg(case when VitalID = 7 then valuenum ELSE NULL END) AS spo2_mean
+, min(case when VitalID = 8 then valuenum ELSE NULL END) AS glucose_min
+, max(case when VitalID = 8 then valuenum ELSE NULL END) AS glucose_max
+, avg(case when VitalID = 8 then valuenum ELSE NULL END) AS glucose_mean
+
+FROM  (
+  select ie.subject_id, ie.hadm_id, ie.icustay_id
+  , case
+    when itemid in (211,220045) and valuenum > 0 and valuenum < 300 then 1 -- HeartRate
+    when itemid in (51,442,455,6701,220179,220050) and valuenum > 0 and valuenum < 400 then 2 -- SysBP
+    when itemid in (8368,8440,8441,8555,220180,220051) and valuenum > 0 and valuenum < 300 then 3 -- DiasBP
+    when itemid in (456,52,6702,443,220052,220181,225312) and valuenum > 0 and valuenum < 300 then 4 -- MeanBP
+    when itemid in (615,618,220210,224690) and valuenum > 0 and valuenum < 70 then 5 -- RespRate
+    when itemid in (223761,678) and valuenum > 70 and valuenum < 120  then 6 -- TempF, converted to degC in valuenum call
+    when itemid in (223762,676) and valuenum > 10 and valuenum < 50  then 6 -- TempC
+    when itemid in (646,220277) and valuenum > 0 and valuenum <= 100 then 7 -- SpO2
+    when itemid in (807,811,1529,3745,3744,225664,220621,226537) and valuenum > 0 then 8 -- Glucose
+
+    else null end as vitalid
+      -- convert F to C
+  , case when itemid in (223761,678) then (valuenum-32)/1.8 else valuenum end as valuenum
+
+  from icustays ie
+  left join chartevents ce
+  on ie.icustay_id = ce.icustay_id
+  and ce.charttime between ie.intime and DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
+  and DATETIME_DIFF(ce.charttime, ie.intime, 'SECOND') > 0
+  and DATETIME_DIFF(ce.charttime, ie.intime, 'HOUR') <= 24
+  -- exclude rows marked as error
+  and (ce.error IS NULL or ce.error = 0)
+  and ce.itemid in -- kept in the ON clause: a WHERE filter would drop stays without charted vitals (they get all-NULL columns)
+  (
+  -- HEART RATE
+  211, --"Heart Rate"
+  220045, --"Heart Rate"
+
+  -- Systolic/diastolic
+
+  51, --	Arterial BP [Systolic]
+  442, --	Manual BP [Systolic]
+  455, --	NBP [Systolic]
+  6701, --	Arterial BP #2 [Systolic]
+  220179, --	Non Invasive Blood Pressure systolic
+  220050, --	Arterial Blood Pressure systolic
+
+  8368, --	Arterial BP [Diastolic]
+  8440, --	Manual BP [Diastolic]
+  8441, --	NBP [Diastolic]
+  8555, --	Arterial BP #2 [Diastolic]
+  220180, --	Non Invasive Blood Pressure diastolic
+  220051, --	Arterial Blood Pressure diastolic
+
+
+  -- MEAN ARTERIAL PRESSURE
+  456, --"NBP Mean"
+  52, --"Arterial BP Mean"
+  6702, --	Arterial BP Mean #2
+  443, --	Manual BP Mean(calc)
+  220052, --"Arterial Blood Pressure mean"
+  220181, --"Non Invasive Blood Pressure mean"
+  225312, --"ART BP mean"
+
+  -- RESPIRATORY RATE
+  618,--	Respiratory Rate
+  615,--	Resp Rate (Total)
+  220210,--	Respiratory Rate
+  224690, --	Respiratory Rate (Total)
+
+
+  -- SPO2, peripheral
+  646, 220277,
+
+  -- GLUCOSE, both lab and fingerstick
+  807,--	Fingerstick Glucose
+  811,--	Glucose (70-105)
+  1529,--	Glucose
+  3745,--	BloodGlucose
+  3744,--	Blood Glucose
+  225664,--	Glucose finger stick
+  220621,--	Glucose (serum)
+  226537,--	Glucose (whole blood)
+
+  -- TEMPERATURE
+  223762, -- "Temperature Celsius"
+  676,	-- "Temperature C"
+  223761, -- "Temperature Fahrenheit"
+  678 --	"Temperature F"
+
+  )
+) pvt
+group by pvt.subject_id, pvt.hadm_id, pvt.icustay_id
+order by pvt.subject_id, pvt.hadm_id, pvt.icustay_id;
diff --git a/sql/fluid_balance/urine_output.sql b/sql/fluid_balance/urine_output.sql
new file mode 100644
index 0000000..1305620
--- /dev/null
+++ b/sql/fluid_balance/urine_output.sql
@@ -0,0 +1,45 @@
+-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
+DROP TABLE IF EXISTS urine_output; CREATE TABLE urine_output AS
+-- First we drop the table if it exists
+select oe.icustay_id, oe.charttime
+, SUM(
+    -- we consider input of GU irrigant as a negative volume
+    case when oe.itemid = 227488 then -1*value
+    else value end
+  ) as value
+from outputevents oe
+where oe.itemid in
+(
+  -- these are the most frequently occurring urine output observations in CareVue
+  40055, -- "Urine Out Foley"
+  43175, -- "Urine ."
+ 40069, -- "Urine Out Void" + 40094, -- "Urine Out Condom Cath" + 40715, -- "Urine Out Suprapubic" + 40473, -- "Urine Out IleoConduit" + 40085, -- "Urine Out Incontinent" + 40057, -- "Urine Out Rt Nephrostomy" + 40056, -- "Urine Out Lt Nephrostomy" + 40405, -- "Urine Out Other" + 40428, -- "Urine Out Straight Cath" + 40086,-- Urine Out Incontinent + 40096, -- "Urine Out Ureteral Stent #1" + 40651, -- "Urine Out Ureteral Stent #2" + + -- these are the most frequently occurring urine output observations in MetaVision + 226559, -- "Foley" + 226560, -- "Void" + 226561, -- "Condom Cath" + 226584, -- "Ileoconduit" + 226563, -- "Suprapubic" + 226564, -- "R Nephrostomy" + 226565, -- "L Nephrostomy" + 226567, -- Straight Cath + 226557, -- R Ureteral Stent + 226558, -- L Ureteral Stent + 227488, -- GU Irrigant Volume In + 227489 -- GU Irrigant/Urine Volume Out +) +and oe.value < 5000 -- sanity check on urine value +and oe.icustay_id is not null +group by icustay_id, charttime; diff --git a/sql/postgres-functions.sql b/sql/postgres-functions.sql new file mode 100644 index 0000000..e886b9a --- /dev/null +++ b/sql/postgres-functions.sql @@ -0,0 +1,163 @@ +-- (Optional): set the search_path so all functions are generated on the mimiciii schema +-- SET search_path TO mimiciii; + +CREATE OR REPLACE FUNCTION REGEXP_EXTRACT(str TEXT, pattern TEXT) RETURNS TEXT AS $$ +BEGIN +RETURN substring(str from pattern); +END; $$ +LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION REGEXP_CONTAINS(str TEXT, pattern TEXT) RETURNS BOOL AS $$ +BEGIN +RETURN str ~ pattern; +END; $$ +LANGUAGE PLPGSQL; + +-- alias generate_series with generate_array +CREATE OR REPLACE FUNCTION GENERATE_ARRAY(i INTEGER, j INTEGER) +RETURNS setof INTEGER language sql as $$ + SELECT GENERATE_SERIES(i, j) +$$; + +-- datetime functions +CREATE OR REPLACE FUNCTION DATETIME(dt DATE) RETURNS TIMESTAMP(3) AS $$ +BEGIN +RETURN CAST(dt AS TIMESTAMP(3)); +END; $$ +LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION DATETIME(year 
INTEGER, month INTEGER, day INTEGER, hour INTEGER, minute INTEGER, second INTEGER) RETURNS TIMESTAMP(3) AS $$ +BEGIN +RETURN TO_TIMESTAMP( + TO_CHAR(year, '0000') || TO_CHAR(month, '00') || TO_CHAR(day, '00') || TO_CHAR(hour, '00') || TO_CHAR(minute, '00') || TO_CHAR(second, '00'), + 'yyyymmddHH24MISS' +); +END; $$ +LANGUAGE PLPGSQL; + +-- note: in bigquery, `INTERVAL 1 YEAR` is a valid interval +-- but in postgres, it must be `INTERVAL '1' YEAR` + +-- DATETIME_ADD(datetime, INTERVAL 'n' DATEPART) -> datetime + INTERVAL 'n' DATEPART +CREATE OR REPLACE FUNCTION DATETIME_ADD(datetime_val TIMESTAMP(3), intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$ +BEGIN +RETURN datetime_val + intvl; +END; $$ +LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION DATE_ADD(dt DATE, intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$ +BEGIN +RETURN CAST(dt AS TIMESTAMP(3)) + intvl; +END; $$ +LANGUAGE PLPGSQL; + +-- DATETIME_SUB(datetime, INTERVAL 'n' DATEPART) -> datetime - INTERVAL 'n' DATEPART +CREATE OR REPLACE FUNCTION DATETIME_SUB(datetime_val TIMESTAMP(3), intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$ +BEGIN +RETURN datetime_val - intvl; +END; $$ +LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION DATE_SUB(dt DATE, intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$ +BEGIN +RETURN CAST(dt AS TIMESTAMP(3)) - intvl; +END; $$ +LANGUAGE PLPGSQL; + +-- TODO: +-- DATETIME_TRUNC(datetime, PART) -> DATE_TRUNC('datepart', datetime) + +-- below requires a regex to convert datepart from primitive to a string +-- i.e. 
encapsulate it in single quotes +CREATE OR REPLACE FUNCTION DATETIME_DIFF(endtime TIMESTAMP(3), starttime TIMESTAMP(3), datepart TEXT) RETURNS NUMERIC AS $$ +BEGIN +RETURN + EXTRACT(EPOCH FROM endtime - starttime) / + CASE + WHEN datepart = 'SECOND' THEN 1.0 + WHEN datepart = 'MINUTE' THEN 60.0 + WHEN datepart = 'HOUR' THEN 3600.0 + WHEN datepart = 'DAY' THEN 24*3600.0 + WHEN datepart = 'YEAR' THEN 365.242*24*3600.0 + ELSE NULL END; +END; $$ +LANGUAGE PLPGSQL; + +-- BigQuery has a custom data type, PART +-- It's difficult to replicate this in postgresql, which recognizes the PART as a column name, +-- unless it is within an EXTRACT() function. + +CREATE OR REPLACE FUNCTION BIGQUERY_FORMAT_TO_PSQL(format_str VARCHAR(255)) RETURNS TEXT AS $$ +BEGIN +RETURN + -- use replace to convert BigQuery string format to postgres string format + -- only handles a few cases since we don't extensively use this function + REPLACE( + REPLACE( + REPLACE( + REPLACE( + REPLACE( + REPLACE( + format_str + , '%S', 'SS' + ) + , '%M', 'MI' + ) + , '%H', 'HH24' + ) + , '%d', 'dd' + ) + , '%m', 'mm' + ) + , '%Y', 'yyyy' + ) +; +END; $$ +LANGUAGE PLPGSQL; + + +CREATE OR REPLACE FUNCTION FORMAT_DATE(format_str VARCHAR(255), datetime_val TIMESTAMP(3)) RETURNS TEXT AS $$ +BEGIN +RETURN TO_CHAR( + datetime_val, + -- use replace to convert BigQuery string format to postgres string format + -- only handles a few cases since we don't extensively use this function + BIGQUERY_FORMAT_TO_PSQL(format_str) +); +END; $$ +LANGUAGE PLPGSQL; + + +CREATE OR REPLACE FUNCTION PARSE_DATE(format_str VARCHAR(255), string_val VARCHAR(255)) RETURNS DATE AS $$ +BEGIN +RETURN TO_DATE( + string_val, + -- use replace to convert BigQuery string format to postgres string format + -- only handles a few cases since we don't extensively use this function + BIGQUERY_FORMAT_TO_PSQL(format_str) +); +END; $$ +LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION FORMAT_DATETIME(format_str VARCHAR(255), datetime_val TIMESTAMP(3)) RETURNS 
TEXT AS $$ +BEGIN +RETURN TO_CHAR( + datetime_val, + -- use replace to convert BigQuery string format to postgres string format + -- only handles a few cases since we don't extensively use this function + BIGQUERY_FORMAT_TO_PSQL(format_str) +); +END; $$ +LANGUAGE PLPGSQL; + + +CREATE OR REPLACE FUNCTION PARSE_DATETIME(format_str VARCHAR(255), string_val VARCHAR(255)) RETURNS TIMESTAMP(3) AS $$ +BEGIN +RETURN TO_TIMESTAMP( + string_val, + -- use replace to convert BigQuery string format to postgres string format + -- only handles a few cases since we don't extensively use this function + BIGQUERY_FORMAT_TO_PSQL(format_str) +); +END; $$ +LANGUAGE PLPGSQL; diff --git a/sql/schemas.sql b/sql/schemas.sql new file mode 100644 index 0000000..197bcc6 --- /dev/null +++ b/sql/schemas.sql @@ -0,0 +1,605 @@ +-- ------------------------------------------------------------------ +-- Reference CREATE TABLE schemas for every derived table produced by +-- sql/build_sapsii.sql +-- sql/build_sepsis3.sql +-- +-- This file is documentation only. The actual build scripts use +-- `DROP TABLE IF EXISTS ...; CREATE TABLE ... AS SELECT ...`, so +-- column types are inferred by PostgreSQL at build time from the +-- MIMIC-III v1.3 base schema and from the expressions in the SELECT. +-- The types below match what PostgreSQL infers when the build is run +-- on a stock MIMIC-III v1.3 PostgreSQL restore (where for example +-- chartevents.valuenum is DOUBLE PRECISION, outputevents.value is +-- DOUBLE PRECISION, *.charttime is TIMESTAMP(0), etc.). +-- +-- Use this file as: +-- * a quick reference for column names and types of each derived +-- table (handy for downstream consumers that need to know the +-- output schema without grep'ing through the build SQL); +-- * a stub for declaring empty derived tables ahead of time (e.g. 
+--     in a migration that just `CREATE TABLE IF NOT EXISTS ...`s
+--     them, then later runs the build to populate them);
+--   * a checklist when porting these scripts to another flavor of
+--     MIMIC (e.g. MIMIC-III v1.4 or MIMIC-IV).
+-- ------------------------------------------------------------------
+
+
+-- ==================================================================
+-- SAPS-II
+-- ==================================================================
+
+-- 1. Helper: all-time urine output (from outputevents).
+--    The build (sql/fluid_balance/urine_output.sql) groups by
+--    (icustay_id, charttime) and sums `value`, counting GU irrigant
+--    input (itemid 227488) as a negative volume.
+DROP TABLE IF EXISTS urine_output;
+CREATE TABLE urine_output
+(
+      icustay_id  INTEGER
+    , charttime   TIMESTAMP(0)
+    , value       DOUBLE PRECISION
+);
+
+-- 2. Ventilation: classification (per charttime) and durations
+--    (per ventilation episode).
+
+-- 2a. Per-charttime classification flags.
+DROP TABLE IF EXISTS ventilation_classification;
+CREATE TABLE ventilation_classification
+(
+      icustay_id     INTEGER
+    , charttime      TIMESTAMP(0)
+    , mechvent       INTEGER
+    , oxygentherapy  INTEGER
+    , extubated      INTEGER
+    , selfextubated  INTEGER
+);
+
+-- 2b. Per-episode durations; `ventnum` numbers the episodes.
+DROP TABLE IF EXISTS ventilation_durations;
+CREATE TABLE ventilation_durations
+(
+      icustay_id      INTEGER
+    , ventnum         BIGINT
+    , starttime       TIMESTAMP(0)
+    , endtime         TIMESTAMP(0)
+    , duration_hours  NUMERIC
+);
+
+-- 3. First-day pivots feeding SAPS-II.
+-- 3a. Blood-gas pivot: one column per blood-gas measurement, plus
+--     the free-text specimen type.
+DROP TABLE IF EXISTS blood_gas_first_day;
+CREATE TABLE blood_gas_first_day
+(
+      subject_id         INTEGER
+    , hadm_id            INTEGER
+    , icustay_id         INTEGER
+    , charttime          TIMESTAMP(0)
+    , specimen           VARCHAR(200)
+    , aado2              DOUBLE PRECISION
+    , baseexcess         DOUBLE PRECISION
+    , bicarbonate        DOUBLE PRECISION
+    , totalco2           DOUBLE PRECISION
+    , carboxyhemoglobin  DOUBLE PRECISION
+    , chloride           DOUBLE PRECISION
+    , calcium            DOUBLE PRECISION
+    , glucose            DOUBLE PRECISION
+    , hematocrit         DOUBLE PRECISION
+    , hemoglobin         DOUBLE PRECISION
+    , intubated          DOUBLE PRECISION
+    , lactate            DOUBLE PRECISION
+    , methemoglobin      DOUBLE PRECISION
+    , o2flow             DOUBLE PRECISION
+    , fio2               DOUBLE PRECISION
+    , so2                DOUBLE PRECISION
+    , pco2               DOUBLE PRECISION
+    , peep               DOUBLE PRECISION
+    , ph                 DOUBLE PRECISION
+    , po2                DOUBLE PRECISION
+    , potassium          DOUBLE PRECISION
+    , requiredo2         DOUBLE PRECISION
+    , sodium             DOUBLE PRECISION
+    , temperature        DOUBLE PRECISION
+    , tidalvolume        DOUBLE PRECISION
+    , ventilationrate    DOUBLE PRECISION
+    , ventilator         DOUBLE PRECISION
+);
+
+-- 3b. Arterial variant: adds the predicted specimen
+--     (specimen_pred / specimen_prob), charted SpO2 / FiO2, and the
+--     derived aado2_calc and pao2fio2 columns.
+DROP TABLE IF EXISTS blood_gas_first_day_arterial;
+CREATE TABLE blood_gas_first_day_arterial
+(
+      subject_id         INTEGER
+    , hadm_id            INTEGER
+    , icustay_id         INTEGER
+    , charttime          TIMESTAMP(0)
+    , specimen           VARCHAR(200)
+    , specimen_pred      VARCHAR(200)
+    , specimen_prob      DOUBLE PRECISION
+    , so2                DOUBLE PRECISION
+    , spo2               DOUBLE PRECISION
+    , po2                DOUBLE PRECISION
+    , pco2               DOUBLE PRECISION
+    , fio2_chartevents   DOUBLE PRECISION
+    , fio2               DOUBLE PRECISION
+    , aado2              DOUBLE PRECISION
+    , aado2_calc         DOUBLE PRECISION
+    , pao2fio2           DOUBLE PRECISION
+    , ph                 DOUBLE PRECISION
+    , baseexcess         DOUBLE PRECISION
+    , bicarbonate        DOUBLE PRECISION
+    , totalco2           DOUBLE PRECISION
+    , hematocrit         DOUBLE PRECISION
+    , hemoglobin         DOUBLE PRECISION
+    , carboxyhemoglobin  DOUBLE PRECISION
+    , methemoglobin      DOUBLE PRECISION
+    , chloride           DOUBLE PRECISION
+    , calcium            DOUBLE PRECISION
+    , temperature        DOUBLE PRECISION
+    , potassium          DOUBLE PRECISION
+    , sodium             DOUBLE PRECISION
+    , lactate            DOUBLE PRECISION
+    , glucose            DOUBLE PRECISION
+    , intubated          DOUBLE PRECISION
+    , tidalvolume        DOUBLE PRECISION
+    , ventilationrate    DOUBLE PRECISION
+    , ventilator         DOUBLE PRECISION
+    , peep               DOUBLE PRECISION
+    , o2flow             DOUBLE PRECISION
+    , requiredo2         DOUBLE PRECISION
+);
+
+-- 3c. Glasgow Coma Scale summary per ICU stay.
+DROP TABLE IF EXISTS gcs_first_day;
+CREATE TABLE gcs_first_day
+(
+      subject_id     INTEGER
+    , hadm_id        INTEGER
+    , icustay_id     INTEGER
+    , mingcs         DOUBLE PRECISION
+    , gcsmotor       DOUBLE PRECISION
+    , gcsverbal      DOUBLE PRECISION
+    , gcseyes        DOUBLE PRECISION
+    , endotrachflag  INTEGER
+);
+
+-- 3d. Laboratory min / max per ICU stay (one *_min / *_max pair
+--     per analyte).
+DROP TABLE IF EXISTS labs_first_day;
+CREATE TABLE labs_first_day
+(
+      subject_id       INTEGER
+    , hadm_id          INTEGER
+    , icustay_id       INTEGER
+    , aniongap_min     DOUBLE PRECISION
+    , aniongap_max     DOUBLE PRECISION
+    , albumin_min      DOUBLE PRECISION
+    , albumin_max      DOUBLE PRECISION
+    , bands_min        DOUBLE PRECISION
+    , bands_max        DOUBLE PRECISION
+    , bicarbonate_min  DOUBLE PRECISION
+    , bicarbonate_max  DOUBLE PRECISION
+    , bilirubin_min    DOUBLE PRECISION
+    , bilirubin_max    DOUBLE PRECISION
+    , creatinine_min   DOUBLE PRECISION
+    , creatinine_max   DOUBLE PRECISION
+    , chloride_min     DOUBLE PRECISION
+    , chloride_max     DOUBLE PRECISION
+    , glucose_min      DOUBLE PRECISION
+    , glucose_max      DOUBLE PRECISION
+    , hematocrit_min   DOUBLE PRECISION
+    , hematocrit_max   DOUBLE PRECISION
+    , hemoglobin_min   DOUBLE PRECISION
+    , hemoglobin_max   DOUBLE PRECISION
+    , lactate_min      DOUBLE PRECISION
+    , lactate_max      DOUBLE PRECISION
+    , platelet_min     DOUBLE PRECISION
+    , platelet_max     DOUBLE PRECISION
+    , potassium_min    DOUBLE PRECISION
+    , potassium_max    DOUBLE PRECISION
+    , ptt_min          DOUBLE PRECISION
+    , ptt_max          DOUBLE PRECISION
+    , inr_min          DOUBLE PRECISION
+    , inr_max          DOUBLE PRECISION
+    , pt_min           DOUBLE PRECISION
+    , pt_max           DOUBLE PRECISION
+    , sodium_min       DOUBLE PRECISION
+    , sodium_max       DOUBLE PRECISION
+    , bun_min          DOUBLE PRECISION
+    , bun_max          DOUBLE PRECISION
+    , wbc_min          DOUBLE PRECISION
+    , wbc_max          DOUBLE PRECISION
+);
+
+-- 3e. Urine output total per ICU stay.
+DROP TABLE IF EXISTS urine_output_first_day;
+CREATE TABLE urine_output_first_day
+(
+      subject_id   INTEGER
+    , hadm_id      INTEGER
+    , icustay_id   INTEGER
+    , urineoutput  DOUBLE PRECISION
+);
+
+-- 3f. Vital-sign min / max / mean per ICU stay.
+DROP TABLE IF EXISTS vitals_first_day;
+CREATE TABLE vitals_first_day
+(
+      subject_id      INTEGER
+    , hadm_id         INTEGER
+    , icustay_id      INTEGER
+    , heartrate_min   DOUBLE PRECISION
+    , heartrate_max   DOUBLE PRECISION
+    , heartrate_mean  DOUBLE PRECISION
+    , sysbp_min       DOUBLE PRECISION
+    , sysbp_max       DOUBLE PRECISION
+    , sysbp_mean      DOUBLE PRECISION
+    , diasbp_min      DOUBLE PRECISION
+    , diasbp_max      DOUBLE PRECISION
+    , diasbp_mean     DOUBLE PRECISION
+    , meanbp_min      DOUBLE PRECISION
+    , meanbp_max      DOUBLE PRECISION
+    , meanbp_mean     DOUBLE PRECISION
+    , resprate_min    DOUBLE PRECISION
+    , resprate_max    DOUBLE PRECISION
+    , resprate_mean   DOUBLE PRECISION
+    , tempc_min       DOUBLE PRECISION
+    , tempc_max       DOUBLE PRECISION
+    , tempc_mean      DOUBLE PRECISION
+    , spo2_min        DOUBLE PRECISION
+    , spo2_max        DOUBLE PRECISION
+    , spo2_mean       DOUBLE PRECISION
+    , glucose_min     DOUBLE PRECISION
+    , glucose_max     DOUBLE PRECISION
+    , glucose_mean    DOUBLE PRECISION
+);
+
+-- 4. Final SAPS-II score table (one row per ICU stay): the total
+--    (`sapsii`), its probability (`sapsii_prob`) and one *_score
+--    column per component.
+DROP TABLE IF EXISTS sapsii;
+CREATE TABLE sapsii
+(
+      subject_id           INTEGER
+    , hadm_id              INTEGER
+    , icustay_id           INTEGER
+    , sapsii               INTEGER
+    , sapsii_prob          DOUBLE PRECISION
+    , age_score            INTEGER
+    , hr_score             INTEGER
+    , sysbp_score          INTEGER
+    , temp_score           INTEGER
+    , pao2fio2_score       INTEGER
+    , uo_score             INTEGER
+    , bun_score            INTEGER
+    , wbc_score            INTEGER
+    , potassium_score      INTEGER
+    , sodium_score         INTEGER
+    , bicarbonate_score    INTEGER
+    , bilirubin_score      INTEGER
+    , gcs_score            INTEGER
+    , comorbidity_score    INTEGER
+    , admissiontype_score  INTEGER
+);
+
+
+-- ==================================================================
+-- Sepsis-3
+-- ==================================================================
+--
+-- Sepsis-3 reuses these SAPS-II tables:
+--     urine_output, ventilation_classification, ventilation_durations
+-- (defined above). The tables below are the ones added by
+-- build_sepsis3.sql.
+
+-- 1. Echo extraction (used to impute weight when chartevents weight
+--    is missing; also keyed by ROW_ID to the noteevents row).
+DROP TABLE IF EXISTS echo_data;
+CREATE TABLE echo_data
+(
+      row_id            INTEGER
+    , subject_id        INTEGER
+    , hadm_id           INTEGER
+    , chartdate         TIMESTAMP(0)
+    , charttime         TIMESTAMP(3)
+    , indication        TEXT
+    , height            NUMERIC
+    , weight            NUMERIC
+    , bsa               NUMERIC
+    , bp                TEXT
+    , bpsys             NUMERIC
+    , bpdias            NUMERIC
+    , hr                NUMERIC
+    , status            TEXT
+    , test              TEXT
+    , doppler           TEXT
+    , contrast          TEXT
+    , technicalquality  TEXT
+);
+
+-- 2. Per-stay weight durations (admit + daily + neonate + echo
+--    imputed); used for mcg/kg/min vasopressor unit conversion.
+DROP TABLE IF EXISTS weight_durations;
+CREATE TABLE weight_durations
+(
+      icustay_id  INTEGER
+    , starttime   TIMESTAMP(0)
+    , endtime     TIMESTAMP(0)
+    , weight      DOUBLE PRECISION
+);
+
+-- 3. Vasopressor dose tables. All four have the same schema; rates
+--    are merged CareVue + MetaVision and converted to mcg/kg/min.
+--    Each row is an interval [starttime, endtime] with its rate and
+--    amount.
+DROP TABLE IF EXISTS dobutamine_dose;
+CREATE TABLE dobutamine_dose
+(
+      icustay_id   INTEGER
+    , starttime    TIMESTAMP(0)
+    , endtime      TIMESTAMP(0)
+    , vaso_rate    DOUBLE PRECISION
+    , vaso_amount  DOUBLE PRECISION
+);
+
+DROP TABLE IF EXISTS dopamine_dose;
+CREATE TABLE dopamine_dose
+(
+      icustay_id   INTEGER
+    , starttime    TIMESTAMP(0)
+    , endtime      TIMESTAMP(0)
+    , vaso_rate    DOUBLE PRECISION
+    , vaso_amount  DOUBLE PRECISION
+);
+
+DROP TABLE IF EXISTS epinephrine_dose;
+CREATE TABLE epinephrine_dose
+(
+      icustay_id   INTEGER
+    , starttime    TIMESTAMP(0)
+    , endtime      TIMESTAMP(0)
+    , vaso_rate    DOUBLE PRECISION
+    , vaso_amount  DOUBLE PRECISION
+);
+
+DROP TABLE IF EXISTS norepinephrine_dose;
+CREATE TABLE norepinephrine_dose
+(
+      icustay_id   INTEGER
+    , starttime    TIMESTAMP(0)
+    , endtime      TIMESTAMP(0)
+    , vaso_rate    DOUBLE PRECISION
+    , vaso_amount  DOUBLE PRECISION
+);
+
+-- 4. All-time pivots feeding hourly SOFA.
+-- 4a. Arterial blood gases; same column list as
+--     blood_gas_first_day_arterial.
+DROP TABLE IF EXISTS blood_gas_arterial;
+CREATE TABLE blood_gas_arterial
+(
+      subject_id         INTEGER
+    , hadm_id            INTEGER
+    , icustay_id         INTEGER
+    , charttime          TIMESTAMP(0)
+    , specimen           VARCHAR(200)
+    , specimen_pred      VARCHAR(200)
+    , specimen_prob      DOUBLE PRECISION
+    , so2                DOUBLE PRECISION
+    , spo2               DOUBLE PRECISION
+    , po2                DOUBLE PRECISION
+    , pco2               DOUBLE PRECISION
+    , fio2_chartevents   DOUBLE PRECISION
+    , fio2               DOUBLE PRECISION
+    , aado2              DOUBLE PRECISION
+    , aado2_calc         DOUBLE PRECISION
+    , pao2fio2           DOUBLE PRECISION
+    , ph                 DOUBLE PRECISION
+    , baseexcess         DOUBLE PRECISION
+    , bicarbonate        DOUBLE PRECISION
+    , totalco2           DOUBLE PRECISION
+    , hematocrit         DOUBLE PRECISION
+    , hemoglobin         DOUBLE PRECISION
+    , carboxyhemoglobin  DOUBLE PRECISION
+    , methemoglobin      DOUBLE PRECISION
+    , chloride           DOUBLE PRECISION
+    , calcium            DOUBLE PRECISION
+    , temperature        DOUBLE PRECISION
+    , potassium          DOUBLE PRECISION
+    , sodium             DOUBLE PRECISION
+    , lactate            DOUBLE PRECISION
+    , glucose            DOUBLE PRECISION
+    , intubated          DOUBLE PRECISION
+    , tidalvolume        DOUBLE PRECISION
+    , ventilationrate    DOUBLE PRECISION
+    , ventilator         DOUBLE PRECISION
+    , peep               DOUBLE PRECISION
+    , o2flow             DOUBLE PRECISION
+    , requiredo2         DOUBLE PRECISION
+);
+
+-- 4b. GCS at every charted time.
+DROP TABLE IF EXISTS gcs_all;
+CREATE TABLE gcs_all
+(
+      icustay_id     INTEGER
+    , charttime      TIMESTAMP(0)
+    , gcs            DOUBLE PRECISION
+    , endotrachflag  INTEGER
+);
+
+-- 5. Hourly SOFA pipeline. Each measurement class is materialised
+--    into a narrow staging table keyed by (icustay_id, hr); these
+--    are kept (not dropped) so each stage can be inspected with
+--    EXPLAIN ANALYZE.
+
+-- 5a. Hourly grid (one row per ICU hour per stay).
+DROP TABLE IF EXISTS sofa_grid;
+CREATE TABLE sofa_grid
+(
+      subject_id  INTEGER
+    , hadm_id     INTEGER
+    , icustay_id  INTEGER
+    , hr          INTEGER
+    , starttime   TIMESTAMP(0)
+    , endtime     TIMESTAMP(0)
+);
+
+-- 5b. MAP minimum within each hour.
+DROP TABLE IF EXISTS sofa_vs;
+CREATE TABLE sofa_vs
+(
+      icustay_id  INTEGER
+    , hr          INTEGER
+    , meanbp_min  DOUBLE PRECISION
+);
+
+-- 5c. GCS minimum within each hour (from gcs_all, which already has
+--     the carry-forward and ET-trach=15 rules applied).
+DROP TABLE IF EXISTS sofa_gcs;
+CREATE TABLE sofa_gcs
+(
+      icustay_id  INTEGER
+    , hr          INTEGER
+    , gcs_min     DOUBLE PRECISION
+);
+
+-- 5d. Bilirubin maximum within each hour.
+DROP TABLE IF EXISTS sofa_bili;
+CREATE TABLE sofa_bili
+(
+      icustay_id     INTEGER
+    , hr             INTEGER
+    , bilirubin_max  DOUBLE PRECISION
+);
+
+-- 5e. Creatinine maximum within each hour.
+DROP TABLE IF EXISTS sofa_cr;
+CREATE TABLE sofa_cr
+(
+      icustay_id      INTEGER
+    , hr              INTEGER
+    , creatinine_max  DOUBLE PRECISION
+);
+
+-- 5f. Platelet minimum within each hour.
+DROP TABLE IF EXISTS sofa_plt;
+CREATE TABLE sofa_plt
+(
+      icustay_id    INTEGER
+    , hr            INTEGER
+    , platelet_min  DOUBLE PRECISION
+);
+
+-- 5g. PaO2/FiO2: split into vent / no-vent based on whether an
+--     active ventilation episode covered the blood gas.
+DROP TABLE IF EXISTS sofa_pf;
+CREATE TABLE sofa_pf
+(
+      icustay_id       INTEGER
+    , hr               INTEGER
+    , pao2fio2_novent  DOUBLE PRECISION
+    , pao2fio2_vent    DOUBLE PRECISION
+);
+
+-- 5h. Urine output rolling sum + count of distinct charted hours
+--     within the past 24 h.
+DROP TABLE IF EXISTS sofa_uo;
+CREATE TABLE sofa_uo
+(
+      icustay_id  INTEGER
+    , hr          INTEGER
+    , uo_24hr     DOUBLE PRECISION
+    , uo_tm_24hr  BIGINT
+);
+
+-- 5i. Vasopressor rate snapshot at the hour boundary.
+DROP TABLE IF EXISTS sofa_vaso;
+CREATE TABLE sofa_vaso
+(
+      icustay_id           INTEGER
+    , hr                   INTEGER
+    , rate_epinephrine     DOUBLE PRECISION
+    , rate_norepinephrine  DOUBLE PRECISION
+    , rate_dopamine        DOUBLE PRECISION
+    , rate_dobutamine      DOUBLE PRECISION
+);
+
+-- 5j. Wide assembly: grid LEFT JOINed onto every measurement table.
+-- Columns: the six grid columns followed by every staging-table
+-- measure.
+DROP TABLE IF EXISTS sofa_wide;
+CREATE TABLE sofa_wide
+(
+      subject_id           INTEGER
+    , hadm_id              INTEGER
+    , icustay_id           INTEGER
+    , hr                   INTEGER
+    , starttime            TIMESTAMP(0)
+    , endtime              TIMESTAMP(0)
+    , meanbp_min           DOUBLE PRECISION
+    , gcs_min              DOUBLE PRECISION
+    , bilirubin_max        DOUBLE PRECISION
+    , creatinine_max       DOUBLE PRECISION
+    , platelet_min         DOUBLE PRECISION
+    , pao2fio2_novent      DOUBLE PRECISION
+    , pao2fio2_vent        DOUBLE PRECISION
+    , uo_24hr              DOUBLE PRECISION
+    , uo_tm_24hr           BIGINT
+    , rate_epinephrine     DOUBLE PRECISION
+    , rate_norepinephrine  DOUBLE PRECISION
+    , rate_dopamine        DOUBLE PRECISION
+    , rate_dobutamine      DOUBLE PRECISION
+);
+
+-- 5k. Per-hour component scores (no rolling window yet).
+--     Same columns as sofa_wide plus the six component scores.
+DROP TABLE IF EXISTS sofa_components;
+CREATE TABLE sofa_components
+(
+      subject_id           INTEGER
+    , hadm_id              INTEGER
+    , icustay_id           INTEGER
+    , hr                   INTEGER
+    , starttime            TIMESTAMP(0)
+    , endtime              TIMESTAMP(0)
+    , meanbp_min           DOUBLE PRECISION
+    , gcs_min              DOUBLE PRECISION
+    , bilirubin_max        DOUBLE PRECISION
+    , creatinine_max       DOUBLE PRECISION
+    , platelet_min         DOUBLE PRECISION
+    , pao2fio2_novent      DOUBLE PRECISION
+    , pao2fio2_vent        DOUBLE PRECISION
+    , uo_24hr              DOUBLE PRECISION
+    , uo_tm_24hr           BIGINT
+    , rate_epinephrine     DOUBLE PRECISION
+    , rate_norepinephrine  DOUBLE PRECISION
+    , rate_dopamine        DOUBLE PRECISION
+    , rate_dobutamine      DOUBLE PRECISION
+    , respiration          INTEGER
+    , coagulation          INTEGER
+    , liver                INTEGER
+    , cardiovascular       INTEGER
+    , cns                  INTEGER
+    , renal                INTEGER
+);
+
+-- 5l. Final hourly SOFA: 24-hour rolling MAX per component, summed.
+-- Holds the per-hour component scores, their *_24hours
+-- counterparts, and the total in sofa_24hours.
+DROP TABLE IF EXISTS sofa_hourly;
+CREATE TABLE sofa_hourly
+(
+      subject_id              INTEGER
+    , hadm_id                 INTEGER
+    , icustay_id              INTEGER
+    , hr                      INTEGER
+    , starttime               TIMESTAMP(0)
+    , endtime                 TIMESTAMP(0)
+    , respiration             INTEGER
+    , coagulation             INTEGER
+    , liver                   INTEGER
+    , cardiovascular          INTEGER
+    , cns                     INTEGER
+    , renal                   INTEGER
+    , respiration_24hours     INTEGER
+    , coagulation_24hours     INTEGER
+    , liver_24hours           INTEGER
+    , cardiovascular_24hours  INTEGER
+    , cns_24hours             INTEGER
+    , renal_24hours           INTEGER
+    , sofa_24hours            INTEGER
+);
+
+-- 6. Suspicion of infection.
+
+-- 6a. Antibiotic prescriptions.
+DROP TABLE IF EXISTS antibiotic;
+CREATE TABLE antibiotic
+(
+      subject_id  INTEGER
+    , hadm_id     INTEGER
+    , icustay_id  INTEGER
+    , antibiotic  VARCHAR(255)
+    , route       VARCHAR(120)
+    -- MIMIC-III prescriptions has DATE-precision startdate / enddate
+    -- (stored as TIMESTAMP(0) but always at 00:00:00).
+    , starttime   TIMESTAMP(0)
+    , stoptime    TIMESTAMP(0)
+);
+
+-- 6b. Suspected-infection events: antibiotic and culture times with
+--     the resulting suspected_infection flag and time.
+DROP TABLE IF EXISTS suspicion_of_infection;
+CREATE TABLE suspicion_of_infection
+(
+      subject_id                INTEGER
+    , icustay_id                INTEGER
+    , hadm_id                   INTEGER
+    , ab_id                     BIGINT
+    , antibiotic                VARCHAR(255)
+    , antibiotic_time           TIMESTAMP
+    , suspected_infection       INTEGER
+    , suspected_infection_time  TIMESTAMP
+    , culture_time              TIMESTAMP
+    , specimen                  VARCHAR(100)
+    , positive_culture          INTEGER
+);
+
+-- 7. Final Sepsis-3 onset table (one row per ICU stay).
+-- Carries the suspicion-of-infection times, the SOFA time and total,
+-- the six SOFA component scores, and the boolean `sepsis3` flag.
+DROP TABLE IF EXISTS sepsis3;
+CREATE TABLE sepsis3
+(
+      subject_id                INTEGER
+    , icustay_id                INTEGER
+    , antibiotic_time           TIMESTAMP
+    , culture_time              TIMESTAMP
+    , suspected_infection_time  TIMESTAMP
+    , sofa_time                 TIMESTAMP(0)
+    , sofa_score                INTEGER
+    , respiration               INTEGER
+    , coagulation               INTEGER
+    , liver                     INTEGER
+    , cardiovascular            INTEGER
+    , cns                       INTEGER
+    , renal                     INTEGER
+    , sepsis3                   BOOLEAN
+);
diff --git a/sql/sepsis/antibiotic.sql b/sql/sepsis/antibiotic.sql
new file mode 100644
index 0000000..213dfeb
--- /dev/null
+++ b/sql/sepsis/antibiotic.sql
@@ -0,0 +1,215 @@
+-- ------------------------------------------------------------------
+-- Title: Antibiotic prescriptions
+--
+-- Adapted from the MIMIC-IV upstream
+--     concepts/medication/antibiotic.sql
+-- and ported to MIMIC-III v1.3 vanilla PostgreSQL.
+--
+-- PORT NOTES:
+--   1. MIMIC-III `prescriptions` has DATE-precision `startdate` /
+--      `enddate` instead of MIMIC-IV's TIMESTAMP `starttime` /
+--      `stoptime`. We carry these through unchanged; downstream
+--      consumers (suspicion_of_infection.sql) treat the antibiotic
+--      time as the start-of-day timestamp of `startdate`.
+--   2. MIMIC-III `prescriptions` already has a populated `icustay_id`
+--      column on most rows, so we can pick the ICU stay directly
+--      rather than recomputing from `intime`/`outtime`. We still
+--      back-fill it from `icustays` where missing, the same way
+--      MIMIC-IV does.
+-- ------------------------------------------------------------------
+
+DROP TABLE IF EXISTS antibiotic;
+CREATE TABLE antibiotic AS
+
+-- abx: every distinct (drug, route) pair in `prescriptions` that
+-- survives the route / formulation exclusions, flagged antibiotic = 1
+-- when the lower-cased drug name contains any of the listed
+-- substrings and 0 otherwise.
+WITH abx AS
+(
+  SELECT DISTINCT
+      drug
+    , route
+    , CASE
+        -- One LIKE ANY over the pattern list replaces one WHEN arm
+        -- per pattern; a NULL drug still falls through to ELSE 0.
+        -- Patterns are kept in their original order to ease diffing
+        -- against the upstream list.
+        WHEN LOWER(drug) LIKE ANY (ARRAY[
+              '%adoxa%', '%ala-tet%', '%alodox%', '%amikacin%'
+            , '%amikin%', '%amoxicill%', '%amphotericin%', '%anidulafungin%'
+            , '%ancef%', '%clavulanate%', '%ampicillin%', '%augmentin%'
+            , '%avelox%', '%avidoxy%', '%azactam%', '%azithromycin%'
+            , '%aztreonam%', '%axetil%', '%bactocill%', '%bactrim%'
+            , '%bactroban%', '%bethkis%', '%biaxin%', '%bicillin l-a%'
+            , '%cayston%', '%cefazolin%', '%cedax%', '%cefoxitin%'
+            , '%ceftazidime%', '%cefaclor%', '%cefadroxil%', '%cefdinir%'
+            , '%cefditoren%', '%cefepime%', '%cefotan%', '%cefotetan%'
+            , '%cefotaxime%', '%ceftaroline%', '%cefpodoxime%', '%cefpirome%'
+            , '%cefprozil%', '%ceftibuten%', '%ceftin%', '%ceftriaxone%'
+            , '%cefuroxime%', '%cephalexin%', '%cephalothin%'
+            -- 'cephapririn' is the spelling this list originally
+            -- carried; the correctly spelled 'cephapirin' is added so
+            -- the drug can actually match.
+            , '%cephapririn%', '%cephapirin%'
+            , '%chloramphenicol%', '%cipro%', '%ciprofloxacin%', '%claforan%'
+            , '%clarithromycin%', '%cleocin%', '%clindamycin%', '%cubicin%'
+            , '%dicloxacillin%', '%dirithromycin%', '%doryx%', '%doxycy%'
+            , '%duricef%', '%dynacin%', '%ery-tab%', '%eryped%'
+            , '%eryc%', '%erythrocin%', '%erythromycin%', '%factive%'
+            , '%flagyl%', '%fortaz%', '%furadantin%', '%garamycin%'
+            , '%gentamicin%', '%kanamycin%', '%keflex%', '%kefzol%'
+            , '%ketek%', '%levaquin%', '%levofloxacin%', '%lincocin%'
+            , '%linezolid%', '%macrobid%', '%macrodantin%', '%maxipime%'
+            , '%mefoxin%', '%metronidazole%', '%meropenem%', '%methicillin%'
+            , '%minocin%', '%minocycline%', '%monodox%', '%monurol%'
+            , '%morgidox%', '%moxatag%', '%moxifloxacin%', '%mupirocin%'
+            , '%myrac%', '%nafcillin%', '%neomycin%', '%nicazel doxy 30%'
+            , '%nitrofurantoin%', '%norfloxacin%', '%noroxin%', '%ocudox%'
+            , '%ofloxacin%', '%omnicef%', '%oracea%', '%oraxyl%'
+            , '%oxacillin%', '%pc pen vk%', '%pce dispertab%', '%panixine%'
+            , '%pediazole%', '%penicillin%', '%periostat%', '%pfizerpen%'
+            , '%piperacillin%', '%tazobactam%', '%primsol%', '%proquin%'
+            , '%raniclor%', '%rifadin%', '%rifampin%', '%rocephin%'
+            , '%smz-tmp%', '%septra%', '%septra ds%', '%solodyn%'
+            , '%spectracef%', '%streptomycin%', '%sulfadiazine%', '%sulfamethoxazole%'
+            , '%trimethoprim%', '%sulfatrim%', '%sulfisoxazole%', '%suprax%'
+            , '%synercid%', '%tazicef%', '%tetracycline%', '%timentin%'
+            , '%tobramycin%', '%unasyn%', '%vancocin%', '%vancomycin%'
+            , '%vantin%', '%vibativ%', '%vibra-tabs%', '%vibramycin%'
+            , '%zinacef%', '%zithromax%', '%zosyn%', '%zyvox%'
+        ]) THEN 1
+        ELSE 0
+      END AS antibiotic
+  FROM prescriptions
+  WHERE drug_type NOT IN ('BASE')
+    -- match upstream: NULL routes are excluded by the NOT IN.
+    AND route NOT IN ('OU','OS','OD','AU','AS','AD','TP')
+    AND LOWER(route) NOT LIKE '%ear%'
+    AND LOWER(route) NOT LIKE '%eye%'
+    AND LOWER(drug) NOT LIKE '%cream%'
+    AND LOWER(drug) NOT LIKE '%desensitization%'
+    AND LOWER(drug) NOT LIKE '%ophth oint%'
+    AND LOWER(drug) NOT LIKE '%gel%'
+)
+-- One output row per prescription row whose (drug, route) pair is
+-- flagged as an antibiotic in abx.
+SELECT pr.subject_id
+    , pr.hadm_id
+    , COALESCE(pr.icustay_id, ie.icustay_id) AS icustay_id
+    , pr.drug AS antibiotic
+    , pr.route
+    -- DATE-precision in MIMIC-III; downstream treats this as the
+    -- start-of-day timestamp.
+    , pr.startdate AS starttime
+    , pr.enddate AS stoptime
+FROM prescriptions pr
+INNER JOIN abx
+    ON pr.drug = abx.drug
+    AND pr.route = abx.route
+LEFT JOIN icustays ie
+    -- Back-fill only: rows that already carry an icustay_id skip the
+    -- join, so they cannot fan out when more than one ICU stay of the
+    -- admission covers the prescription start date.
+    ON pr.icustay_id IS NULL
+    AND pr.hadm_id = ie.hadm_id
+    AND pr.startdate >= CAST(ie.intime AS DATE)
+    AND pr.startdate <= CAST(ie.outtime AS DATE)
+WHERE abx.antibiotic = 1;
+
+-- The table was just rebuilt, so the index is always created fresh.
+CREATE INDEX IF NOT EXISTS antibiotic_idx
+    ON antibiotic (subject_id, hadm_id, starttime);
diff --git a/sql/sepsis/blood_gas_arterial.sql b/sql/sepsis/blood_gas_arterial.sql
new file mode 100644
index 0000000..1680f02
--- /dev/null
+++ b/sql/sepsis/blood_gas_arterial.sql
@@ -0,0 +1,230 @@
+-- ------------------------------------------------------------------
+-- All-time arterial blood-gas pivot (PaO2 / FiO2 ratio at every gas).
+--
+-- This script is a fusion of the upstream MIMIC-III concepts_postgres
+-- files
+--     firstday/blood_gas_first_day.sql
+--     firstday/blood_gas_first_day_arterial.sql
+-- with their day-1 time predicate removed, so we get one row per
+-- (icustay_id, charttime) for the entire ICU stay.
+--
+-- Output table: blood_gas_arterial
+-- Output cols : subject_id, hadm_id, icustay_id, charttime,
+--               specimen, specimen_pred, specimen_prob,
+--               so2, spo2, po2, pco2, fio2_chartevents, fio2,
+--               aado2, aado2_calc, pao2fio2, ph, baseexcess,
+--               bicarbonate, totalco2, hematocrit, hemoglobin,
+--               carboxyhemoglobin, methemoglobin, chloride, calcium,
+--               temperature, potassium, sodium, lactate, glucose,
+--               intubated, tidalvolume, ventilationrate, ventilator,
+--               peep, o2flow, requiredo2
+--
+-- Restricted to *arterial* samples (specimen = 'ART' or
+-- specimen_prob > 0.75).
+-- ------------------------------------------------------------------
+
+DROP TABLE IF EXISTS blood_gas_arterial;
+CREATE TABLE blood_gas_arterial AS
+
+WITH bg_pvt AS  -- long format: one row per blood-gas lab event charted during an ICU stay
+(
+    SELECT ie.subject_id, ie.hadm_id, ie.icustay_id
+         , CASE  -- itemid -> label consumed by the pivot in bg
+             WHEN itemid = 50800 THEN 'SPECIMEN'
+             WHEN itemid = 50801 THEN 'AADO2'
+             WHEN itemid = 50802 THEN 'BASEEXCESS'
+             WHEN itemid = 50803 THEN 'BICARBONATE'
+             WHEN itemid = 50804 THEN 'TOTALCO2'
+             WHEN itemid = 50805 THEN 'CARBOXYHEMOGLOBIN'
+             WHEN itemid = 50806 THEN 'CHLORIDE'
+             WHEN itemid = 50808 THEN 'CALCIUM'
+             WHEN itemid = 50809 THEN 'GLUCOSE'
+             WHEN itemid = 50810 THEN 'HEMATOCRIT'
+             WHEN itemid = 50811 THEN 'HEMOGLOBIN'
+             WHEN itemid = 50812 THEN 'INTUBATED'
+             WHEN itemid = 50813 THEN 'LACTATE'
+             WHEN itemid = 50814 THEN 'METHEMOGLOBIN'
+             WHEN itemid = 50815 THEN 'O2FLOW'
+             WHEN itemid = 50816 THEN 'FIO2'
+             WHEN itemid = 50817 THEN 'SO2'
+             WHEN itemid = 50818 THEN 'PCO2'
+             WHEN itemid = 50819 THEN 'PEEP'
+             WHEN itemid = 50820 THEN 'PH'
+             WHEN itemid = 50821 THEN 'PO2'
+             WHEN itemid = 50822 THEN 'POTASSIUM'
+             WHEN itemid = 50823 THEN 'REQUIREDO2'
+             WHEN itemid = 50824 THEN 'SODIUM'
+             WHEN itemid = 50825 THEN 'TEMPERATURE'
+             WHEN itemid = 50826 THEN 'TIDALVOLUME'
+             WHEN itemid = 50827 THEN 'VENTILATIONRATE'
+             WHEN itemid = 50828 THEN 'VENTILATOR'
+             ELSE NULL  -- 50807 and 51545 are fetched below but receive no label
+           END AS label
+         , le.charttime
+         , le.value
+         , CASE  -- null out readings outside the accepted range of each item
+             WHEN valuenum <= 0 AND itemid != 50802 THEN NULL  -- base excess (50802) is exempt
+             WHEN itemid = 50810 AND valuenum > 100 THEN NULL  -- hematocrit
+             WHEN itemid = 50816 AND valuenum < 20 THEN NULL   -- FiO2
+             WHEN itemid = 50816 AND valuenum > 100 THEN NULL  -- FiO2
+             WHEN itemid = 50817 AND valuenum > 100 THEN NULL  -- SO2
+             WHEN itemid = 50815 AND valuenum > 70 THEN NULL   -- O2 flow
+             WHEN itemid = 50821 AND valuenum > 800 THEN NULL  -- PO2
+             ELSE valuenum
+           END AS valuenum
+    FROM icustays ie
+    INNER JOIN labevents le
+        ON le.subject_id = ie.subject_id
+        AND le.hadm_id = ie.hadm_id
+        AND le.charttime BETWEEN ie.intime AND ie.outtime
+        AND le.itemid IN (
+              50800,50801,50802,50803,50804,50805,50806,50807,50808,50809
+            , 50810,50811,50812,50813,50814,50815,50816,50817,50818,50819
+            , 50820,50821,50822,50823,50824,50825,50826,50827,50828
+            , 51545
+        )
+)
+, bg AS  -- wide format: one row per (stay, charttime); MAX() only collapses the pivot
+(
+    SELECT subject_id, hadm_id, icustay_id, charttime
+         , MAX(CASE WHEN label = 'SPECIMEN' THEN value END) AS specimen
+         , MAX(CASE WHEN label = 'AADO2' THEN valuenum END) AS aado2
+         , MAX(CASE WHEN label = 'BASEEXCESS' THEN valuenum END) AS baseexcess
+         , MAX(CASE WHEN label = 'BICARBONATE' THEN valuenum END) AS bicarbonate
+         , MAX(CASE WHEN label = 'TOTALCO2' THEN valuenum END) AS totalco2
+         , MAX(CASE WHEN label = 'CARBOXYHEMOGLOBIN' THEN valuenum END) AS carboxyhemoglobin
+         , MAX(CASE WHEN label = 'CHLORIDE' THEN valuenum END) AS chloride
+         , MAX(CASE WHEN label = 'CALCIUM' THEN valuenum END) AS calcium
+         , MAX(CASE WHEN label = 'GLUCOSE' THEN valuenum END) AS glucose
+         , MAX(CASE WHEN label = 'HEMATOCRIT' THEN valuenum END) AS hematocrit
+         , MAX(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum END) AS hemoglobin
+         , MAX(CASE WHEN label = 'INTUBATED' THEN valuenum END) AS intubated
+         , MAX(CASE WHEN label = 'LACTATE' THEN valuenum END) AS lactate
+         , MAX(CASE WHEN label = 'METHEMOGLOBIN' THEN valuenum END) AS methemoglobin
+         , MAX(CASE WHEN label = 'O2FLOW' THEN valuenum END) AS o2flow
+         , MAX(CASE WHEN label = 'FIO2' THEN valuenum END) AS fio2
+         , MAX(CASE WHEN label = 'SO2' THEN valuenum END) AS so2
+         , MAX(CASE WHEN label = 'PCO2' THEN valuenum END) AS pco2
+         , MAX(CASE WHEN label = 'PEEP' THEN valuenum END) AS peep
+         , MAX(CASE WHEN label = 'PH' THEN valuenum END) AS ph
+         , MAX(CASE WHEN label = 'PO2' THEN valuenum END) AS po2
+         , MAX(CASE WHEN label = 'POTASSIUM' THEN valuenum END) AS potassium
+         , MAX(CASE WHEN label = 'REQUIREDO2' THEN valuenum END) AS requiredo2
+         , MAX(CASE WHEN label = 'SODIUM' THEN valuenum END) AS sodium
+         , MAX(CASE WHEN label = 'TEMPERATURE' THEN valuenum END) AS temperature
+         , MAX(CASE WHEN label = 'TIDALVOLUME' THEN valuenum END) AS tidalvolume
+         , MAX(CASE WHEN label = 'VENTILATIONRATE' THEN valuenum END) AS ventilationrate
+         , MAX(CASE WHEN label = 'VENTILATOR' THEN valuenum END) AS ventilator
+    FROM bg_pvt
+    GROUP BY subject_id, hadm_id, icustay_id, charttime
+)
+, stg_spo2 AS  -- charted SpO2 per charttime; values outside (0, 100] become NULL
+(
+    SELECT subject_id, hadm_id, icustay_id, charttime
+         , MAX(CASE WHEN valuenum <= 0 OR valuenum > 100 THEN NULL ELSE valuenum END) AS spo2
+    FROM chartevents
+    WHERE itemid IN (646, 220277)
+    GROUP BY subject_id, hadm_id, icustay_id, charttime
+)
+, stg_fio2 AS  -- charted FiO2 per charttime, normalised to a percentage
+(
+    SELECT subject_id, hadm_id, icustay_id, charttime
+         , MAX(
+             CASE
+               WHEN itemid = 223835 THEN
+                 CASE
+                   WHEN valuenum > 0 AND valuenum <= 1 THEN valuenum * 100  -- fraction -> percent
+                   WHEN valuenum > 1 AND valuenum < 21 THEN NULL            -- neither a fraction nor >= 21 %
+                   WHEN valuenum >= 21 AND valuenum <= 100 THEN valuenum
+                   ELSE NULL
+                 END
+               WHEN itemid IN (3420, 3422) THEN valuenum
+               WHEN itemid = 190 AND valuenum > 0.20 AND valuenum < 1
+                 THEN valuenum * 100  -- item 190 is only accepted as a fraction
+               ELSE NULL
+             END
+           ) AS fio2_chartevents
+    FROM chartevents
+    WHERE itemid IN (3420, 190, 223835, 3422)
+      AND COALESCE(error, 0) = 0  -- drop rows flagged as charting errors
+    GROUP BY subject_id, hadm_id, icustay_id, charttime
+)
+, stg2 AS  -- attach SpO2 charted in the 2 h up to each gas; lastrowspo2 = 1 is the latest
+(
+    SELECT bg.*
+         , ROW_NUMBER() OVER (
+             PARTITION BY bg.icustay_id, bg.charttime
+             ORDER BY s1.charttime DESC
+           ) AS lastrowspo2
+         , s1.spo2
+    FROM bg
+    LEFT JOIN stg_spo2 s1
+        ON bg.icustay_id = s1.icustay_id
+        AND s1.charttime >= bg.charttime - INTERVAL '2 hours'  -- native arithmetic, no DATETIME_SUB helper
+        AND s1.charttime <= bg.charttime
+    WHERE bg.po2 IS NOT NULL  -- only gases that report a PO2 are kept
+)
+, stg3 AS  -- attach FiO2 charted in the 4 h up to each gas and score the specimen
+(
+    SELECT stg2.*
+         , ROW_NUMBER() OVER (
+             PARTITION BY stg2.icustay_id, stg2.charttime
+             ORDER BY s2.charttime DESC
+           ) AS lastrowfio2
+         , s2.fio2_chartevents
+         , 1 / (1 + EXP(-(-0.02544
+             + 0.04598 * po2
+             + COALESCE(-0.15356 * spo2               , -0.15356 * 97.49420 + 0.13429)
+             + COALESCE( 0.00621 * s2.fio2_chartevents,  0.00621 * 51.49550 + -0.24958)
+             + COALESCE( 0.10559 * hemoglobin         ,  0.10559 * 10.32307 + 0.05954)
+             + COALESCE( 0.13251 * so2                ,  0.13251 * 93.66539 + -0.23172)
+             + COALESCE(-0.01511 * pco2               , -0.01511 * 42.08866 + -0.01630)
+             + COALESCE( 0.01480 * fio2               ,  0.01480 * 63.97836 + -0.31142)
+             + COALESCE(-0.00200 * aado2              , -0.00200 * 442.21186 + -0.01328)
+             + COALESCE(-0.03220 * bicarbonate        , -0.03220 * 22.96894 + -0.06535)
+             + COALESCE( 0.05384 * totalco2           ,  0.05384 * 24.72632 + -0.01405)
+             + COALESCE( 0.08202 * lactate            ,  0.08202 * 3.06436 + 0.06038)
+             + COALESCE( 0.10956 * ph                 ,  0.10956 * 7.36233 + -0.00617)
+             + COALESCE( 0.00848 * o2flow             ,  0.00848 * 7.59362 + -0.35803)
+           ))) AS specimen_prob  -- logistic score; each COALESCE fallback applies when that input is NULL
+    FROM stg2
+    LEFT JOIN stg_fio2 s2
+        ON stg2.icustay_id = s2.icustay_id
+        AND s2.charttime BETWEEN stg2.charttime - INTERVAL '4 hours'  -- native arithmetic, no DATETIME_SUB helper
+                             AND stg2.charttime
+    WHERE stg2.lastrowspo2 = 1
+)
+SELECT subject_id, hadm_id, icustay_id, charttime
+     , specimen
+     , CASE  -- recorded specimen wins; otherwise predict 'ART' above the 0.75 cutoff
+         WHEN specimen IS NOT NULL THEN specimen
+         WHEN specimen_prob > 0.75 THEN 'ART'
+         ELSE NULL
+       END AS specimen_pred
+     , specimen_prob
+     , so2, spo2, po2, pco2
+     , fio2_chartevents, fio2
+     , aado2
+     , CASE  -- computed only when PO2, PCO2 and some FiO2 are all present
+         WHEN po2 IS NOT NULL
+          AND pco2 IS NOT NULL
+          AND COALESCE(fio2, fio2_chartevents) IS NOT NULL
+         THEN (COALESCE(fio2, fio2_chartevents) / 100) * (760 - 47) - (pco2 / 0.8) - po2
+         ELSE NULL
+       END AS aado2_calc
+     , CASE  -- the lab FiO2 is preferred over the charted one
+         WHEN po2 IS NOT NULL AND COALESCE(fio2, fio2_chartevents) IS NOT NULL
+         THEN 100 * po2 / COALESCE(fio2, fio2_chartevents)  -- FiO2 is a percentage, hence the factor 100
+         ELSE NULL
+       END AS pao2fio2
+     , ph, baseexcess, bicarbonate, totalco2
+     , hematocrit, hemoglobin, carboxyhemoglobin, methemoglobin
+     , chloride, calcium, temperature, potassium, sodium, lactate, glucose
+     , intubated, tidalvolume, ventilationrate, ventilator
+     , peep, o2flow, requiredo2
+FROM stg3
+WHERE lastrowfio2 = 1  -- keep the row joined to the most recent charted FiO2
+  AND (specimen = 'ART' OR specimen_prob > 0.75);
+
+CREATE INDEX IF NOT EXISTS blood_gas_arterial_idx
+    ON blood_gas_arterial (icustay_id, charttime);
diff --git a/sql/sepsis/gcs_all.sql b/sql/sepsis/gcs_all.sql
new file mode 100644
index 0000000..dda6aec
--- /dev/null
+++ b/sql/sepsis/gcs_all.sql
@@ -0,0 +1,78 @@
+-- ------------------------------------------------------------------
+--
All-time GCS pivot.
+--
+-- Adapted from the upstream MIMIC-III concepts_postgres file
+--   firstday/gcs_first_day.sql
+-- with the day-1 time predicate removed and the row reduced to one
+-- row per (icustay_id, charttime, gcs) for the entire ICU stay.
+--
+-- The carry-forward logic (impute missing components from the
+-- immediately preceding charttime within 6 h) is preserved. GCS
+-- during sedation/intubation is set to 15, matching upstream.
+-- ------------------------------------------------------------------
+
+DROP TABLE IF EXISTS gcs_all;
+CREATE TABLE gcs_all AS
+
+WITH base AS  -- one row per (stay, charttime) with the three GCS components pivoted out
+(
+    SELECT pvt.icustay_id
+         , pvt.charttime
+         , MAX(CASE WHEN pvt.itemid = 454 THEN pvt.valuenum END) AS gcsmotor
+         , MAX(CASE WHEN pvt.itemid = 723 THEN pvt.valuenum END) AS gcsverbal
+         , MAX(CASE WHEN pvt.itemid = 184 THEN pvt.valuenum END) AS gcseyes
+         , CASE  -- a verbal score of 0 is the sentinel assigned below for ET/trach entries
+             WHEN MAX(CASE WHEN pvt.itemid = 723 THEN pvt.valuenum END) = 0
+             THEN 1 ELSE 0
+           END AS endotrachflag
+         , ROW_NUMBER() OVER (PARTITION BY pvt.icustay_id ORDER BY pvt.charttime ASC) AS rn
+    FROM (
+        SELECT l.icustay_id
+             , CASE  -- fold each pair of itemids onto a single id per component
+                 WHEN l.itemid IN (723, 223900) THEN 723
+                 WHEN l.itemid IN (454, 223901) THEN 454
+                 WHEN l.itemid IN (184, 220739) THEN 184
+                 ELSE l.itemid
+               END AS itemid
+             , CASE  -- the two ET/trach text values are recoded to the sentinel 0
+                 WHEN l.itemid = 723 AND l.value = '1.0 ET/Trach' THEN 0
+                 WHEN l.itemid = 223900 AND l.value = 'No Response-ETT' THEN 0
+                 ELSE l.valuenum
+               END AS valuenum
+             , l.charttime
+        FROM chartevents l
+        INNER JOIN icustays b
+            ON l.icustay_id = b.icustay_id
+        WHERE l.itemid IN (184, 454, 723, 223900, 223901, 220739)
+          AND l.charttime BETWEEN b.intime AND b.outtime
+          AND COALESCE(l.error, 0) = 0  -- drop rows flagged as charting errors
+    ) pvt
+    GROUP BY pvt.icustay_id, pvt.charttime
+)
+, gcs AS  -- total score; b2 is the previous observation of the same stay, if recent enough
+(
+    SELECT b.icustay_id
+         , b.charttime
+         , CASE
+             WHEN b.gcsverbal = 0 THEN 15  -- ET/trach now: score 15
+             WHEN b.gcsverbal IS NULL AND b2.gcsverbal = 0 THEN 15  -- verbal missing, previous was ET/trach
+             WHEN b2.gcsverbal = 0 THEN  -- previous was ET/trach: do not carry its components forward
+                 COALESCE(b.gcsmotor , 6)
+               + COALESCE(b.gcsverbal, 5)
+               + COALESCE(b.gcseyes  , 4)
+             ELSE  -- current value, else previous value, else the component maximum
+                 COALESCE(b.gcsmotor , COALESCE(b2.gcsmotor , 6))
+               + COALESCE(b.gcsverbal, COALESCE(b2.gcsverbal, 5))
+               + COALESCE(b.gcseyes  , COALESCE(b2.gcseyes  , 4))
+           END AS gcs
+         , b.endotrachflag
+    FROM base b
+    LEFT JOIN base b2
+        ON b.icustay_id = b2.icustay_id
+        AND b.rn = b2.rn + 1  -- the immediately preceding charttime
+        AND b2.charttime > b.charttime - INTERVAL '6 hours'  -- native arithmetic, no DATETIME_SUB helper
+)
+SELECT icustay_id, charttime, gcs, endotrachflag
+FROM gcs;
+
+CREATE INDEX IF NOT EXISTS gcs_all_idx ON gcs_all (icustay_id, charttime);
diff --git a/sql/sepsis/mortality_checks.sql b/sql/sepsis/mortality_checks.sql
new file mode 100644
index 0000000..db12199
--- /dev/null
+++ b/sql/sepsis/mortality_checks.sql
@@ -0,0 +1,265 @@
+-- ------------------------------------------------------------------
+-- Mortality verification for Sepsis-3 in MIMIC-III v1.3.
+--
+-- Usage:
+--   psql -d mimic -v ON_ERROR_STOP=1 \
+--        -c 'SET search_path TO mimiciii, public;' \
+--        -f sql/sepsis/mortality_checks.sql
+--
+-- Purpose:
+--   `sanity_checks.sql` reported a 14.6% in-hospital mortality among
+--   Sepsis-3 = TRUE patients, well below the 25-35% range in the
+--   literature. The hypothesis was that the broad cohort (neonates,
+--   re-admissions, short stays included) drags the number down.
+--
+--   This script walks an exclusion funnel and shows mortality at
+--   each step so you can confirm. It also shows 30-day mortality,
+--   stratification by age band, and a direct comparison with the
+--   numbers published in:
+--
+--     Johnson AEW et al., Crit Care Med 2018.
+--     "A Comparative Analysis of Sepsis Identification Methods
+--      in an Electronic Database."
+--     Reported on MIMIC-III v1.4 with adult, first-ICU-stay,
+--     LOS >= 4 h cohort:
+--       n = 21 927 sepsis-3 stays
+--       in-hospital mortality = 21.0%
+--       30-day mortality = 25.4%
+--
+-- All ages use a clamp at 91 (MIMIC-III shifts DOB by 300 y for
+-- patients > 89; we treat them as 91 for stratification).
+-- ------------------------------------------------------------------
+
+\set ON_ERROR_STOP on
+\timing on
+
+-- Build a working cohort table once with everything we need
+DROP TABLE IF EXISTS sepsis3_cohort_check;
+CREATE TEMP TABLE sepsis3_cohort_check AS
+SELECT ie.subject_id
+     , ie.hadm_id
+     , ie.icustay_id
+     , ie.intime
+     , ie.outtime
+     , ie.first_careunit
+     , EXTRACT(EPOCH FROM (ie.outtime - ie.intime)) / 3600.0 AS los_hours
+     , LEAST(  -- clamp at 91, see the header note on shifted dates of birth
+         91.0,
+         EXTRACT(EPOCH FROM (ie.intime - pat.dob))
+           / (365.242 * 86400.0)  -- seconds -> years
+       ) AS age_at_intime
+     , ROW_NUMBER() OVER (  -- 1 = the patient's earliest ICU stay by intime
+         PARTITION BY ie.subject_id
+         ORDER BY ie.intime
+       ) AS icustay_seq
+     , adm.hospital_expire_flag AS died_in_hospital
+     , (pat.dod IS NOT NULL  -- the 30 days are counted from ICU intime
+        AND pat.dod <= ie.intime + INTERVAL '30 days')::int AS died_within_30d
+     , COALESCE(s.sepsis3, FALSE) AS sepsis3  -- stays without a sepsis3 row count as not septic
+FROM icustays ie
+JOIN admissions adm ON adm.hadm_id = ie.hadm_id
+JOIN patients pat ON pat.subject_id = ie.subject_id
+LEFT JOIN sepsis3 s ON s.icustay_id = ie.icustay_id;
+
+CREATE INDEX ON sepsis3_cohort_check (icustay_id);
+ANALYZE sepsis3_cohort_check;
+
+\echo
+\echo '=================================================================='
+\echo ' 1. Cohort exclusion funnel (incremental filtering)'
+\echo '=================================================================='
+\echo 'Each row applies an additional restriction. The ''sepsis3'' columns'
+\echo 'report stats among rows where sepsis3 = TRUE within that cohort.'
+\echo
+\echo 'EXPECTED progression: as we narrow to the canonical adult/first-stay/'
+\echo 'LOS >= 24h cohort, in-hospital mortality among Sepsis-3 should rise'
+\echo 'from ~14% toward ~25-30%.'
+\echo
+WITH levels AS (
+    -- 0. Everyone
+    SELECT 0 AS lvl, '0. all icustays' AS step
+         , c.* FROM sepsis3_cohort_check c
+
+    UNION ALL
+    -- 1. Exclude neonatal ICU
+    SELECT 1, '1. + exclude NICU'
+         , c.* FROM sepsis3_cohort_check c
+    WHERE c.first_careunit != 'NICU'
+
+    UNION ALL
+    -- 2. Adult (age >= 18) on top of (1)
+    SELECT 2, '2. + age >= 18'
+         , c.* FROM sepsis3_cohort_check c
+    WHERE c.first_careunit != 'NICU'
+      AND c.age_at_intime >= 18
+
+    UNION ALL
+    -- 3. First ICU stay only on top of (2)
+    SELECT 3, '3. + first ICU stay only'
+         , c.* FROM sepsis3_cohort_check c
+    WHERE c.first_careunit != 'NICU'
+      AND c.age_at_intime >= 18
+      AND c.icustay_seq = 1
+
+    UNION ALL
+    -- 4. LOS >= 24h on top of (3) -- the canonical Seymour cohort
+    SELECT 4, '4. + LOS >= 24 h (canonical)'
+         , c.* FROM sepsis3_cohort_check c
+    WHERE c.first_careunit != 'NICU'
+      AND c.age_at_intime >= 18
+      AND c.icustay_seq = 1
+      AND c.los_hours >= 24
+)
+SELECT lvl
+     , step
+     , count(*) AS n_total
+     , sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END) AS n_sepsis3
+     , round(100.0 * sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END)
+             / count(*), 1) AS pct_sepsis3
+     -- in-hospital mortality among Sepsis-3 = TRUE
+     , round(100.0 * sum(CASE WHEN sepsis3 AND died_in_hospital = 1
+                              THEN 1 ELSE 0 END)
+             / NULLIF(sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END), 0), 1)
+         AS sep_inhosp_pct
+     -- 30-day mortality among Sepsis-3 = TRUE
+     , round(100.0 * sum(CASE WHEN sepsis3 AND died_within_30d = 1
+                              THEN 1 ELSE 0 END)
+             / NULLIF(sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END), 0), 1)
+         AS sep_30d_pct
+     -- in-hospital mortality among NOT Sepsis-3
+     , round(100.0 * sum(CASE WHEN NOT sepsis3 AND died_in_hospital = 1
+                              THEN 1 ELSE 0 END)
+             / NULLIF(sum(CASE WHEN NOT sepsis3 THEN 1 ELSE 0 END), 0), 1)
+         AS nonsep_inhosp_pct
+FROM levels
+GROUP BY lvl, step
+ORDER BY lvl;
+
+
+\echo
+\echo '=================================================================='
+\echo ' 2. Mortality stratified by age band, canonical cohort only'
+\echo '=================================================================='
+\echo 'EXPECTED:'
+\echo ' Mortality among Sepsis-3 = TRUE rises monotonically with age.'
+\echo ' Adults < 30: ~10-15%'
+\echo ' 30 - 50: ~15-20%'
+\echo ' 50 - 70: ~20-25%'
+\echo ' 70 - 90+: ~30-40%'
+\echo
+SELECT CASE  -- the numeric prefix makes the text labels sort in age order
+         WHEN age_at_intime < 30 THEN '1. <30'
+         WHEN age_at_intime < 50 THEN '2. 30-49'
+         WHEN age_at_intime < 70 THEN '3. 50-69'
+         WHEN age_at_intime < 90 THEN '4. 70-89'
+         ELSE '5. 90+'
+       END AS age_band
+     , count(*) AS n_sepsis3
+     , round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
+     , round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
+FROM sepsis3_cohort_check
+WHERE sepsis3
+  AND first_careunit != 'NICU'
+  AND age_at_intime >= 18
+  AND icustay_seq = 1
+  AND los_hours >= 24
+GROUP BY 1
+ORDER BY 1;
+
+
+\echo
+\echo '=================================================================='
+\echo ' 3. Mortality stratified by max-SOFA day-1 (canonical cohort)'
+\echo '=================================================================='
+\echo 'EXPECTED:'
+\echo ' Mortality should rise monotonically with SOFA. This is the'
+\echo ' classic dose-response curve of organ dysfunction vs death,'
+\echo ' and is the strongest semantic check that the SOFA pipeline'
+\echo ' itself is computing the right thing.'
+\echo ' SOFA 0-3: ~5-10%'
+\echo ' SOFA 4-7: ~15-25%'
+\echo ' SOFA 8-11: ~30-40%'
+\echo ' SOFA 12+: ~50-65%'
+\echo
+WITH d1 AS (  -- highest rolling SOFA over grid hours up to 24, per stay
+    SELECT icustay_id, max(sofa_24hours) AS d1_sofa
+    FROM sofa_hourly
+    WHERE hr <= 24
+    GROUP BY icustay_id
+)
+SELECT CASE  -- the numeric prefix makes the text labels sort in SOFA order
+         WHEN d1_sofa <= 3 THEN '1. 0-3'
+         WHEN d1_sofa <= 7 THEN '2. 4-7'
+         WHEN d1_sofa <= 11 THEN '3. 8-11'
+         ELSE '4. 12+'
+       END AS sofa_band
+     , count(*) AS n
+     , round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
+     , round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
+FROM sepsis3_cohort_check c
+JOIN d1 USING (icustay_id)
+WHERE first_careunit != 'NICU'  -- no sepsis3 filter: every stay of the canonical cohort is included
+  AND age_at_intime >= 18
+  AND icustay_seq = 1
+  AND los_hours >= 24
+GROUP BY 1
+ORDER BY 1;
+
+
+\echo
+\echo '=================================================================='
+\echo ' 4. Direct comparison with Johnson 2018 (canonical cohort)'
+\echo '=================================================================='
+\echo 'Johnson 2018 published numbers for Sepsis-3 on MIMIC-III v1.4'
+\echo 'with the cohort: adult, first ICU stay only, LOS >= 4 h.'
+\echo 'They reported:'
+\echo ' n_sepsis3 = 21 927'
+\echo ' in-hospital pct_died = 21.0%'
+\echo ' 30-day pct_died = 25.4%'
+\echo
+\echo 'We use LOS >= 24 h here (the more common Seymour 2016 cutoff),'
+\echo 'so our n will be a bit smaller and our mortality slightly'
+\echo 'higher than Johnson''s.'
+\echo
+SELECT count(*) AS n_sepsis3
+     , round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
+     , round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
+FROM sepsis3_cohort_check
+WHERE sepsis3
+  AND first_careunit != 'NICU'
+  AND age_at_intime >= 18
+  AND icustay_seq = 1
+  AND los_hours >= 24;
+
+
+\echo
+\echo '=================================================================='
+\echo ' 5. Sanity: where did the missing mortality "go"?'
+\echo '=================================================================='
+\echo 'Decompose the gap between the broad-cohort 14.6% and the'
+\echo 'canonical-cohort number from section 4. This shows how much'
+\echo 'of the gap is explained by each filter individually.'
+\echo
+WITH septic AS (  -- Sepsis-3 = TRUE stays, trimmed to the columns the slices read
+    SELECT first_careunit, age_at_intime, icustay_seq, los_hours, died_in_hospital FROM sepsis3_cohort_check WHERE sepsis3 = TRUE
+)
+SELECT 'all sepsis3' AS slice
+     , COUNT(*) AS n
+     , ROUND((100.0 * SUM(died_in_hospital)) / COUNT(*), 1) AS pct_inhosp
+FROM septic
+UNION ALL  -- each slice below is one excluded sub-population, taken on its own
+SELECT 'NICU only' AS slice, COUNT(*) AS n, ROUND((100.0 * SUM(died_in_hospital)) / COUNT(*), 1) AS pct_inhosp
+FROM septic WHERE first_careunit = 'NICU'
+UNION ALL
+SELECT 'age < 18 only' AS slice, COUNT(*) AS n, ROUND((100.0 * SUM(died_in_hospital)) / COUNT(*), 1) AS pct_inhosp
+FROM septic WHERE age_at_intime < 18
+UNION ALL
+SELECT 're-admission only' AS slice, COUNT(*) AS n, ROUND((100.0 * SUM(died_in_hospital)) / COUNT(*), 1) AS pct_inhosp
+FROM septic WHERE icustay_seq > 1
+UNION ALL
+SELECT 'LOS < 24h only' AS slice, COUNT(*) AS n, ROUND((100.0 * SUM(died_in_hospital)) / COUNT(*), 1) AS pct_inhosp
+FROM septic WHERE los_hours < 24;
+
+\echo
+\echo 'Done. Compare the section-4 result to Johnson 2018 (~21% in-hospital,'
+\echo '~25% 30-day) for the headline check.'
diff --git a/sql/sepsis/sanity_checks.sql b/sql/sepsis/sanity_checks.sql
new file mode 100644
index 0000000..613b472
--- /dev/null
+++ b/sql/sepsis/sanity_checks.sql
@@ -0,0 +1,393 @@
+-- ------------------------------------------------------------------
+-- Sepsis-3 sanity checks for MIMIC-III v1.3.
+--
+-- Usage:
+--   psql -d mimic -v ON_ERROR_STOP=1 \
+--        -c 'SET search_path TO mimiciii, public;' \
+--        -f sql/sepsis/sanity_checks.sql
+--
+-- Each section prints a short result set. Compare against the
+-- "EXPECTED" comment. None of these are pass/fail tests; they are
+-- bounds-style checks designed to catch obvious upstream breakage
+-- (an empty staging table, an off-by-one in the hourly grid, a
+-- vasopressor unit-conversion error, etc.).
+--
+-- Reference numbers come from:
+--   Seymour CW et al., JAMA 2016 (the Sepsis-3 paper)
+--   Johnson AEW et al., Crit Care Med 2018 ("A Comparative Analysis
+--     of Sepsis Identification Methods in an Electronic Database",
+--     which reproduces Sepsis-3 on MIMIC-III)
+-- ------------------------------------------------------------------
+
+\set ON_ERROR_STOP on
+\timing on
+
+\echo
+\echo '=================================================================='
+\echo ' 1. Row counts of every table in the pipeline'
+\echo '=================================================================='
+\echo 'EXPECTED (MIMIC-III v1.3 full restore, all 61.5k ICU stays):'
+\echo ' icustays ~ 61 532'
+\echo ' sofa_grid ~ 6 - 8 M (60k stays * ~4d mean LOS * 24h)'
+\echo ' sofa_hourly same as sofa_grid'
+\echo ' blood_gas_arterial ~ 500 k - 1 M'
+\echo ' gcs_all ~ 4 - 6 M'
+\echo ' antibiotic ~ 500 k - 700 k prescription rows'
+\echo ' suspicion_of_infection same as antibiotic'
+\echo ' sepsis3 ~ 20 k - 30 k rows (one row per ICU stay'
+\echo ' that ever had any abx + qualifying SOFA)'
+\echo
+SELECT 'icustays' AS table_name, count(*) AS n FROM icustays  -- rows come back sorted by table name
+UNION ALL SELECT 'sofa_grid', count(*) FROM sofa_grid
+UNION ALL SELECT 'sofa_vs', count(*) FROM sofa_vs
+UNION ALL SELECT 'sofa_gcs', count(*) FROM sofa_gcs
+UNION ALL SELECT 'sofa_bili', count(*) FROM sofa_bili
+UNION ALL SELECT 'sofa_cr', count(*) FROM sofa_cr
+UNION ALL SELECT 'sofa_plt', count(*) FROM sofa_plt
+UNION ALL SELECT 'sofa_pf', count(*) FROM sofa_pf
+UNION ALL SELECT 'sofa_uo', count(*) FROM sofa_uo
+UNION ALL SELECT 'sofa_vaso', count(*) FROM sofa_vaso
+UNION ALL SELECT 'sofa_wide', count(*) FROM sofa_wide
+UNION ALL SELECT 'sofa_components', count(*) FROM sofa_components
+UNION ALL SELECT 'sofa_hourly', count(*) FROM sofa_hourly
+UNION ALL SELECT 'blood_gas_arterial', count(*) FROM blood_gas_arterial
+UNION ALL SELECT 'gcs_all', count(*) FROM gcs_all
+UNION ALL SELECT 'antibiotic', count(*) FROM antibiotic
+UNION ALL SELECT 'suspicion_of_infection', count(*) FROM suspicion_of_infection
+UNION ALL SELECT 'sepsis3', count(*) FROM sepsis3
+ORDER BY 1;
+
+
+\echo
+\echo '=================================================================='
+\echo ' 2. Hourly grid integrity'
+\echo '=================================================================='
+\echo 'EXPECTED:'
+\echo ' bad_hr_seq = 0 (hours per stay must be 1..N consecutive)'
+\echo ' bad_endtime = 0 (endtime > starttime)'
+\echo ' duplicate_grid = 0 (no (icustay_id, hr) duplicates)'
+\echo ' grid_eq_hourly = 0 (sofa_grid and sofa_hourly row counts match)'
+\echo
+WITH per_stay AS (  -- first hour, last hour and number of grid rows per stay
+    SELECT icustay_id
+         , min(hr) AS hr_min
+         , max(hr) AS hr_max
+         , count(*) AS n
+    FROM sofa_grid
+    GROUP BY icustay_id
+)
+SELECT
+       (SELECT count(*) FROM per_stay
+        WHERE hr_min != 1 OR hr_max != n) AS bad_hr_seq  -- proves 1..N only when duplicate_grid is also 0
+     , (SELECT count(*) FROM sofa_grid
+        WHERE endtime <= starttime) AS bad_endtime
+     , (SELECT count(*) - count(DISTINCT (icustay_id, hr))
+        FROM sofa_grid) AS duplicate_grid
+     , (SELECT count(*) FROM sofa_grid)
+       - (SELECT count(*) FROM sofa_hourly) AS grid_eq_hourly;  -- a difference: 0 means equal counts
+
+
+\echo
+\echo '=================================================================='
+\echo ' 3. Per-component SOFA score ranges'
+\echo '=================================================================='
+\echo 'EXPECTED: every per-hour component score is in [0, 4] or NULL.'
+\echo ' Any value outside that range indicates a logic bug.'
+\echo
+SELECT 'respiration' AS component, min(respiration) AS min, max(respiration) AS max FROM sofa_components
+UNION ALL SELECT 'coagulation', min(coagulation), max(coagulation) FROM sofa_components
+UNION ALL SELECT 'liver', min(liver), max(liver) FROM sofa_components
+UNION ALL SELECT 'cardiovascular', min(cardiovascular), max(cardiovascular) FROM sofa_components
+UNION ALL SELECT 'cns', min(cns), max(cns) FROM sofa_components
+UNION ALL SELECT 'renal', min(renal), max(renal) FROM sofa_components
+ORDER BY 1;
+
+
+\echo
+\echo '=================================================================='
+\echo ' 4. 24-hour rolling SOFA distribution'
+\echo '=================================================================='
+\echo 'EXPECTED:'
+\echo ' total_sofa min = 0, max ~ 20-24'
+\echo ' median per-hour total_sofa ~ 2-4'
+\echo ' Distribution should be heavy-tailed; ~70-80% of hours <= 5,'
+\echo ' ~5-10% of hours >= 10.'
+\echo
+SELECT min(sofa_24hours) AS sofa_min  -- statistics are taken over every hourly row of every stay
+     , max(sofa_24hours) AS sofa_max
+     , round(avg(sofa_24hours)::numeric, 2) AS sofa_mean
+     , percentile_disc(0.50) WITHIN GROUP (ORDER BY sofa_24hours) AS sofa_p50
+     , percentile_disc(0.90) WITHIN GROUP (ORDER BY sofa_24hours) AS sofa_p90
+     , percentile_disc(0.99) WITHIN GROUP (ORDER BY sofa_24hours) AS sofa_p99
+FROM sofa_hourly;
+
+
+\echo
+\echo '=================================================================='
+\echo ' 5. Day-1 max SOFA per stay (compare with SAPS-II severity)'
+\echo '=================================================================='
+\echo 'EXPECTED for adult ICU (per Singer 2016, Vincent 1996):'
+\echo ' median day-1 SOFA ~ 4-6'
+\echo ' ~60-70% of stays have day-1 SOFA >= 2 (Sepsis-3 organ-dys threshold)'
+\echo
+WITH d1 AS (  -- highest rolling SOFA over grid hours up to 24, per stay
+    SELECT icustay_id, max(sofa_24hours) AS day1_sofa
+    FROM sofa_hourly
+    WHERE hr <= 24
+    GROUP BY icustay_id
+)
+SELECT count(*) AS n_stays
+     , round(avg(day1_sofa)::numeric, 2) AS mean_d1_sofa
+     , percentile_disc(0.50) WITHIN GROUP (ORDER BY day1_sofa) AS p50
+     , percentile_disc(0.90) WITHIN GROUP (ORDER BY day1_sofa) AS p90
+     , round(100.0 * sum(CASE WHEN day1_sofa >= 2 THEN 1 ELSE 0 END)
+             / count(*), 1) AS pct_ge2
+     , round(100.0 * sum(CASE WHEN day1_sofa >= 6 THEN 1 ELSE 0 END)
+             / count(*), 1) AS pct_ge6
+FROM d1;
+
+
+\echo
+\echo '=================================================================='
+\echo ' 6. Component-input sanity (raw ranges)'
+\echo '=================================================================='
+\echo 'EXPECTED ranges (after our valuenum filters):'
+\echo ' meanbp_min 30 - 200 mmHg'
+\echo ' gcs_min 3 - 15'
+\echo ' bilirubin_max 0 - 80 mg/dL'
+\echo ' creatinine_max 0 - 30 mg/dL (capped at 150 in pipeline)'
+\echo ' platelet_min 0 - 1500 K/uL'
+\echo ' pao2fio2_* 50 - 700'
+\echo ' uo_24hr 0 - 20000 mL'
+\echo ' rate_norepi etc. 0 - 5 mcg/kg/min (rates above ~3 are very rare)'
+\echo
+SELECT 'meanbp_min' AS metric  -- every statistic is cast to text so the UNION branches share one type
+     , min(meanbp_min)::text AS min
+     , max(meanbp_min)::text AS max
+     , round(avg(meanbp_min)::numeric, 1)::text AS mean
+FROM sofa_components WHERE meanbp_min IS NOT NULL
+UNION ALL SELECT 'gcs_min',
+       min(gcs_min)::text, max(gcs_min)::text, avg(gcs_min)::numeric(10,1)::text
+FROM sofa_components WHERE gcs_min IS NOT NULL
+UNION ALL SELECT 'bilirubin_max',
+       min(bilirubin_max)::text, max(bilirubin_max)::text,
+       avg(bilirubin_max)::numeric(10,2)::text
+FROM sofa_components WHERE bilirubin_max IS NOT NULL
+UNION ALL SELECT 'creatinine_max',
+       min(creatinine_max)::text, max(creatinine_max)::text,
+       avg(creatinine_max)::numeric(10,2)::text
+FROM sofa_components WHERE creatinine_max IS NOT NULL
+UNION ALL SELECT 'platelet_min',
+       min(platelet_min)::text, max(platelet_min)::text,
+       avg(platelet_min)::numeric(10,1)::text
+FROM sofa_components WHERE platelet_min IS NOT NULL
+UNION ALL SELECT 'pao2fio2_vent',
+       min(pao2fio2_vent)::text, max(pao2fio2_vent)::text,
+       avg(pao2fio2_vent)::numeric(10,1)::text
+FROM sofa_components WHERE pao2fio2_vent IS NOT NULL
+UNION ALL SELECT 'pao2fio2_novent',
+       min(pao2fio2_novent)::text, max(pao2fio2_novent)::text,
+       avg(pao2fio2_novent)::numeric(10,1)::text
+FROM sofa_components WHERE pao2fio2_novent IS NOT NULL
+UNION ALL SELECT 'uo_24hr',
+       min(uo_24hr)::text, max(uo_24hr)::text,
+       avg(uo_24hr)::numeric(10,1)::text
+FROM sofa_components WHERE uo_24hr IS NOT NULL
+UNION ALL SELECT 'rate_norepinephrine',
+       min(rate_norepinephrine)::text, max(rate_norepinephrine)::text,
+       avg(rate_norepinephrine)::numeric(10,3)::text
+FROM sofa_components WHERE rate_norepinephrine IS NOT NULL
+ORDER BY 1;
+
+
+\echo
+\echo '=================================================================='
+\echo ' 7. Vasopressor coverage'
+\echo '=================================================================='
+\echo 'EXPECTED: ~25-35% of adult ICU stays receive at least one'
+\echo ' vasopressor (norepi most common, then epi/dop/dob).'
+\echo
+SELECT 'any vaso' AS "group"  -- reserved word, quoted; the output column is still named group
+     , count(DISTINCT icustay_id) AS n_stays
+FROM sofa_vaso
+UNION ALL SELECT 'norepi',
+       count(DISTINCT icustay_id) FROM sofa_vaso WHERE rate_norepinephrine IS NOT NULL
+UNION ALL SELECT 'epi',
+       count(DISTINCT icustay_id) FROM sofa_vaso WHERE rate_epinephrine IS NOT NULL
+UNION ALL SELECT 'dop',
+       count(DISTINCT icustay_id) FROM sofa_vaso WHERE rate_dopamine IS NOT NULL
+UNION ALL SELECT 'dob',
+       count(DISTINCT icustay_id) FROM sofa_vaso WHERE rate_dobutamine IS NOT NULL;
+
+
+\echo
+\echo '=================================================================='
+\echo ' 8. Antibiotic prescriptions: top 15 drugs'
+\echo '=================================================================='
+\echo 'EXPECTED: vancomycin, piperacillin/tazobactam (zosyn),'
+\echo ' ceftriaxone, levofloxacin, metronidazole near the top.'
+\echo
+SELECT antibiotic, count(*) AS n
+FROM antibiotic
+GROUP BY antibiotic
+ORDER BY n DESC, antibiotic  -- name breaks ties so the LIMIT cutoff is deterministic
+LIMIT 15;
+
+
+\echo
+\echo '=================================================================='
+\echo ' 9. Suspicion of infection: matching rate'
+\echo '=================================================================='
+\echo 'EXPECTED:'
+\echo ' ~50-70% of antibiotic rows are matched to a culture'
+\echo ' (i.e. suspected_infection = 1). Top specimens should be:'
+\echo ' BLOOD CULTURE, URINE, MRSA SCREEN, SPUTUM, SWAB.'
+\echo
+SELECT count(*) AS n_total
+     , sum(suspected_infection) AS n_suspected
+     , round(100.0 * sum(suspected_infection)
+             / count(*), 1) AS pct_suspected
+FROM suspicion_of_infection;
+
+SELECT specimen, count(*) AS n
+FROM suspicion_of_infection
+WHERE suspected_infection = 1
+GROUP BY specimen
+ORDER BY n DESC, specimen  -- name breaks ties so the LIMIT cutoff is deterministic
+LIMIT 10;
+
+
+\echo
+\echo '=================================================================='
+\echo '10. Sepsis-3 prevalence at the ICU-stay level'
+\echo '=================================================================='
+\echo 'EXPECTED (Johnson 2018, MIMIC-III all-cohort):'
+\echo ' total stays in sepsis3 table : 25 - 35 k'
+\echo ' (every stay with any abx and a qualifying SOFA window)'
+\echo ' sepsis3 = TRUE : 18 - 24 k (~30-40% of all ICU stays)'
+\echo
+SELECT count(*) AS n_rows
+     , sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END) AS n_sepsis3
+     , round(100.0 * sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END)
+             / NULLIF(count(*),0), 1) AS pct_sepsis3_among_rows  -- NULLIF: NULL instead of a division error on an empty table
+     , round(100.0 * sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END)
+             / NULLIF((SELECT count(*) FROM icustays), 0), 1)
+         AS pct_sepsis3_of_all_icustays  -- denominator is every ICU stay, not just rows in sepsis3
+FROM sepsis3;
+
+
+\echo
+\echo '=================================================================='
+\echo '11. Sepsis-3 onset timing'
+\echo '=================================================================='
+\echo 'EXPECTED:'
+\echo ' Most onsets occur early in the stay; median onset is on'
+\echo ' day 0-1 (~0-24h after intime). A long right tail exists'
+\echo ' for ICU-acquired sepsis.'
+\echo ' sofa_time should be within [-48h, +24h] of'
+\echo ' suspected_infection_time by construction.'
+\echo
+SELECT round(avg(EXTRACT(EPOCH FROM (suspected_infection_time - ie.intime))/3600)::numeric, 1)
+         AS mean_hours_to_onset  -- hours from ICU intime to suspected_infection_time
+     , percentile_disc(0.50) WITHIN GROUP (
+         ORDER BY EXTRACT(EPOCH FROM (suspected_infection_time - ie.intime))/3600
+       ) AS p50_hours
+     , percentile_disc(0.90) WITHIN GROUP (
+         ORDER BY EXTRACT(EPOCH FROM (suspected_infection_time - ie.intime))/3600
+       ) AS p90_hours
+     , min(EXTRACT(EPOCH FROM (sofa_time - suspected_infection_time))/3600)
+         AS min_sofa_offset_h  -- negative when sofa_time precedes suspected_infection_time
+     , max(EXTRACT(EPOCH FROM (sofa_time - suspected_infection_time))/3600)
+         AS max_sofa_offset_h
+FROM sepsis3 s
+JOIN icustays ie ON ie.icustay_id = s.icustay_id
+WHERE s.sepsis3 = TRUE;
+
+
+\echo
+\echo '=================================================================='
+\echo '12. Mortality stratified by Sepsis-3 status'
+\echo '=================================================================='
+\echo 'EXPECTED:'
+\echo ' In-hospital mortality among Sepsis-3 = TRUE: ~25-35%'
+\echo ' Among Sepsis-3 = FALSE / no row in sepsis3: ~5-10%'
+\echo
+WITH cohort AS (  -- every ICU stay, labelled by whether its sepsis3 row is TRUE
+    SELECT ie.icustay_id, ie.hadm_id
+         , CASE WHEN s.sepsis3 IS TRUE THEN 'sepsis3'
+                ELSE 'not sepsis3' END AS sepsis_status  -- FALSE and missing rows share this label
+    FROM icustays ie
+    LEFT JOIN sepsis3 s ON s.icustay_id = ie.icustay_id
+)
+SELECT c.sepsis_status
+     , count(*) AS n_stays
+     , sum(CASE WHEN adm.hospital_expire_flag = 1 THEN 1 ELSE 0 END)
+         AS n_died
+     , round(100.0 * sum(CASE WHEN adm.hospital_expire_flag = 1 THEN 1 ELSE 0 END)
+             / count(*), 1) AS pct_died
+FROM cohort c
+JOIN admissions adm ON adm.hadm_id = c.hadm_id
+GROUP BY c.sepsis_status
+ORDER BY c.sepsis_status DESC;
+
+
+\echo
+\echo '=================================================================='
+\echo '13. Sepsis-3 vs SAPS-II (cross-score validation)'
+\echo '=================================================================='
+\echo 'EXPECTED:'
+\echo ' Septic patients should have higher mean SAPS-II than non-septic'
+\echo ' (typically by ~10-15 points).'
+\echo " This sanity check requires that you have already run" +\echo " build_sapsii.sql. If sapsii does not exist, this section" +\echo " will error -- skip it with -v ON_ERROR_STOP=0." +\echo +SELECT CASE WHEN s.sepsis3 IS TRUE THEN 'sepsis3' ELSE 'not sepsis3' END + AS sepsis_status + , count(*) AS n + , round(avg(sa.sapsii)::numeric, 1) AS mean_sapsii + , round(avg(sa.sapsii_prob)::numeric, 3) AS mean_predicted_mortality +FROM icustays ie +LEFT JOIN sepsis3 s ON s.icustay_id = ie.icustay_id +LEFT JOIN sapsii sa ON sa.icustay_id = ie.icustay_id +GROUP BY (s.sepsis3 IS TRUE) +ORDER BY 1 DESC; + + +\echo +\echo '==================================================================' +\echo '14. Spot-check a few stays end-to-end' +\echo '==================================================================' +\echo "Pulls 5 random Sepsis-3 = TRUE stays and shows you the trajectory" +\echo "of sofa_24hours alongside the suspected_infection_time. Eyeball:" +\echo " - sofa_24hours should be >= 2 at hours surrounding the onset" +\echo " - sofa_24hours should plausibly rise then fall over the stay" +\echo " - hour numbering should be consecutive" +\echo +WITH picks AS ( + SELECT icustay_id, suspected_infection_time + FROM sepsis3 WHERE sepsis3 = TRUE + ORDER BY md5(icustay_id::text) + LIMIT 5 +) +SELECT p.icustay_id + , p.suspected_infection_time + , h.hr + , h.endtime + , h.respiration_24hours AS resp + , h.coagulation_24hours AS coag + , h.liver_24hours AS liv + , h.cardiovascular_24hours AS cardio + , h.cns_24hours AS cns + , h.renal_24hours AS ren + , h.sofa_24hours AS sofa +FROM picks p +JOIN sofa_hourly h ON h.icustay_id = p.icustay_id +WHERE h.endtime BETWEEN p.suspected_infection_time - INTERVAL '6 hours' + AND p.suspected_infection_time + INTERVAL '6 hours' +ORDER BY p.icustay_id, h.hr; + + +\echo +\echo 'All sanity checks complete. Anything way off the expected ranges' +\echo 'above is worth investigating before relying on the sepsis-3 cohort.' 
diff --git a/sql/sepsis/sepsis3.sql b/sql/sepsis/sepsis3.sql
new file mode 100644
index 0000000..6858b8a
--- /dev/null
+++ b/sql/sepsis/sepsis3.sql
@@ -0,0 +1,73 @@
+-- ------------------------------------------------------------------
+-- Title: Sepsis-3 onset
+--
+-- Ported to MIMIC-III v1.3 (vanilla PostgreSQL) from the MIMIC-IV
+-- upstream concept
+--     concepts/sepsis/sepsis3.sql
+--
+-- Definition (Singer et al., JAMA 2016):
+--     Sepsis-3 = SOFA >= 2 AND suspicion of infection.
+-- A SOFA row qualifies when its endtime falls between 48 h before and
+-- 24 h after the suspected-infection time. Per ICU stay only the first
+-- qualifying row is kept (earliest suspected-infection time, then
+-- antibiotic time, culture time, SOFA endtime); that is the onset.
+--
+-- Implicitly assumes baseline SOFA = 0 prior to ICU admission, since
+-- we do not have premorbid organ-dysfunction data.
+--
+-- Dependencies:
+--     sepsis/suspicion_of_infection.sql
+--     sepsis/sofa_hourly.sql
+-- ------------------------------------------------------------------
+
+DROP TABLE IF EXISTS sepsis3;
+CREATE TABLE sepsis3 AS
+
+-- Hourly SOFA rows whose 24-hour rolling total already meets the
+-- Sepsis-3 threshold; only these can pair with a suspicion event.
+WITH sofa_pos AS
+(
+    SELECT h.icustay_id
+         , h.endtime
+         , h.respiration_24hours    AS respiration
+         , h.coagulation_24hours    AS coagulation
+         , h.liver_24hours          AS liver
+         , h.cardiovascular_24hours AS cardiovascular
+         , h.cns_24hours            AS cns
+         , h.renal_24hours          AS renal
+         , h.sofa_24hours           AS sofa_score
+    FROM sofa_hourly h
+    WHERE h.sofa_24hours >= 2
+)
+-- DISTINCT ON keeps the first row of each ICU stay under the ORDER BY
+-- at the bottom of the statement.
+SELECT DISTINCT ON (soi.icustay_id)
+       soi.subject_id
+     , soi.icustay_id
+     , soi.antibiotic_time
+     , soi.culture_time
+     , soi.suspected_infection_time
+     , sp.endtime AS sofa_time
+     , sp.sofa_score
+     , sp.respiration
+     , sp.coagulation
+     , sp.liver
+     , sp.cardiovascular
+     , sp.cns
+     , sp.renal
+     , (sp.sofa_score >= 2 AND soi.suspected_infection = 1) AS sepsis3
+FROM suspicion_of_infection soi
+INNER JOIN sofa_pos sp
+    ON sp.icustay_id = soi.icustay_id
+    -- SOFA endtime within [-48 h, +24 h] of the suspected-infection time
+    AND sp.endtime >= DATETIME_SUB(soi.suspected_infection_time, INTERVAL '48' HOUR)
+    AND sp.endtime <= DATETIME_ADD(soi.suspected_infection_time, INTERVAL '24' HOUR)
+WHERE soi.icustay_id IS NOT NULL
+  AND soi.suspected_infection_time IS NOT NULL
+ORDER BY soi.icustay_id
+       , soi.suspected_infection_time
+       , soi.antibiotic_time
+       , soi.culture_time
+       , sp.endtime;
+
+CREATE INDEX IF NOT EXISTS sepsis3_idx ON sepsis3 (icustay_id);
diff --git a/sql/sepsis/sofa_hourly.sql b/sql/sepsis/sofa_hourly.sql
new file mode 100644
index 0000000..ce6d94f
--- /dev/null
+++ b/sql/sepsis/sofa_hourly.sql
@@ -0,0 +1,397 @@
+-- ------------------------------------------------------------------
+-- Title: Hourly Sequential Organ Failure Assessment (SOFA)
+--
+-- Adapted from the MIMIC-IV upstream
+--     concepts/score/sofa.sql
+-- and ported to MIMIC-III v1.3 vanilla PostgreSQL.
+--
+-- Produces one row per (icustay_id, hr) for every hour of the ICU
+-- stay, with both the per-component score AT that hour and the
+-- 24-hour rolling MAX of each component (which is the value used by
+-- Sepsis-3). Final column `sofa_24hours` is the sum of the six
+-- 24-hour rolling maxes.
+--
+-- Differences vs. MIMIC-IV upstream (search this file for "PORT NOTE"):
+--   1. ID column is `icustay_id`, not `stay_id`.
+--   2. There is no `icustay_hourly` derived table in MIMIC-III; we
+--      build the hourly grid inline with `generate_series`.
+-- 3.
There is no `mimic_derived.ventilation` with a fine-grained +-- `ventilation_status='InvasiveVent'` flag in MIMIC-III; we use +-- the lumped `ventilation_durations` table, so any active +-- ventilation row is treated as invasive ventilation for the +-- purpose of the PaO2:FiO2 vent/novent split. +-- 4. We replicate MIMIC-IV's `urine_output_rate` adjustment inline +-- in `sofa_uo`: we materialise both `uo_24hr` (sum) and +-- `uo_tm_24hr` (count of distinct hours that actually had a UO +-- observation in the past 24 h), and the renal CASE in (11) +-- uses +-- GREATEST(uo_24hr, 0) * 24.0 / uo_tm_24hr +-- only when uo_tm_24hr BETWEEN 22 AND 30, falling back to +-- creatinine alone otherwise. The GREATEST(_, 0) clip prevents +-- patients on continuous bladder irrigation (which the upstream +-- `urine_output.sql` subtracts as a negative volume) from being +-- mis-scored as oliguric. +-- 5. Vasopressor rates come from the upstream +-- durations/{epinephrine,norepinephrine,dopamine,dobutamine}_dose.sql +-- tables, which already merge CareVue + MetaVision and convert +-- to mcg/kg/min. +-- +-- Implementation note: each measurement class is materialised into +-- its own narrow staging table. This avoids forcing the planner to +-- optimise a single ~10-way CTE join, lets each scan of the giant raw +-- tables (`chartevents`, `labevents`, `outputevents`, +-- `inputevents_*`) run independently, and lets you `EXPLAIN ANALYZE` +-- each step in isolation. +-- +-- Dependencies: +-- postgres-functions.sql +-- durations/ventilation_durations.sql +-- durations/{dobutamine,dopamine,epinephrine,norepinephrine}_dose.sql +-- fluid_balance/urine_output.sql +-- sepsis/blood_gas_arterial.sql +-- sepsis/gcs_all.sql +-- ------------------------------------------------------------------ + +-- 1. Hourly grid: one row per (icustay_id, hr) for the entire stay. 
+DROP TABLE IF EXISTS sofa_grid;
+CREATE TABLE sofa_grid AS
+SELECT stay.subject_id, stay.hadm_id, stay.icustay_id
+     , hours.hr
+     -- hour `hr` runs from intime + (hr - 1) h to intime + hr h
+     , stay.intime + (INTERVAL '1 hour' * (hours.hr - 1)) AS starttime
+     , stay.intime + (INTERVAL '1 hour' * hours.hr)       AS endtime
+FROM icustays stay
+CROSS JOIN LATERAL generate_series(
+         1,
+         -- whole or partial hours between intime and outtime, never below 1
+         GREATEST(1, CEIL(EXTRACT(EPOCH FROM (stay.outtime - stay.intime)) / 3600.0)::int)
+     ) AS hours(hr)
+-- a NULL outtime fails this comparison as well, so such stays get no grid rows
+WHERE stay.outtime > stay.intime;
+
+CREATE INDEX IF NOT EXISTS sofa_grid_idx ON sofa_grid (icustay_id, hr);
+CREATE INDEX IF NOT EXISTS sofa_grid_time_idx
+    ON sofa_grid (icustay_id, starttime, endtime);
+ANALYZE sofa_grid;
+
+-- 2. Mean arterial pressure: lowest accepted reading charted in (starttime, endtime].
+DROP TABLE IF EXISTS sofa_vs;
+CREATE TABLE sofa_vs AS
+SELECT grid.icustay_id, grid.hr
+     , MIN(bp.valuenum) AS meanbp_min
+FROM sofa_grid grid
+LEFT JOIN chartevents bp
+    ON bp.icustay_id = grid.icustay_id
+    AND bp.itemid IN (456, 52, 6702, 443, 220052, 220181, 225312)
+    AND bp.charttime > grid.starttime
+    AND bp.charttime <= grid.endtime
+    AND bp.valuenum > 0 AND bp.valuenum < 300      -- only values strictly between 0 and 300
+    AND (bp.error IS NULL OR bp.error = 0)         -- drop rows flagged as errors
+GROUP BY grid.icustay_id, grid.hr;
+CREATE INDEX IF NOT EXISTS sofa_vs_idx ON sofa_vs (icustay_id, hr);
+ANALYZE sofa_vs;
+
+-- 3. GCS: lowest gcs_all value charted in (starttime, endtime]; hours with no reading stay NULL.
+DROP TABLE IF EXISTS sofa_gcs;
+CREATE TABLE sofa_gcs AS
+SELECT grid.icustay_id, grid.hr
+     , MIN(coma.gcs) AS gcs_min
+FROM sofa_grid grid
+LEFT JOIN gcs_all coma
+    ON coma.icustay_id = grid.icustay_id
+    AND coma.charttime > grid.starttime
+    AND coma.charttime <= grid.endtime
+GROUP BY grid.icustay_id, grid.hr;
+CREATE INDEX IF NOT EXISTS sofa_gcs_idx ON sofa_gcs (icustay_id, hr);
+ANALYZE sofa_gcs;
+
+-- 4. Bilirubin: maximum within the hour.
+DROP TABLE IF EXISTS sofa_bili;
+CREATE TABLE sofa_bili AS
+SELECT g.icustay_id, g.hr
+     , MAX(le.valuenum) AS bilirubin_max
+FROM sofa_grid g
+-- sofa_grid already carries subject_id / hadm_id (copied from icustays),
+-- so labevents is matched on them directly without re-joining icustays.
+LEFT JOIN labevents le
+    ON le.subject_id = g.subject_id
+    AND le.hadm_id = g.hadm_id
+    AND le.charttime > g.starttime
+    AND le.charttime <= g.endtime
+    AND le.itemid = 50885
+    AND le.valuenum IS NOT NULL
+    AND le.valuenum > 0
+GROUP BY g.icustay_id, g.hr;
+CREATE INDEX IF NOT EXISTS sofa_bili_idx ON sofa_bili (icustay_id, hr);
+ANALYZE sofa_bili;
+
+-- 5. Creatinine: maximum within the hour.
+DROP TABLE IF EXISTS sofa_cr;
+CREATE TABLE sofa_cr AS
+SELECT g.icustay_id, g.hr
+     , MAX(le.valuenum) AS creatinine_max
+FROM sofa_grid g
+-- Matched through the grid's own subject_id / hadm_id (see sofa_grid).
+-- The hadm_id equality means a lab row with a NULL hadm_id never matches.
+LEFT JOIN labevents le
+    ON le.subject_id = g.subject_id
+    AND le.hadm_id = g.hadm_id
+    AND le.charttime > g.starttime
+    AND le.charttime <= g.endtime
+    AND le.itemid = 50912
+    AND le.valuenum IS NOT NULL
+    AND le.valuenum > 0
+    AND le.valuenum < 150 -- sanity (mg/dL)
+GROUP BY g.icustay_id, g.hr;
+CREATE INDEX IF NOT EXISTS sofa_cr_idx ON sofa_cr (icustay_id, hr);
+ANALYZE sofa_cr;
+
+-- 6. Platelets: minimum within the hour.
+DROP TABLE IF EXISTS sofa_plt;
+CREATE TABLE sofa_plt AS
+SELECT g.icustay_id, g.hr
+     , MIN(le.valuenum) AS platelet_min
+FROM sofa_grid g
+-- Matched through the grid's own subject_id / hadm_id (see sofa_grid);
+-- hours with no accepted platelet value keep platelet_min = NULL.
+LEFT JOIN labevents le
+    ON le.subject_id = g.subject_id
+    AND le.hadm_id = g.hadm_id
+    AND le.charttime > g.starttime
+    AND le.charttime <= g.endtime
+    AND le.itemid = 51265
+    AND le.valuenum IS NOT NULL
+    AND le.valuenum > 0
+GROUP BY g.icustay_id, g.hr;
+CREATE INDEX IF NOT EXISTS sofa_plt_idx ON sofa_plt (icustay_id, hr);
+ANALYZE sofa_plt;
+
+-- 7. PaO2/FiO2: split into vent / novent based on whether the patient
+--    is on invasive ventilation at the time of the blood gas. We
+--    take the worst (lowest) PaO2:FiO2 of each kind during the hour.
+DROP TABLE IF EXISTS sofa_pf;
+CREATE TABLE sofa_pf AS
+WITH pafi AS (
+    -- arterial blood gases that have a ratio, tagged by whether charttime lies inside any ventilation_durations interval
+    SELECT bg.icustay_id, bg.charttime, bg.pao2fio2
+         , EXISTS (
+               SELECT 1
+               FROM ventilation_durations vd
+               WHERE vd.icustay_id = bg.icustay_id
+                 AND bg.charttime >= vd.starttime AND bg.charttime <= vd.endtime
+           ) AS on_vent
+    FROM blood_gas_arterial bg
+    WHERE bg.pao2fio2 IS NOT NULL
+)
+SELECT g.icustay_id, g.hr
+     , MIN(p.pao2fio2) FILTER (WHERE NOT p.on_vent) AS pao2fio2_novent
+     , MIN(p.pao2fio2) FILTER (WHERE p.on_vent)     AS pao2fio2_vent
+FROM sofa_grid g
+LEFT JOIN pafi p
+    ON p.icustay_id = g.icustay_id
+    AND p.charttime > g.starttime AND p.charttime <= g.endtime
+GROUP BY g.icustay_id, g.hr;
+CREATE INDEX IF NOT EXISTS sofa_pf_idx ON sofa_pf (icustay_id, hr);
+ANALYZE sofa_pf;
+
+-- 8. Urine output over the 24 h ending at each grid hour's endtime:
+--    `uo_24hr` is the summed volume and `uo_tm_24hr` counts the
+--    distinct clock hours in that window that had a UO observation.
+--    The second column lets the renal CASE tell "no data" apart from
+--    "really oliguric": the renal score in (11) only uses urine output
+--    when uo_tm_24hr is in a plausible range and otherwise falls back
+--    to creatinine alone. See port note #4 in the file header.
+DROP TABLE IF EXISTS sofa_uo;
+CREATE TABLE sofa_uo AS
+SELECT g.icustay_id, g.hr
+     , SUM(u.value) AS uo_24hr
+     , COUNT(DISTINCT date_trunc('hour', u.charttime)) AS uo_tm_24hr
+FROM sofa_grid g
+LEFT JOIN urine_output u
+    ON u.icustay_id = g.icustay_id
+    AND u.charttime <= g.endtime
+    AND u.charttime > DATETIME_SUB(g.endtime, INTERVAL '24' HOUR)
+GROUP BY g.icustay_id, g.hr;
+CREATE INDEX IF NOT EXISTS sofa_uo_idx ON sofa_uo (icustay_id, hr);
+ANALYZE sofa_uo;
+
+-- 9. Vasopressor rates: take the maximum rate of each pressor active at
+--    `endtime`. Each upstream dose table is (icustay_id, starttime,
+--    endtime, vaso_rate, vaso_amount).
+DROP TABLE IF EXISTS sofa_vaso;
+CREATE TABLE sofa_vaso AS
+SELECT g.icustay_id, g.hr
+     , MAX(e.vaso_rate) AS rate_epinephrine
+     , MAX(n.vaso_rate) AS rate_norepinephrine
+     , MAX(d.vaso_rate) AS rate_dopamine
+     , MAX(b.vaso_rate) AS rate_dobutamine
+FROM sofa_grid g
+LEFT JOIN epinephrine_dose e
+    ON e.icustay_id = g.icustay_id
+    AND e.starttime < g.endtime
+    AND e.endtime >= g.endtime
+LEFT JOIN norepinephrine_dose n
+    ON n.icustay_id = g.icustay_id
+    AND n.starttime < g.endtime
+    AND n.endtime >= g.endtime
+LEFT JOIN dopamine_dose d
+    ON d.icustay_id = g.icustay_id
+    AND d.starttime < g.endtime
+    AND d.endtime >= g.endtime
+LEFT JOIN dobutamine_dose b
+    ON b.icustay_id = g.icustay_id
+    AND b.starttime < g.endtime
+    AND b.endtime >= g.endtime
+-- Keep only grid hours where at least one of the four dose tables has
+-- an interval covering endtime; hours with no pressor produce no row
+-- here and surface as NULL rates once left-joined in the wide table.
+WHERE COALESCE(e.icustay_id, n.icustay_id, d.icustay_id, b.icustay_id) IS NOT NULL
+GROUP BY g.icustay_id, g.hr;
+CREATE INDEX IF NOT EXISTS sofa_vaso_idx ON sofa_vaso (icustay_id, hr);
+ANALYZE sofa_vaso;
+
+-- 10. Wide assembly: left-join all of the above onto the grid.
+DROP TABLE IF EXISTS sofa_wide;
+CREATE TABLE sofa_wide AS
+SELECT g.subject_id, g.hadm_id, g.icustay_id, g.hr
+     , g.starttime, g.endtime
+     , bp.meanbp_min                -- from sofa_vs
+     , coma.gcs_min                 -- from sofa_gcs
+     , bili.bilirubin_max           -- from sofa_bili
+     , creat.creatinine_max         -- from sofa_cr
+     , plt.platelet_min             -- from sofa_plt
+     , pafi.pao2fio2_novent         -- from sofa_pf
+     , pafi.pao2fio2_vent
+     , urine.uo_24hr                -- from sofa_uo
+     , urine.uo_tm_24hr
+     , vaso.rate_epinephrine        -- from sofa_vaso (NULL when no pressor row exists for the hour)
+     , vaso.rate_norepinephrine
+     , vaso.rate_dopamine
+     , vaso.rate_dobutamine
+FROM sofa_grid g
+LEFT JOIN sofa_vs   bp    USING (icustay_id, hr)
+LEFT JOIN sofa_gcs  coma  USING (icustay_id, hr)
+LEFT JOIN sofa_bili bili  USING (icustay_id, hr)
+LEFT JOIN sofa_cr   creat USING (icustay_id, hr)
+LEFT JOIN sofa_plt  plt   USING (icustay_id, hr)
+LEFT JOIN sofa_pf   pafi  USING (icustay_id, hr)
+LEFT JOIN sofa_uo   urine USING (icustay_id, hr)
+LEFT JOIN sofa_vaso vaso  USING (icustay_id, hr);
+CREATE INDEX IF NOT EXISTS sofa_wide_idx ON sofa_wide (icustay_id, hr);
+ANALYZE sofa_wide;
+
+-- 11. Per-hour component scores (no rolling window yet).
+DROP TABLE IF EXISTS sofa_components;
+CREATE TABLE sofa_components AS
+SELECT w.*
+     -- Respiration: PaO2:FiO2; scores 3 and 4 are reachable only from a ventilated sample
+     , CASE
+           WHEN w.pao2fio2_vent   < 100 THEN 4
+           WHEN w.pao2fio2_vent   < 200 THEN 3
+           WHEN w.pao2fio2_novent < 300 THEN 2
+           WHEN w.pao2fio2_vent   < 300 THEN 2
+           WHEN w.pao2fio2_novent < 400 THEN 1
+           WHEN w.pao2fio2_vent   < 400 THEN 1
+           WHEN w.pao2fio2_vent IS NULL AND w.pao2fio2_novent IS NULL THEN NULL
+           ELSE 0
+       END AS respiration
+
+     -- Coagulation: platelet count
+     , CASE
+           WHEN w.platelet_min < 20  THEN 4
+           WHEN w.platelet_min < 50  THEN 3
+           WHEN w.platelet_min < 100 THEN 2
+           WHEN w.platelet_min < 150 THEN 1
+           WHEN w.platelet_min IS NULL THEN NULL
+           ELSE 0
+       END AS coagulation
+
+     -- Liver: bilirubin (mg/dL)
+     , CASE
+           WHEN w.bilirubin_max >= 12.0 THEN 4
+           WHEN w.bilirubin_max >= 6.0  THEN 3
+           WHEN w.bilirubin_max >= 2.0  THEN 2
+           WHEN w.bilirubin_max >= 1.2  THEN 1
+           WHEN w.bilirubin_max IS NULL THEN NULL
+           ELSE 0
+       END AS liver
+
+     -- Cardiovascular: pressor rates first, then MAP; a NULL rate never satisfies a comparison
+     , CASE
+           WHEN w.rate_dopamine > 15 OR w.rate_epinephrine > 0.1 OR w.rate_norepinephrine > 0.1 THEN 4
+           WHEN w.rate_dopamine > 5 OR w.rate_epinephrine <= 0.1 OR w.rate_norepinephrine <= 0.1 THEN 3
+           WHEN w.rate_dopamine > 0 OR w.rate_dobutamine > 0 THEN 2
+           WHEN w.meanbp_min < 70 THEN 1
+           WHEN COALESCE(w.meanbp_min, w.rate_dopamine, w.rate_dobutamine,
+                         w.rate_epinephrine, w.rate_norepinephrine) IS NULL THEN NULL
+           ELSE 0
+       END AS cardiovascular
+
+     -- CNS: Glasgow Coma Scale
+     , CASE
+           WHEN w.gcs_min BETWEEN 13 AND 14 THEN 1
+           WHEN w.gcs_min BETWEEN 10 AND 12 THEN 2
+           WHEN w.gcs_min BETWEEN 6 AND 9   THEN 3
+           WHEN w.gcs_min < 6               THEN 4
+           WHEN w.gcs_min IS NULL           THEN NULL
+           ELSE 0
+       END AS cns
+
+     -- Renal: creatinine and/or urine output.
+     -- uo_24hr is rescaled to a 24-hour equivalent and used only when
+     -- uo_tm_24hr (distinct hours with a UO observation in the window)
+     -- is between 22 and 30; outside that range the UO branches do not
+     -- fire and the score rests on creatinine alone.
+     -- GREATEST(uo_24hr, 0) clips a negative net volume to zero; the
+     -- file header attributes negative values to GU irrigant being
+     -- subtracted upstream, so a patient on continuous bladder
+     -- irrigation is not mis-scored as oliguric.
+     , CASE
+           WHEN w.creatinine_max >= 5.0 THEN 4
+           WHEN w.uo_tm_24hr BETWEEN 22 AND 30
+                AND GREATEST(w.uo_24hr, 0) * 24.0 / w.uo_tm_24hr < 200 THEN 4
+           WHEN w.creatinine_max >= 3.5 AND w.creatinine_max < 5.0 THEN 3
+           WHEN w.uo_tm_24hr BETWEEN 22 AND 30
+                AND GREATEST(w.uo_24hr, 0) * 24.0 / w.uo_tm_24hr < 500 THEN 3
+           WHEN w.creatinine_max >= 2.0 AND w.creatinine_max < 3.5 THEN 2
+           WHEN w.creatinine_max >= 1.2 AND w.creatinine_max < 2.0 THEN 1
+           WHEN w.creatinine_max IS NULL
+                AND NOT (w.uo_tm_24hr BETWEEN 22 AND 30) THEN NULL
+           ELSE 0
+       END AS renal
+FROM sofa_wide w;
+CREATE INDEX IF NOT EXISTS sofa_components_idx
+    ON sofa_components (icustay_id, hr);
+ANALYZE sofa_components;
+
+-- 12. Final hourly SOFA: rolling MAX of each component over 24 grid rows, then summed.
+DROP TABLE IF EXISTS sofa_hourly;
+CREATE TABLE sofa_hourly AS
+-- rolled: per-hour components plus their rolling maxima (NULL maxima default to 0)
+WITH rolled AS (
+    SELECT c.subject_id, c.hadm_id, c.icustay_id, c.hr
+         , c.starttime, c.endtime
+         , c.respiration, c.coagulation, c.liver
+         , c.cardiovascular, c.cns, c.renal
+         , COALESCE(MAX(c.respiration)    OVER last_24h, 0) AS respiration_24hours
+         , COALESCE(MAX(c.coagulation)    OVER last_24h, 0) AS coagulation_24hours
+         , COALESCE(MAX(c.liver)          OVER last_24h, 0) AS liver_24hours
+         , COALESCE(MAX(c.cardiovascular) OVER last_24h, 0) AS cardiovascular_24hours
+         , COALESCE(MAX(c.cns)            OVER last_24h, 0) AS cns_24hours
+         , COALESCE(MAX(c.renal)          OVER last_24h, 0) AS renal_24hours
+    FROM sofa_components c
+    -- frame = the current grid row plus the 23 rows before it within the same stay
+    WINDOW last_24h AS (PARTITION BY c.icustay_id ORDER BY c.hr
+                        ROWS BETWEEN 23 PRECEDING AND CURRENT ROW)
+)
+SELECT r.*
+     -- total score = sum of the six rolling maxima
+     , r.respiration_24hours + r.coagulation_24hours + r.liver_24hours
+       + r.cardiovascular_24hours + r.cns_24hours + r.renal_24hours
+       AS sofa_24hours
+FROM rolled r;
+CREATE INDEX IF NOT EXISTS sofa_hourly_idx
+    ON sofa_hourly (icustay_id, hr);
+CREATE INDEX IF NOT EXISTS sofa_hourly_time_idx
+    ON sofa_hourly
(icustay_id, endtime);
+ANALYZE sofa_hourly;
diff --git a/sql/sepsis/suspicion_of_infection.sql b/sql/sepsis/suspicion_of_infection.sql
new file mode 100644
index 0000000..a1cf3f3
--- /dev/null
+++ b/sql/sepsis/suspicion_of_infection.sql
@@ -0,0 +1,153 @@
+-- ------------------------------------------------------------------
+-- Title: Suspicion of Infection
+--
+-- Ported to MIMIC-III v1.3 (vanilla PostgreSQL) from the MIMIC-IV
+-- upstream concept
+--     concepts/sepsis/suspicion_of_infection.sql
+--
+-- Definition (from the original Sepsis-3 paper, Seymour 2016):
+--   an antibiotic counts as "suspicion of infection" when a culture
+--   is taken close to it in time:
+--     - culture <= 72 h before antibiotic, OR
+--     - culture <= 24 h after antibiotic.
+--   suspected_infection_time is the culture time when the culture
+--   comes first, otherwise the antibiotic time.
+--
+-- PORT NOTES:
+--   1. ID column is `icustay_id`, not `stay_id`.
+--   2. MIMIC-III has no `micro_specimen_id`; a specimen is identified
+--      by the tuple (subject_id, hadm_id, chartdate, charttime,
+--      spec_itemid, spec_type_desc), and organism rows sharing that
+--      tuple are collapsed into one row.
+--   3. MIMIC-III `prescriptions.startdate` is DATE-precision only.
+--      Consequently `antibiotic_time` always lands on midnight; the
+--      MIMIC-IV branches that compare to the culture `charttime` still
+--      work (DATE auto-casts to TIMESTAMP at 00:00) but give day-level
+--      onset precision.
+-- ------------------------------------------------------------------
+
+DROP TABLE IF EXISTS suspicion_of_infection;
+CREATE TABLE suspicion_of_infection AS
+
+-- abx: one row per antibiotic row, numbered per patient (ab_id)
+WITH abx AS (
+    SELECT a.subject_id
+         , a.hadm_id
+         , a.icustay_id
+         , a.antibiotic
+         , CAST(a.starttime AS TIMESTAMP) AS antibiotic_time
+         , CAST(a.starttime AS DATE)      AS antibiotic_date
+         , CAST(a.stoptime AS TIMESTAMP)  AS antibiotic_stoptime
+         , ROW_NUMBER() OVER (
+               PARTITION BY a.subject_id
+               ORDER BY a.starttime, a.stoptime, a.antibiotic
+           ) AS ab_id
+    FROM antibiotic a
+)
+-- cultures: one row per specimen tuple; positiveculture = 1 when any organism name is present
+, cultures AS (
+    SELECT m.subject_id, m.hadm_id
+         , m.spec_itemid
+         , m.spec_type_desc
+         , CAST(m.chartdate AS DATE) AS chartdate
+         , m.charttime
+         , MAX(CASE WHEN m.org_name IS NOT NULL AND m.org_name <> ''
+                    THEN 1 ELSE 0 END) AS positiveculture
+    FROM microbiologyevents m
+    GROUP BY m.subject_id, m.hadm_id, m.chartdate, m.charttime
+           , m.spec_itemid, m.spec_type_desc
+)
+-- culture_first: per antibiotic, cultures taken beforehand (micro_seq = 1 is the earliest)
+, culture_first AS (
+    SELECT abx.subject_id
+         , abx.hadm_id
+         , abx.icustay_id
+         , abx.ab_id
+         , COALESCE(cx_pre.charttime,
+                    CAST(cx_pre.chartdate AS TIMESTAMP)) AS last72_charttime
+         , cx_pre.positiveculture AS last72_positiveculture
+         , cx_pre.spec_type_desc  AS last72_specimen
+         , ROW_NUMBER() OVER (
+               PARTITION BY abx.subject_id, abx.ab_id
+               ORDER BY cx_pre.chartdate, cx_pre.charttime NULLS LAST
+           ) AS micro_seq
+    FROM abx
+    LEFT JOIN cultures cx_pre
+        ON cx_pre.subject_id = abx.subject_id
+        AND (
+            (   -- timed culture: antibiotic strictly after it and at most 72 h later
+                cx_pre.charttime IS NOT NULL
+                AND cx_pre.charttime < abx.antibiotic_time
+                AND abx.antibiotic_time <= DATETIME_ADD(cx_pre.charttime, INTERVAL '72' HOUR)
+            )
+            OR (   -- date-only culture: antibiotic date from the culture date up to 3 days later
+                cx_pre.charttime IS NULL
+                AND abx.antibiotic_date >= cx_pre.chartdate
+                AND abx.antibiotic_date <= cx_pre.chartdate + INTERVAL '3 day'
+            )
+        )
+)
+-- abx_first: per antibiotic, cultures taken afterwards (micro_seq = 1 is the earliest)
+, abx_first AS (
+    SELECT abx.subject_id
+         , abx.hadm_id
+         , abx.icustay_id
+         , abx.ab_id
+         , COALESCE(cx_post.charttime,
+                    CAST(cx_post.chartdate AS TIMESTAMP)) AS next24_charttime
+         , cx_post.positiveculture AS next24_positiveculture
+         , cx_post.spec_type_desc  AS next24_specimen
+         , ROW_NUMBER() OVER (
+               PARTITION BY abx.subject_id, abx.ab_id
+               ORDER BY cx_post.chartdate, cx_post.charttime NULLS LAST
+           ) AS micro_seq
+    FROM abx
+    LEFT JOIN cultures cx_post
+        ON cx_post.subject_id = abx.subject_id
+        AND (
+            (   -- timed culture: antibiotic strictly before it and at most 24 h earlier
+                cx_post.charttime IS NOT NULL
+                AND abx.antibiotic_time >= DATETIME_SUB(cx_post.charttime, INTERVAL '24' HOUR)
+                AND abx.antibiotic_time < cx_post.charttime
+            )
+            OR (   -- date-only culture: antibiotic date on the culture date or the day before
+                cx_post.charttime IS NULL
+                AND abx.antibiotic_date >= cx_post.chartdate - INTERVAL '1 day'
+                AND abx.antibiotic_date <= cx_post.chartdate
+            )
+        )
+)
+SELECT abx.subject_id
+     , abx.icustay_id
+     , abx.hadm_id
+     , abx.ab_id
+     , abx.antibiotic
+     , abx.antibiotic_time
+
+     -- flagged when either window produced a culture with a specimen type
+     , CASE
+           WHEN COALESCE(cf.last72_specimen, af.next24_specimen) IS NULL THEN 0
+           ELSE 1
+       END AS suspected_infection
+     -- culture time when a culture came first, otherwise the antibiotic time
+     , CASE
+           WHEN COALESCE(cf.last72_specimen, af.next24_specimen) IS NULL THEN NULL
+           ELSE COALESCE(cf.last72_charttime, abx.antibiotic_time)
+       END AS suspected_infection_time
+
+     , COALESCE(cf.last72_charttime, af.next24_charttime) AS culture_time
+     , COALESCE(cf.last72_specimen, af.next24_specimen)   AS specimen
+     , COALESCE(cf.last72_positiveculture,
+                af.next24_positiveculture)                AS positive_culture
+FROM abx
+LEFT JOIN abx_first af
+    ON af.subject_id = abx.subject_id
+    AND af.ab_id = abx.ab_id
+    AND af.micro_seq = 1
+LEFT JOIN culture_first cf
+    ON cf.subject_id = abx.subject_id
+    AND cf.ab_id = abx.ab_id
+    AND cf.micro_seq = 1;
+
+CREATE INDEX IF NOT EXISTS suspicion_of_infection_idx
+    ON suspicion_of_infection (icustay_id, suspected_infection_time);
diff --git a/sql/severityscores/sapsii.sql b/sql/severityscores/sapsii.sql
new file mode 100644
index 0000000..3ab3c34
--- /dev/null
+++ b/sql/severityscores/sapsii.sql
@@ -0,0 +1,384 @@
+-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
+DROP TABLE IF EXISTS sapsii; CREATE TABLE sapsii AS +-- ------------------------------------------------------------------ +-- Title: Simplified Acute Physiology Score II (SAPS II) +-- This query extracts the simplified acute physiology score II. +-- This score is a measure of patient severity of illness. +-- The score is calculated on the first day of each ICU patients' stay. +-- ------------------------------------------------------------------ + +-- Reference for SAPS II: +-- Le Gall, Jean-Roger, Stanley Lemeshow, and Fabienne Saulnier. +-- "A new simplified acute physiology score (SAPS II) based on a European/North American multicenter study." +-- JAMA 270, no. 24 (1993): 2957-2963. + +-- Variables used in SAPS II: +-- Age, GCS +-- VITALS: Heart rate, systolic blood pressure, temperature +-- FLAGS: ventilation/cpap +-- IO: urine output +-- LABS: PaO2/FiO2 ratio, blood urea nitrogen, WBC, potassium, sodium, HCO3 + +-- The following views are required to run this query: +-- 1) urine_output_first_day - generated by urine-output-first-day.sql +-- 2) ventilation_durations - generated by ventilation_durations.sql +-- 3) vitals_first_day - generated by vitals-first-day.sql +-- 4) gcs_first_day - generated by gcs-first-day.sql +-- 5) labs_first_day - generated by labs-first-day.sql +-- 6) blood_gas_arterial_first_day - generated by blood-gas-first-day-arterial.sql + +-- Note: +-- The score is calculated for *all* ICU patients, with the assumption that the user will subselect appropriate ICUSTAY_IDs. +-- For example, the score is calculated for neonates, but it is likely inappropriate to actually use the score values for these patients. 
+ +-- extract CPAP from the "Oxygen Delivery Device" fields +with cpap as +( + select ie.icustay_id + , min(DATETIME_SUB(charttime, INTERVAL '1' HOUR)) as starttime + , max(DATETIME_ADD(charttime, INTERVAL '4' HOUR)) as endtime + , max(CASE + WHEN lower(ce.value) LIKE '%cpap%' THEN 1 + WHEN lower(ce.value) LIKE '%bipap mask%' THEN 1 + else 0 end) as cpap + FROM icustays ie + inner join chartevents ce + on ie.icustay_id = ce.icustay_id + and ce.charttime between ie.intime and DATETIME_ADD(ie.intime, INTERVAL '1' DAY) + where itemid in + ( + -- TODO: when metavision data import fixed, check the values in 226732 match the value clause below + 467, 469, 226732 + ) + and (lower(ce.value) LIKE '%cpap%' or lower(ce.value) LIKE '%bipap mask%') + -- exclude rows marked as error + AND (ce.error IS NULL OR ce.error = 0) + group by ie.icustay_id +) +-- extract a flag for surgical service +-- this combined with "elective" FROM admissions table defines elective/non-elective surgery +, surgflag as +( + select adm.hadm_id + , case when lower(curr_service) like '%surg%' then 1 else 0 end as surgical + , ROW_NUMBER() over + ( + PARTITION BY adm.HADM_ID + ORDER BY TRANSFERTIME + ) as serviceOrder + FROM admissions adm + left join services se + on adm.hadm_id = se.hadm_id +) +-- icd-9 diagnostic codes are our best source for comorbidity information +-- unfortunately, they are technically a-causal +-- however, this shouldn't matter too much for the SAPS II comorbidities +, comorb as +( +select hadm_id +-- these are slightly different than elixhauser comorbidities, but based on them +-- they include some non-comorbid ICD-9 codes (e.g. 
20302, relapse of multiple myeloma) + , max(CASE + when SUBSTR(icd9_code,1,3) BETWEEN '042' AND '044' THEN 1 + end) as aids /* HIV and AIDS */ + , max(CASE + when icd9_code between '20000' and '20238' then 1 -- lymphoma + when icd9_code between '20240' and '20248' then 1 -- leukemia + when icd9_code between '20250' and '20302' then 1 -- lymphoma + when icd9_code between '20310' and '20312' then 1 -- leukemia + when icd9_code between '20302' and '20382' then 1 -- lymphoma + when icd9_code between '20400' and '20522' then 1 -- chronic leukemia + when icd9_code between '20580' and '20702' then 1 -- other myeloid leukemia + when icd9_code between '20720' and '20892' then 1 -- other myeloid leukemia + when SUBSTR(icd9_code,1,4) = '2386' then 1 -- lymphoma + when SUBSTR(icd9_code,1,4) = '2733' then 1 -- lymphoma + end) as hem + , max(CASE + when SUBSTR(icd9_code,1,4) BETWEEN '1960' AND '1991' THEN 1 + when icd9_code between '20970' and '20975' then 1 + when icd9_code = '20979' then 1 + when icd9_code = '78951' then 1 + end) as mets /* Metastatic cancer */ + from diagnoses_icd + group by hadm_id +) +, pafi1 as +( + -- join blood gas to ventilation durations to determine if patient was vent + -- also join to cpap table for the same purpose + select bg.icustay_id, bg.charttime + , pao2fio2 + , case when vd.icustay_id is not null then 1 else 0 end as vent + , case when cp.icustay_id is not null then 1 else 0 end as cpap + from blood_gas_first_day_arterial bg + left join ventilation_durations vd + on bg.icustay_id = vd.icustay_id + and bg.charttime >= vd.starttime + and bg.charttime <= vd.endtime + left join cpap cp + on bg.icustay_id = cp.icustay_id + and bg.charttime >= cp.starttime + and bg.charttime <= cp.endtime +) +, pafi2 as +( + -- get the minimum PaO2/FiO2 ratio *only for ventilated/cpap patients* + select icustay_id + , min(pao2fio2) as pao2fio2_vent_min + from pafi1 + where vent = 1 or cpap = 1 + group by icustay_id +) +, cohort as +( +select ie.subject_id, 
ie.hadm_id, ie.icustay_id + , ie.intime + , ie.outtime + + -- the casts ensure the result is numeric.. we could equally extract EPOCH from the interval + -- however this code works in Oracle and Postgres + , DATETIME_DIFF(ie.intime, pat.dob, 'YEAR') as age + + , vital.heartrate_max + , vital.heartrate_min + , vital.sysbp_max + , vital.sysbp_min + , vital.tempc_max + , vital.tempc_min + + -- this value is non-null iff the patient is on vent/cpap + , pf.pao2fio2_vent_min + + , uo.urineoutput + + , labs.bun_min + , labs.bun_max + , labs.wbc_min + , labs.wbc_max + , labs.potassium_min + , labs.potassium_max + , labs.sodium_min + , labs.sodium_max + , labs.bicarbonate_min + , labs.bicarbonate_max + , labs.bilirubin_min + , labs.bilirubin_max + + , gcs.mingcs + + , comorb.aids + , comorb.hem + , comorb.mets + + , case + when adm.ADMISSION_TYPE = 'ELECTIVE' and sf.surgical = 1 + then 'ScheduledSurgical' + when adm.ADMISSION_TYPE != 'ELECTIVE' and sf.surgical = 1 + then 'UnscheduledSurgical' + else 'Medical' + end as admissiontype + + +FROM icustays ie +inner join admissions adm + on ie.hadm_id = adm.hadm_id +inner join patients pat + on ie.subject_id = pat.subject_id + +-- join to above views +left join pafi2 pf + on ie.icustay_id = pf.icustay_id +left join surgflag sf + on adm.hadm_id = sf.hadm_id and sf.serviceOrder = 1 +left join comorb + on ie.hadm_id = comorb.hadm_id + +-- join to custom tables to get more data.... 
+left join gcs_first_day gcs
+  on ie.icustay_id = gcs.icustay_id
+left join vitals_first_day vital
+  on ie.icustay_id = vital.icustay_id
+left join urine_output_first_day uo
+  on ie.icustay_id = uo.icustay_id
+left join labs_first_day labs
+  on ie.icustay_id = labs.icustay_id
+)
+, scorecomp as  -- every cohort column plus one *_score column per SAPS-II component
+(
+select
+  cohort.*
+  -- Below code calculates the component scores needed for SAPS; a component is NULL when its input is missing, and CASE branch order matters (first match wins)
+  , case  -- age: 0 / 7 / 12 / 15 / 16 / 18 points, cut-offs at 40, 60, 70, 75, 80
+      when age is null then null
+      when age < 40 then 0
+      when age < 60 then 7
+      when age < 70 then 12
+      when age < 75 then 15
+      when age < 80 then 16
+      when age >= 80 then 18
+    end as age_score
+
+  , case  -- heart rate: a minimum below 40 outranks any maximum; 0 only when min and max both lie in [70, 120)
+      when heartrate_max is null then null
+      when heartrate_min < 40 then 11
+      when heartrate_max >= 160 then 7
+      when heartrate_max >= 120 then 4
+      when heartrate_min < 70 then 2
+      when heartrate_max >= 70 and heartrate_max < 120
+        and heartrate_min >= 70 and heartrate_min < 120
+      then 0
+    end as hr_score
+
+  , case  -- systolic BP: low minimum is tested before high maximum; 0 only when min and max both lie in [100, 200)
+      when sysbp_min is null then null
+      when sysbp_min < 70 then 13
+      when sysbp_min < 100 then 5
+      when sysbp_max >= 200 then 2
+      when sysbp_max >= 100 and sysbp_max < 200
+        and sysbp_min >= 100 and sysbp_min < 200
+      then 0
+    end as sysbp_score
+
+  , case  -- temperature: decided on tempc_max so a low minimum cannot mask a peak >= 39.0 (deviates from the mirrored upstream script, which tested tempc_min < 39.0 first)
+      when tempc_max is null then null
+      when tempc_max >= 39.0 then 3
+      when tempc_max < 39.0 then 0
+    end as temp_score
+
+  , case  -- pao2fio2_vent_min: there is no zero-point branch; any non-NULL value scores at least 6
+      when pao2fio2_vent_min is null then null
+      when pao2fio2_vent_min < 100 then 11
+      when pao2fio2_vent_min < 200 then 9
+      when pao2fio2_vent_min >= 200 then 6
+    end as pao2fio2_score
+
+  , case  -- urine output: 11 / 4 / 0 points, cut-offs at 500 and 1000
+      when urineoutput is null then null
+      when urineoutput < 500.0 then 11
+      when urineoutput < 1000.0 then 4
+      when urineoutput >= 1000.0 then 0
+    end as uo_score
+
+  , case  -- BUN: scored on the maximum only, cut-offs at 28 and 84
+      when bun_max is null then null
+      when bun_max < 28.0 then 0
+      when bun_max < 84.0 then 6
+      when bun_max >= 84.0 then 10
+    end as bun_score
+
+  , case  -- WBC: a minimum below 1.0 outranks a maximum of 20.0 or more
+      when wbc_max is null then null
+      when wbc_min < 1.0 then 12
+      when wbc_max >= 20.0 then 3
+      when wbc_max >= 1.0 and wbc_max < 20.0
+        and wbc_min >= 1.0 and wbc_min < 20.0
+      then 0
+    end as wbc_score
+
+  , case  -- potassium: 3 points for either extreme; 0 only when min and max both lie in [3.0, 5.0)
+      when potassium_max is null then null
+      when potassium_min < 3.0 then 3
+      when potassium_max >= 5.0 then 3
+      when potassium_max >= 3.0 and potassium_max < 5.0
+        and potassium_min >= 3.0 and potassium_min < 5.0
+      then 0
+    end as potassium_score
+
+  , case  -- sodium: a minimum below 125 outranks a maximum of 145 or more
+      when sodium_max is null then null
+      when sodium_min < 125 then 5
+      when sodium_max >= 145 then 1
+      when sodium_max >= 125 and sodium_max < 145
+        and sodium_min >= 125 and sodium_min < 145
+      then 0
+    end as sodium_score
+
+  , case  -- bicarbonate: points are driven by the minimum; 0 requires min and max both >= 20.0
+      when bicarbonate_max is null then null
+      when bicarbonate_min < 15.0 then 5
+      when bicarbonate_min < 20.0 then 3
+      when bicarbonate_max >= 20.0
+        and bicarbonate_min >= 20.0
+      then 0
+    end as bicarbonate_score
+
+  , case  -- bilirubin: scored on the maximum only, cut-offs at 4.0 and 6.0
+      when bilirubin_max is null then null
+      when bilirubin_max < 4.0 then 0
+      when bilirubin_max < 6.0 then 4
+      when bilirubin_max >= 6.0 then 9
+    end as bilirubin_score
+
+  , case  -- GCS: scored on mingcs; values above 15 match no branch and therefore yield NULL
+      when mingcs is null then null
+      when mingcs < 3 then null -- erroneous value/on trach
+      when mingcs < 6 then 26
+      when mingcs < 9 then 13
+      when mingcs < 11 then 7
+      when mingcs < 14 then 5
+      when mingcs >= 14
+        and mingcs <= 15
+      then 0
+    end as gcs_score
+
+  , case  -- comorbidity: only the first matching flag counts (aids, then hem, then mets); never NULL
+      when aids = 1 then 17
+      when hem = 1 then 10
+      when mets = 1 then 9
+      else 0
+    end as comorbidity_score
+
+  , case  -- admission type: any label other than the three listed (or a missing one) yields NULL
+      when admissiontype = 'ScheduledSurgical' then 0
+      when admissiontype = 'Medical' then 6
+      when admissiontype = 'UnscheduledSurgical' then 8
+      else null
+    end as admissiontype_score
+
+from cohort
+)
+-- Calculate SAPS II here so we can use it in the probability calculation below
+, score as
+(
+  select s.*
+  -- coalesce statements impute normal score of zero if data element is missing
+  , coalesce(age_score,0)
+  + coalesce(hr_score,0)
+  + coalesce(sysbp_score,0)
+  + coalesce(temp_score,0)
+  + coalesce(pao2fio2_score,0)
+  + coalesce(uo_score,0)
+  + coalesce(bun_score,0)
+  + coalesce(wbc_score,0)
+  + coalesce(potassium_score,0)
+  + coalesce(sodium_score,0)
+  + coalesce(bicarbonate_score,0)
+  + coalesce(bilirubin_score,0)
+  + coalesce(gcs_score,0)
+  + coalesce(comorbidity_score,0)
+  + coalesce(admissiontype_score,0)
+    as sapsii
+  from scorecomp s
+)
+select ie.subject_id, ie.hadm_id, ie.icustay_id  -- final output: identifiers, total score, probability, then each component score
+, sapsii
+, 1 / (1 + exp(- (-7.7631 + 0.0737*(sapsii) + 0.9971*(ln(sapsii + 1))) )) as sapsii_prob  -- logistic transform of the total score
+, age_score
+, hr_score
+, sysbp_score
+, temp_score
+, pao2fio2_score
+, uo_score
+, bun_score
+, wbc_score
+, potassium_score
+, sodium_score
+, bicarbonate_score
+, bilirubin_score
+, gcs_score
+, comorbidity_score
+, admissiontype_score
+FROM icustays ie  -- driving table: the left join below keeps every ICU stay, with NULL scores when no score row matches
+left join score s
+  on ie.icustay_id = s.icustay_id
+order by ie.icustay_id;