Files

398 lines
15 KiB
MySQL
Raw Permalink Normal View History

2026-05-05 10:22:17 +02:00
-- ------------------------------------------------------------------
-- Title: Hourly Sequential Organ Failure Assessment (SOFA)
--
-- Adapted from the MIMIC-IV upstream
-- concepts/score/sofa.sql
-- and ported to MIMIC-III v1.3 vanilla PostgreSQL.
--
-- Produces one row per (icustay_id, hr) for every hour of the ICU
-- stay, with both the per-component score AT that hour and the
-- 24-hour rolling MAX of each component (which is the value used by
-- Sepsis-3). Final column `sofa_24hours` is the sum of the six
-- 24-hour rolling maxes.
--
-- Differences vs. MIMIC-IV upstream (search this file for "PORT NOTE"):
-- 1. ID column is `icustay_id`, not `stay_id`.
-- 2. There is no `icustay_hourly` derived table in MIMIC-III; we
-- build the hourly grid inline with `generate_series`.
-- 3. There is no `mimic_derived.ventilation` with a fine-grained
-- `ventilation_status='InvasiveVent'` flag in MIMIC-III; we use
-- the lumped `ventilation_durations` table, so any active
-- ventilation row is treated as invasive ventilation for the
-- purpose of the PaO2:FiO2 vent/novent split.
-- 4. We replicate MIMIC-IV's `urine_output_rate` adjustment inline
-- in `sofa_uo`: we materialise both `uo_24hr` (sum) and
-- `uo_tm_24hr` (count of distinct hours that actually had a UO
-- observation in the past 24 h), and the renal CASE in (11)
-- uses
-- GREATEST(uo_24hr, 0) * 24.0 / uo_tm_24hr
-- only when uo_tm_24hr BETWEEN 22 AND 30, falling back to
-- creatinine alone otherwise. The GREATEST(_, 0) clip prevents
-- patients on continuous bladder irrigation (which the upstream
-- `urine_output.sql` subtracts as a negative volume) from being
-- mis-scored as oliguric.
-- 5. Vasopressor rates come from the upstream
-- durations/{epinephrine,norepinephrine,dopamine,dobutamine}_dose.sql
-- tables, which already merge CareVue + MetaVision and convert
-- to mcg/kg/min.
--
-- Implementation note: each measurement class is materialised into
-- its own narrow staging table. This avoids forcing the planner to
-- optimise a single ~10-way CTE join, lets each scan of the giant raw
-- tables (`chartevents`, `labevents`, `outputevents`,
-- `inputevents_*`) run independently, and lets you `EXPLAIN ANALYZE`
-- each step in isolation.
--
-- Dependencies:
-- postgres-functions.sql
-- durations/ventilation_durations.sql
-- durations/{dobutamine,dopamine,epinephrine,norepinephrine}_dose.sql
-- fluid_balance/urine_output.sql
-- sepsis/blood_gas_arterial.sql
-- sepsis/gcs_all.sql
-- ------------------------------------------------------------------
-- 1. Hourly grid: one row per (icustay_id, hr) for the entire stay.
DROP TABLE IF EXISTS sofa_grid;
CREATE TABLE sofa_grid AS
SELECT ie.subject_id, ie.hadm_id, ie.icustay_id
, gs.hr
, ie.intime + ((gs.hr - 1) * INTERVAL '1 hour') AS starttime
, ie.intime + (gs.hr * INTERVAL '1 hour') AS endtime
FROM icustays ie
, LATERAL generate_series(
1,
GREATEST(1,
CEIL(EXTRACT(EPOCH FROM (ie.outtime - ie.intime)) / 3600.0)::int
)
) AS gs(hr)
WHERE ie.outtime IS NOT NULL
AND ie.outtime > ie.intime;
CREATE INDEX IF NOT EXISTS sofa_grid_idx ON sofa_grid (icustay_id, hr);
CREATE INDEX IF NOT EXISTS sofa_grid_time_idx
ON sofa_grid (icustay_id, starttime, endtime);
ANALYZE sofa_grid;
-- 2. Mean arterial pressure: minimum within the hour.
DROP TABLE IF EXISTS sofa_vs;
CREATE TABLE sofa_vs AS
SELECT g.icustay_id, g.hr
, MIN(ce.valuenum) AS meanbp_min
FROM sofa_grid g
LEFT JOIN chartevents ce
ON ce.icustay_id = g.icustay_id
AND ce.charttime > g.starttime
AND ce.charttime <= g.endtime
AND ce.itemid IN (456, 52, 6702, 443, 220052, 220181, 225312)
AND ce.valuenum > 0 AND ce.valuenum < 300
AND COALESCE(ce.error, 0) = 0
GROUP BY g.icustay_id, g.hr;
CREATE INDEX IF NOT EXISTS sofa_vs_idx ON sofa_vs (icustay_id, hr);
ANALYZE sofa_vs;
-- 3. GCS: minimum within the hour (uses gcs_all carry-forward logic).
DROP TABLE IF EXISTS sofa_gcs;
CREATE TABLE sofa_gcs AS
SELECT g.icustay_id, g.hr
, MIN(gc.gcs) AS gcs_min
FROM sofa_grid g
LEFT JOIN gcs_all gc
ON gc.icustay_id = g.icustay_id
AND gc.charttime > g.starttime
AND gc.charttime <= g.endtime
GROUP BY g.icustay_id, g.hr;
CREATE INDEX IF NOT EXISTS sofa_gcs_idx ON sofa_gcs (icustay_id, hr);
ANALYZE sofa_gcs;
-- 4. Bilirubin: maximum within the hour.
DROP TABLE IF EXISTS sofa_bili;
CREATE TABLE sofa_bili AS
SELECT g.icustay_id, g.hr
, MAX(le.valuenum) AS bilirubin_max
FROM sofa_grid g
INNER JOIN icustays ie
ON ie.icustay_id = g.icustay_id
LEFT JOIN labevents le
ON le.subject_id = ie.subject_id
AND le.hadm_id = ie.hadm_id
AND le.charttime > g.starttime
AND le.charttime <= g.endtime
AND le.itemid = 50885
AND le.valuenum IS NOT NULL
AND le.valuenum > 0
GROUP BY g.icustay_id, g.hr;
CREATE INDEX IF NOT EXISTS sofa_bili_idx ON sofa_bili (icustay_id, hr);
ANALYZE sofa_bili;
-- 5. Creatinine: maximum within the hour.
DROP TABLE IF EXISTS sofa_cr;
CREATE TABLE sofa_cr AS
SELECT g.icustay_id, g.hr
, MAX(le.valuenum) AS creatinine_max
FROM sofa_grid g
INNER JOIN icustays ie
ON ie.icustay_id = g.icustay_id
LEFT JOIN labevents le
ON le.subject_id = ie.subject_id
AND le.hadm_id = ie.hadm_id
AND le.charttime > g.starttime
AND le.charttime <= g.endtime
AND le.itemid = 50912
AND le.valuenum IS NOT NULL
AND le.valuenum > 0
AND le.valuenum < 150 -- sanity (mg/dL)
GROUP BY g.icustay_id, g.hr;
CREATE INDEX IF NOT EXISTS sofa_cr_idx ON sofa_cr (icustay_id, hr);
ANALYZE sofa_cr;
-- 6. Platelets: minimum within the hour.
DROP TABLE IF EXISTS sofa_plt;
CREATE TABLE sofa_plt AS
SELECT g.icustay_id, g.hr
, MIN(le.valuenum) AS platelet_min
FROM sofa_grid g
INNER JOIN icustays ie
ON ie.icustay_id = g.icustay_id
LEFT JOIN labevents le
ON le.subject_id = ie.subject_id
AND le.hadm_id = ie.hadm_id
AND le.charttime > g.starttime
AND le.charttime <= g.endtime
AND le.itemid = 51265
AND le.valuenum IS NOT NULL
AND le.valuenum > 0
GROUP BY g.icustay_id, g.hr;
CREATE INDEX IF NOT EXISTS sofa_plt_idx ON sofa_plt (icustay_id, hr);
ANALYZE sofa_plt;
-- 7. PaO2/FiO2: split into vent / novent based on whether the patient
-- is on invasive ventilation at the time of the blood gas. We
-- take the worst (lowest) PaO2:FiO2 of each kind during the hour.
DROP TABLE IF EXISTS sofa_pf;
CREATE TABLE sofa_pf AS
WITH pafi AS (
SELECT bg.icustay_id, bg.charttime
, CASE WHEN vd.icustay_id IS NULL THEN bg.pao2fio2 END AS pao2fio2_novent
, CASE WHEN vd.icustay_id IS NOT NULL THEN bg.pao2fio2 END AS pao2fio2_vent
FROM blood_gas_arterial bg
LEFT JOIN ventilation_durations vd
ON bg.icustay_id = vd.icustay_id
AND bg.charttime >= vd.starttime
AND bg.charttime <= vd.endtime
WHERE bg.pao2fio2 IS NOT NULL
)
SELECT g.icustay_id, g.hr
, MIN(p.pao2fio2_novent) AS pao2fio2_novent
, MIN(p.pao2fio2_vent) AS pao2fio2_vent
FROM sofa_grid g
LEFT JOIN pafi p
ON p.icustay_id = g.icustay_id
AND p.charttime > g.starttime
AND p.charttime <= g.endtime
GROUP BY g.icustay_id, g.hr;
CREATE INDEX IF NOT EXISTS sofa_pf_idx ON sofa_pf (icustay_id, hr);
ANALYZE sofa_pf;
-- 8. Urine output: sum over the preceding 24 h plus a count of how many
-- distinct hours actually had a UO observation (`uo_tm_24hr`). This
-- matches the MIMIC-IV `urine_output_rate` table's two columns and
-- lets the renal CASE distinguish "no data" from "really oliguric".
-- The renal score in (11) requires uo_tm_24hr to be in a plausible
-- range before the UO branch fires; otherwise it falls back to
-- creatinine alone. See port note #3 in the file header.
DROP TABLE IF EXISTS sofa_uo;
CREATE TABLE sofa_uo AS
SELECT g.icustay_id, g.hr
, SUM(uo.value) AS uo_24hr
, COUNT(DISTINCT date_trunc('hour', uo.charttime)) AS uo_tm_24hr
FROM sofa_grid g
LEFT JOIN urine_output uo
ON uo.icustay_id = g.icustay_id
AND uo.charttime > DATETIME_SUB(g.endtime, INTERVAL '24' HOUR)
AND uo.charttime <= g.endtime
GROUP BY g.icustay_id, g.hr;
CREATE INDEX IF NOT EXISTS sofa_uo_idx ON sofa_uo (icustay_id, hr);
ANALYZE sofa_uo;
-- 9. Vasopressor rates: take the maximum rate of each pressor active at
-- `endtime`. Each upstream dose table is (icustay_id, starttime,
-- endtime, vaso_rate, vaso_amount).
DROP TABLE IF EXISTS sofa_vaso;
CREATE TABLE sofa_vaso AS
SELECT g.icustay_id, g.hr
, MAX(epi.vaso_rate) AS rate_epinephrine
, MAX(nor.vaso_rate) AS rate_norepinephrine
, MAX(dop.vaso_rate) AS rate_dopamine
, MAX(dob.vaso_rate) AS rate_dobutamine
FROM sofa_grid g
LEFT JOIN epinephrine_dose epi
ON epi.icustay_id = g.icustay_id
AND g.endtime > epi.starttime
AND g.endtime <= epi.endtime
LEFT JOIN norepinephrine_dose nor
ON nor.icustay_id = g.icustay_id
AND g.endtime > nor.starttime
AND g.endtime <= nor.endtime
LEFT JOIN dopamine_dose dop
ON dop.icustay_id = g.icustay_id
AND g.endtime > dop.starttime
AND g.endtime <= dop.endtime
LEFT JOIN dobutamine_dose dob
ON dob.icustay_id = g.icustay_id
AND g.endtime > dob.starttime
AND g.endtime <= dob.endtime
WHERE epi.icustay_id IS NOT NULL
OR nor.icustay_id IS NOT NULL
OR dop.icustay_id IS NOT NULL
OR dob.icustay_id IS NOT NULL
GROUP BY g.icustay_id, g.hr;
CREATE INDEX IF NOT EXISTS sofa_vaso_idx ON sofa_vaso (icustay_id, hr);
ANALYZE sofa_vaso;
-- 10. Wide assembly: left-join all of the above onto the grid.
DROP TABLE IF EXISTS sofa_wide;
CREATE TABLE sofa_wide AS
SELECT g.subject_id, g.hadm_id, g.icustay_id, g.hr
, g.starttime, g.endtime
, vs.meanbp_min
, gc.gcs_min
, bi.bilirubin_max
, cr.creatinine_max
, pl.platelet_min
, pf.pao2fio2_novent
, pf.pao2fio2_vent
, uo.uo_24hr
, uo.uo_tm_24hr
, va.rate_epinephrine
, va.rate_norepinephrine
, va.rate_dopamine
, va.rate_dobutamine
FROM sofa_grid g
LEFT JOIN sofa_vs vs ON vs.icustay_id = g.icustay_id AND vs.hr = g.hr
LEFT JOIN sofa_gcs gc ON gc.icustay_id = g.icustay_id AND gc.hr = g.hr
LEFT JOIN sofa_bili bi ON bi.icustay_id = g.icustay_id AND bi.hr = g.hr
LEFT JOIN sofa_cr cr ON cr.icustay_id = g.icustay_id AND cr.hr = g.hr
LEFT JOIN sofa_plt pl ON pl.icustay_id = g.icustay_id AND pl.hr = g.hr
LEFT JOIN sofa_pf pf ON pf.icustay_id = g.icustay_id AND pf.hr = g.hr
LEFT JOIN sofa_uo uo ON uo.icustay_id = g.icustay_id AND uo.hr = g.hr
LEFT JOIN sofa_vaso va ON va.icustay_id = g.icustay_id AND va.hr = g.hr;
CREATE INDEX IF NOT EXISTS sofa_wide_idx ON sofa_wide (icustay_id, hr);
ANALYZE sofa_wide;
-- 11. Per-hour component scores (no rolling window yet).
DROP TABLE IF EXISTS sofa_components;
CREATE TABLE sofa_components AS
SELECT w.*
-- Respiration
, CASE
WHEN pao2fio2_vent < 100 THEN 4
WHEN pao2fio2_vent < 200 THEN 3
WHEN pao2fio2_novent < 300 THEN 2
WHEN pao2fio2_vent < 300 THEN 2
WHEN pao2fio2_novent < 400 THEN 1
WHEN pao2fio2_vent < 400 THEN 1
WHEN COALESCE(pao2fio2_vent, pao2fio2_novent) IS NULL THEN NULL
ELSE 0
END AS respiration
-- Coagulation
, CASE
WHEN platelet_min < 20 THEN 4
WHEN platelet_min < 50 THEN 3
WHEN platelet_min < 100 THEN 2
WHEN platelet_min < 150 THEN 1
WHEN platelet_min IS NULL THEN NULL
ELSE 0
END AS coagulation
-- Liver (mg/dL)
, CASE
WHEN bilirubin_max >= 12.0 THEN 4
WHEN bilirubin_max >= 6.0 THEN 3
WHEN bilirubin_max >= 2.0 THEN 2
WHEN bilirubin_max >= 1.2 THEN 1
WHEN bilirubin_max IS NULL THEN NULL
ELSE 0
END AS liver
-- Cardiovascular
, CASE
WHEN rate_dopamine > 15 OR rate_epinephrine > 0.1 OR rate_norepinephrine > 0.1 THEN 4
WHEN rate_dopamine > 5 OR rate_epinephrine <= 0.1 OR rate_norepinephrine <= 0.1 THEN 3
WHEN rate_dopamine > 0 OR rate_dobutamine > 0 THEN 2
WHEN meanbp_min < 70 THEN 1
WHEN COALESCE(meanbp_min, rate_dopamine, rate_dobutamine,
rate_epinephrine, rate_norepinephrine) IS NULL THEN NULL
ELSE 0
END AS cardiovascular
-- CNS (GCS)
, CASE
WHEN gcs_min >= 13 AND gcs_min <= 14 THEN 1
WHEN gcs_min >= 10 AND gcs_min <= 12 THEN 2
WHEN gcs_min >= 6 AND gcs_min <= 9 THEN 3
WHEN gcs_min < 6 THEN 4
WHEN gcs_min IS NULL THEN NULL
ELSE 0
END AS cns
-- Renal
-- We scale uo_24hr to a 24h-equivalent only when the rolling
-- window has at least 22 distinct hours of observations
-- (matching MIMIC-IV's `urine_output_rate` upper-bound check).
-- If fewer than 22 valid hours exist, the UO branch is treated
-- as missing and the renal score falls back to creatinine alone.
-- We also clip GU-irrigant negative net values to zero so a
-- patient on continuous bladder irrigation isn't mis-scored as
-- oliguric.
, CASE
WHEN creatinine_max >= 5.0 THEN 4
WHEN uo_tm_24hr BETWEEN 22 AND 30
AND GREATEST(uo_24hr, 0) * 24.0 / uo_tm_24hr < 200 THEN 4
WHEN creatinine_max >= 3.5 AND creatinine_max < 5.0 THEN 3
WHEN uo_tm_24hr BETWEEN 22 AND 30
AND GREATEST(uo_24hr, 0) * 24.0 / uo_tm_24hr < 500 THEN 3
WHEN creatinine_max >= 2.0 AND creatinine_max < 3.5 THEN 2
WHEN creatinine_max >= 1.2 AND creatinine_max < 2.0 THEN 1
WHEN creatinine_max IS NULL
AND NOT (uo_tm_24hr BETWEEN 22 AND 30) THEN NULL
ELSE 0
END AS renal
FROM sofa_wide w;
CREATE INDEX IF NOT EXISTS sofa_components_idx
ON sofa_components (icustay_id, hr);
ANALYZE sofa_components;
-- 12. Final hourly SOFA: 24-hour rolling MAX per component, summed.
DROP TABLE IF EXISTS sofa_hourly;
CREATE TABLE sofa_hourly AS
SELECT s.subject_id, s.hadm_id, s.icustay_id, s.hr
, s.starttime, s.endtime
, s.respiration, s.coagulation, s.liver
, s.cardiovascular, s.cns, s.renal
, COALESCE(MAX(s.respiration) OVER w, 0) AS respiration_24hours
, COALESCE(MAX(s.coagulation) OVER w, 0) AS coagulation_24hours
, COALESCE(MAX(s.liver) OVER w, 0) AS liver_24hours
, COALESCE(MAX(s.cardiovascular) OVER w, 0) AS cardiovascular_24hours
, COALESCE(MAX(s.cns) OVER w, 0) AS cns_24hours
, COALESCE(MAX(s.renal) OVER w, 0) AS renal_24hours
, COALESCE(MAX(s.respiration) OVER w, 0)
+ COALESCE(MAX(s.coagulation) OVER w, 0)
+ COALESCE(MAX(s.liver) OVER w, 0)
+ COALESCE(MAX(s.cardiovascular) OVER w, 0)
+ COALESCE(MAX(s.cns) OVER w, 0)
+ COALESCE(MAX(s.renal) OVER w, 0)
AS sofa_24hours
FROM sofa_components s
WINDOW w AS (
PARTITION BY s.icustay_id
ORDER BY s.hr
ROWS BETWEEN 23 PRECEDING AND CURRENT ROW
);
CREATE INDEX IF NOT EXISTS sofa_hourly_idx
ON sofa_hourly (icustay_id, hr);
CREATE INDEX IF NOT EXISTS sofa_hourly_time_idx
ON sofa_hourly (icustay_id, endtime);
ANALYZE sofa_hourly;