This commit is contained in:
2026-05-05 10:22:17 +02:00
commit 9a24865016
30 changed files with 5735 additions and 0 deletions

48
sql/build_sapsii.sql Normal file
View File

@@ -0,0 +1,48 @@
-- ------------------------------------------------------------------
-- Build the SAPS-II severity score on a vanilla PostgreSQL MIMIC-III v1.3 DB.
--
-- Usage (assuming you have already restored the MIMIC-III dump into a
-- database called `mimic` and have the base tables in the `mimiciii` schema):
--
-- psql -d mimic -v ON_ERROR_STOP=1 \
-- -c 'SET search_path TO mimiciii, public;' \
-- -f sql/build_sapsii.sql
--
-- All component scripts are pulled in with \ir (include-relative), so their
-- paths are resolved against the directory that holds this file rather than
-- psql's working directory. The command above therefore works from the
-- repository root as well as from inside the sql/ directory.
--
-- Resulting tables created in the current search_path:
-- urine_output (not used by SAPS-II directly,
-- included for completeness)
-- ventilation_classification
-- ventilation_durations
-- blood_gas_first_day
-- blood_gas_first_day_arterial
-- gcs_first_day
-- labs_first_day
-- urine_output_first_day
-- vitals_first_day
-- sapsii <-- final score table
-- ------------------------------------------------------------------
-- Abort on the first failing statement even if -v ON_ERROR_STOP=1 was omitted
\set ON_ERROR_STOP on
-- 0. PL/pgSQL shims for BigQuery-style DATETIME_DIFF / DATETIME_ADD / DATETIME_SUB
\ir postgres-functions.sql
-- 1. Optional helper (not required by SAPS-II, but useful and harmless)
\ir fluid_balance/urine_output.sql
-- 2. Ventilation: classification first, then durations
\ir durations/ventilation_classification.sql
\ir durations/ventilation_durations.sql
-- 3. First-day derived tables (blood_gas_first_day must precede the arterial one)
\ir firstday/blood_gas_first_day.sql
\ir firstday/blood_gas_first_day_arterial.sql
\ir firstday/gcs_first_day.sql
\ir firstday/labs_first_day.sql
\ir firstday/urine_output_first_day.sql
\ir firstday/vitals_first_day.sql
-- 4. The score itself
\ir severityscores/sapsii.sql
\echo 'SAPS-II build complete. Query results with: SELECT * FROM sapsii LIMIT 10;'

74
sql/build_sepsis3.sql Normal file
View File

@@ -0,0 +1,74 @@
-- ------------------------------------------------------------------
-- Build the Sepsis-3 onset table on a vanilla PostgreSQL MIMIC-III
-- v1.3 DB.
--
-- Usage (assuming you have already restored the MIMIC-III dump into a
-- database called `mimic` and have the base tables in the `mimiciii`
-- schema):
--
-- psql -d mimic -v ON_ERROR_STOP=1 \
-- -c 'SET search_path TO mimiciii, public;' \
-- -f sql/build_sepsis3.sql
--
-- All component scripts are pulled in with \ir (include-relative), so their
-- paths are resolved against the directory that holds this file rather than
-- psql's working directory. The command above therefore works from the
-- repository root as well as from inside the sql/ directory.
--
-- Resulting tables created in the current search_path:
-- echo_data
-- urine_output
-- ventilation_classification
-- ventilation_durations
-- weight_durations
-- {dobutamine,dopamine,epinephrine,norepinephrine}_dose
-- blood_gas_arterial (all-time PaO2/FiO2)
-- gcs_all (all-time GCS)
-- sofa_grid, sofa_vs, sofa_gcs, sofa_bili, sofa_cr, sofa_plt,
-- sofa_pf, sofa_uo, sofa_vaso, sofa_wide, sofa_components
-- (intermediate hourly stages,
-- retained for inspection)
-- sofa_hourly (final hourly SOFA, one row per
-- ICU hour, with 24-h rolling MAX)
-- antibiotic (filtered antibiotic prescriptions)
-- suspicion_of_infection (Seymour 2016 abx<>culture pairing)
-- sepsis3 (final sepsis-3 onset, one row per
-- ICU stay)
--
-- Runtime: expect a few hours on a stock single-node PostgreSQL with
-- the default `chartevents` and `labevents` indexes. Most of the
-- cost is the eight raw-table scans driving the sofa_* staging
-- tables; progress is visible through the command tag psql prints as
-- each statement completes.
-- ------------------------------------------------------------------
-- Abort on the first failing statement even if -v ON_ERROR_STOP=1 was omitted
\set ON_ERROR_STOP on
-- 0. PL/pgSQL shims for BigQuery-style DATETIME_DIFF / _ADD / _SUB
\ir postgres-functions.sql
-- 1. Helpers shared with SAPS-II
\ir echo_data.sql
\ir fluid_balance/urine_output.sql
\ir durations/ventilation_classification.sql
\ir durations/ventilation_durations.sql
\ir durations/weight_durations.sql
-- 2. Vasopressor dose tables (each merges CareVue + MetaVision)
\ir durations/dobutamine_dose.sql
\ir durations/dopamine_dose.sql
\ir durations/epinephrine_dose.sql
\ir durations/norepinephrine_dose.sql
-- 3. All-time pivots feeding hourly SOFA
\ir sepsis/blood_gas_arterial.sql
\ir sepsis/gcs_all.sql
-- 4. Hourly SOFA pipeline (staged temp tables -> sofa_hourly)
\ir sepsis/sofa_hourly.sql
-- 5. Suspicion of infection
\ir sepsis/antibiotic.sql
\ir sepsis/suspicion_of_infection.sql
-- 6. Final onset table
\ir sepsis/sepsis3.sql
\echo 'Sepsis-3 build complete.'
\echo 'Query results with:'
\echo ' SELECT icustay_id, suspected_infection_time, sofa_time, sofa_score, sepsis3'
\echo ' FROM sepsis3 WHERE sepsis3 = TRUE LIMIT 10;'

View File

@@ -0,0 +1,259 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
-- Builds dobutamine_dose with columns
-- (icustay_id, starttime, endtime, vaso_rate, vaso_amount).
-- CareVue intervals are reconstructed from the point-in-time rows of
-- inputevents_cv (CTEs vasocv1..vasocv); MetaVision intervals are read
-- directly from inputevents_mv (CTE vasomv). The two sets are concatenated.
DROP TABLE IF EXISTS dobutamine_dose; CREATE TABLE dobutamine_dose AS
-- This query extracts dose+durations of dobutamine administration
-- Get drug administration data from CareVue first
-- vasocv1: collapse the CareVue rows to one row per (icustay_id, charttime)
with vasocv1 as
(
select
icustay_id, charttime
-- case statement determining whether the ITEMID is an instance of vasopressor usage
, max(case when itemid in (30042,30306) then 1 else 0 end) as vaso -- dobutamine
-- the 'stopped' column indicates if a vasopressor has been disconnected
, max(case when itemid in (30042,30306) and (stopped = 'Stopped' OR stopped like 'D/C%') then 1
else 0 end) as vaso_stopped
-- NOTE: despite its name, vaso_null is 1 when a rate IS recorded at this charttime
, max(case when itemid in (30042,30306) and rate is not null then 1 else 0 end) as vaso_null
, max(case when itemid in (30042,30306) then rate else null end) as vaso_rate
, max(case when itemid in (30042,30306) then amount else null end) as vaso_amount
FROM inputevents_cv
where itemid in (30042,30306) -- dobutamine
group by icustay_id, charttime
)
-- vasocv2: vaso_partition is a running count of chart times that carry a rate,
-- so a row with a missing rate shares its partition id with the most recent
-- earlier row that had one
, vasocv2 as
(
select v.*
, sum(vaso_null) over (partition by icustay_id order by charttime) as vaso_partition
from
vasocv1 v
)
-- vasocv3: vaso_prevrate_ifnull is the rate of the earliest row of the
-- partition, i.e. the last recorded rate carried forward over null-rate rows
, vasocv3 as
(
select v.*
, first_value(vaso_rate) over (partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull
from
vasocv2 v
)
-- vasocv4: flag the rows at which a new administration episode begins (vaso_start)
, vasocv4 as
(
select
icustay_id
, charttime
-- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta
, vaso
, vaso_rate
, vaso_amount
, vaso_stopped
, vaso_prevrate_ifnull
-- We define start time here
, case
when vaso = 0 then null
-- if this is the first instance of the vasoactive drug
-- (this LAG is additionally partitioned by vaso_null, unlike the ones below)
when vaso_rate > 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso, vaso_null
order by charttime
)
is null
then 1
-- you often get a string of 0s
-- we decide not to set these as 1, just because it makes vasonum sequential
when vaso_rate = 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 0
then 0
-- sometimes you get a string of NULL, associated with 0 volumes
-- same reason as before, we decide not to set these as 1
-- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null
when vaso_prevrate_ifnull = 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 0
then 0
-- If the last recorded rate was 0, newvaso = 1
when LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
) = 0
then 1
-- If the last recorded vaso was D/C'd, newvaso = 1
when
LAG(vaso_stopped,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 1 then 1
-- ** not sure if the below is needed
--when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1
else null
end as vaso_start
FROM
vasocv3
)
-- propagate start/stop flags forward in time
-- vaso_first is the running sum of vaso_start, which numbers the episodes of a stay
, vasocv5 as
(
select v.*
, SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first
FROM
vasocv4 v
)
-- vasocv6: vaso_stop holds the episode number (vaso_first) on rows that may end an episode
, vasocv6 as
(
select v.*
-- We define end time here
, case
when vaso = 0
then null
-- If the recorded vaso was D/C'd, this is an end time
when vaso_stopped = 1
then vaso_first
-- If the rate is zero, this is the end time
when vaso_rate = 0
then vaso_first
-- the last row in the table is always a potential end time
-- this captures patients who die/are discharged while on vasopressors
-- in principle, this could add an extra end time for the vasopressor
-- however, since we later group on vaso_start, any extra end times are ignored
when LEAD(CHARTTIME,1)
OVER
(
partition by icustay_id, vaso
order by charttime
) is null
then vaso_first
else null
end as vaso_stop
from vasocv5 v
)
-- -- if you want to look at the results of the table before grouping:
-- select
-- icustay_id, charttime, vaso, vaso_rate, vaso_amount
-- , vaso_stopped
-- , vaso_start
-- , vaso_first
-- , vaso_stop
-- from vasocv6 order by icustay_id, charttime;
-- vasocv7: pair each chart row with the next charttime of the same episode as its endtime
, vasocv7 as
(
select
icustay_id
, charttime as starttime
, lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv6
where
vaso_first is not null -- bogus data
and
vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. the drug is never actually delivered
and
icustay_id is not null -- there are data for "floating" admissions, we don't worry about these
)
-- table of start/stop times for event
-- keeps only intervals that have an end, a positive rate and a non-zero length
, vasocv8 as
(
select
icustay_id
, starttime, endtime
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv7
where endtime is not null
and vaso_rate > 0
and starttime != endtime
)
-- collapse these start/stop times down if the rate doesn't change
-- vaso_groups is 0 when the interval starts exactly where the previous one ended
-- and has the same rate, otherwise 1 (a new run begins)
, vasocv9 as
(
select
icustay_id
, starttime, endtime
, case
when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime
AND LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate
THEN 0
else 1
end as vaso_groups
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv8
-- NOTE: these predicates repeat the filter already applied in vasocv8
where endtime is not null
and vaso_rate > 0
and starttime != endtime
)
-- vasocv10: the running sum of vaso_groups gives every run its own id (vaso_groups_sum)
, vasocv10 as
(
select
icustay_id
, starttime, endtime
, vaso_groups
, SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv9
)
-- vasocv: one row per run - earliest start, latest end, summed amount
, vasocv as
(
select icustay_id
, min(starttime) as starttime
, max(endtime) as endtime
, vaso_groups_sum
, vaso_rate
, sum(vaso_amount) as vaso_amount
from vasocv10
group by icustay_id, vaso_groups_sum, vaso_rate
)
-- now we extract the associated data for metavision patients
-- (inputevents_mv already stores a starttime and endtime per row)
, vasomv as
(
select
icustay_id, linkorderid
, rate as vaso_rate
, amount as vaso_amount
, starttime
, endtime
from inputevents_mv
where itemid = 221653 -- dobutamine
and statusdescription != 'Rewritten' -- only valid orders
)
-- combine the CareVue and MetaVision intervals into the final table
-- vaso_amount for carevue is not accurate
SELECT icustay_id
, starttime, endtime
, vaso_rate, vaso_amount
from vasocv
UNION ALL
SELECT icustay_id
, starttime, endtime
, vaso_rate, vaso_amount
from vasomv
order by icustay_id, starttime;

View File

@@ -0,0 +1,262 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
-- Builds dopamine_dose with columns
-- (icustay_id, starttime, endtime, vaso_rate, vaso_amount).
-- CareVue intervals are reconstructed from the point-in-time rows of
-- inputevents_cv (CTEs vasocv1..vasocv); MetaVision intervals are read
-- directly from inputevents_mv (CTE vasomv). The two sets are concatenated.
DROP TABLE IF EXISTS dopamine_dose; CREATE TABLE dopamine_dose AS
-- This query extracts dose+durations of dopamine administration
-- Get drug administration data from CareVue first
-- vasocv1: collapse the CareVue rows to one row per (icustay_id, charttime)
with vasocv1 as
(
select
icustay_id, charttime
-- case statement determining whether the ITEMID is an instance of vasopressor usage
, max(case when itemid in (30043,30307) then 1 else 0 end) as vaso -- dopamine
-- the 'stopped' column indicates if a vasopressor has been disconnected
, max(case when itemid in (30043,30307) and (stopped = 'Stopped' OR stopped like 'D/C%') then 1
else 0 end) as vaso_stopped
-- NOTE: despite its name, vaso_null is 1 when a rate IS recorded at this charttime
, max(case when itemid in (30043,30307) and rate is not null then 1 else 0 end) as vaso_null
, max(case when itemid in (30043,30307) then rate else null end) as vaso_rate
, max(case when itemid in (30043,30307) then amount else null end) as vaso_amount
FROM inputevents_cv
where itemid in
(
30043,30307 -- dopamine
)
group by icustay_id, charttime
)
-- vasocv2: vaso_partition is a running count of chart times that carry a rate,
-- so a row with a missing rate shares its partition id with the most recent
-- earlier row that had one
, vasocv2 as
(
select v.*
, sum(vaso_null) over (partition by icustay_id order by charttime) as vaso_partition
from
vasocv1 v
)
-- vasocv3: vaso_prevrate_ifnull is the rate of the earliest row of the
-- partition, i.e. the last recorded rate carried forward over null-rate rows
, vasocv3 as
(
select v.*
, first_value(vaso_rate) over (partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull
from
vasocv2 v
)
-- vasocv4: flag the rows at which a new administration episode begins (vaso_start)
, vasocv4 as
(
select
icustay_id
, charttime
-- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta
, vaso
, vaso_rate
, vaso_amount
, vaso_stopped
, vaso_prevrate_ifnull
-- We define start time here
, case
when vaso = 0 then null
-- if this is the first instance of the vasoactive drug
-- (this LAG is additionally partitioned by vaso_null, unlike the ones below)
when vaso_rate > 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso, vaso_null
order by charttime
)
is null
then 1
-- you often get a string of 0s
-- we decide not to set these as 1, just because it makes vasonum sequential
when vaso_rate = 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 0
then 0
-- sometimes you get a string of NULL, associated with 0 volumes
-- same reason as before, we decide not to set these as 1
-- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null
when vaso_prevrate_ifnull = 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 0
then 0
-- If the last recorded rate was 0, newvaso = 1
when LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
) = 0
then 1
-- If the last recorded vaso was D/C'd, newvaso = 1
when
LAG(vaso_stopped,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 1 then 1
-- ** not sure if the below is needed
--when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1
else null
end as vaso_start
FROM
vasocv3
)
-- propagate start/stop flags forward in time
-- vaso_first is the running sum of vaso_start, which numbers the episodes of a stay
, vasocv5 as
(
select v.*
, SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first
FROM
vasocv4 v
)
-- vasocv6: vaso_stop holds the episode number (vaso_first) on rows that may end an episode
, vasocv6 as
(
select v.*
-- We define end time here
, case
when vaso = 0
then null
-- If the recorded vaso was D/C'd, this is an end time
when vaso_stopped = 1
then vaso_first
-- If the rate is zero, this is the end time
when vaso_rate = 0
then vaso_first
-- the last row in the table is always a potential end time
-- this captures patients who die/are discharged while on vasopressors
-- in principle, this could add an extra end time for the vasopressor
-- however, since we later group on vaso_start, any extra end times are ignored
when LEAD(CHARTTIME,1)
OVER
(
partition by icustay_id, vaso
order by charttime
) is null
then vaso_first
else null
end as vaso_stop
from vasocv5 v
)
-- -- if you want to look at the results of the table before grouping:
-- select
-- icustay_id, charttime, vaso, vaso_rate, vaso_amount
-- , vaso_stopped
-- , vaso_start
-- , vaso_first
-- , vaso_stop
-- from vasocv6 order by icustay_id, charttime;
-- vasocv7: pair each chart row with the next charttime of the same episode as its endtime
, vasocv7 as
(
select
icustay_id
, charttime as starttime
, lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv6
where
vaso_first is not null -- bogus data
and
vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. the drug is never actually delivered
and
icustay_id is not null -- there are data for "floating" admissions, we don't worry about these
)
-- table of start/stop times for event
-- keeps only intervals that have an end, a positive rate and a non-zero length
, vasocv8 as
(
select
icustay_id
, starttime, endtime
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv7
where endtime is not null
and vaso_rate > 0
and starttime != endtime
)
-- collapse these start/stop times down if the rate doesn't change
-- vaso_groups is 0 when the interval starts exactly where the previous one ended
-- and has the same rate, otherwise 1 (a new run begins)
, vasocv9 as
(
select
icustay_id
, starttime, endtime
, case
when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime
AND LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate
THEN 0
else 1
end as vaso_groups
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv8
-- NOTE: these predicates repeat the filter already applied in vasocv8
where endtime is not null
and vaso_rate > 0
and starttime != endtime
)
-- vasocv10: the running sum of vaso_groups gives every run its own id (vaso_groups_sum)
, vasocv10 as
(
select
icustay_id
, starttime, endtime
, vaso_groups
, SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv9
)
-- vasocv: one row per run - earliest start, latest end, summed amount
, vasocv as
(
select icustay_id
, min(starttime) as starttime
, max(endtime) as endtime
, vaso_groups_sum
, vaso_rate
, sum(vaso_amount) as vaso_amount
from vasocv10
group by icustay_id, vaso_groups_sum, vaso_rate
)
-- now we extract the associated data for metavision patients
-- (inputevents_mv already stores a starttime and endtime per row)
, vasomv as
(
select
icustay_id, linkorderid
, rate as vaso_rate
, amount as vaso_amount
, starttime
, endtime
from inputevents_mv
where itemid = 221662 -- dopamine
and statusdescription != 'Rewritten' -- only valid orders
)
-- combine the CareVue and MetaVision intervals into the final table
-- vaso_amount for carevue is not accurate
SELECT icustay_id
, starttime, endtime
, vaso_rate, vaso_amount
from vasocv
UNION ALL
SELECT icustay_id
, starttime, endtime
, vaso_rate, vaso_amount
from vasomv
order by icustay_id, starttime;

View File

@@ -0,0 +1,273 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
-- Builds epinephrine_dose with columns
-- (icustay_id, starttime, endtime, vaso_rate, vaso_amount).
-- CareVue intervals are reconstructed from the point-in-time rows of
-- inputevents_cv (CTEs vasocv1..vasocv); MetaVision intervals are read
-- directly from inputevents_mv (CTE vasomv). The two sets are concatenated.
DROP TABLE IF EXISTS epinephrine_dose; CREATE TABLE epinephrine_dose AS
-- This query extracts dose+durations of epinephrine administration
-- Requires the weight_durations table (joined below to weight-normalise
-- the rates of itemid 30044)
-- Get drug administration data from CareVue first
-- vasocv1: collapse the CareVue rows to one row per (icustay_id, charttime)
with vasocv1 as
(
select
cv.icustay_id, cv.charttime
-- case statement determining whether the ITEMID is an instance of vasopressor usage
, max(case when itemid in (30044,30119,30309) then 1 else 0 end) as vaso -- epinephrine
-- the 'stopped' column indicates if a vasopressor has been disconnected
, max(case when itemid in (30044,30119,30309) and (stopped = 'Stopped' OR stopped like 'D/C%') then 1
else 0 end) as vaso_stopped
-- NOTE: despite its name, vaso_null is 1 when a rate IS recorded at this charttime
, max(case when itemid in (30044,30119,30309) and rate is not null then 1 else 0 end) as vaso_null
-- itemid 30044 is divided by the patient weight (or by 80.0 when no weight row matches);
-- the other itemids are used as charted
, max(case
when itemid = 30044 and wd.weight is null then rate / 80.0 -- super rare to be missing weight... affects 2 patients for 14 rows
when itemid = 30044 then rate / wd.weight -- measured in mcgmin
when itemid in (30119,30309) then rate -- measured in mcgkgmin
else null
end) as vaso_rate
, max(case when itemid in (30044,30119,30309) then amount else null end) as vaso_amount
FROM inputevents_cv cv
-- pick the weight row whose [starttime, endtime] window contains the chart time
left join weight_durations wd
on cv.icustay_id = wd.icustay_id
and cv.charttime between wd.starttime and wd.endtime
where itemid in
(
30044,30119,30309 -- epinephrine
)
and cv.icustay_id is not null
group by cv.icustay_id, charttime
)
-- vasocv2: vaso_partition is a running count of chart times that carry a rate,
-- so a row with a missing rate shares its partition id with the most recent
-- earlier row that had one
, vasocv2 as
(
select v.*
, sum(vaso_null) over (partition by icustay_id order by charttime) as vaso_partition
from
vasocv1 v
)
-- vasocv3: vaso_prevrate_ifnull is the rate of the earliest row of the
-- partition, i.e. the last recorded rate carried forward over null-rate rows
, vasocv3 as
(
select v.*
, first_value(vaso_rate) over (partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull
from
vasocv2 v
)
-- vasocv4: flag the rows at which a new administration episode begins (vaso_start)
, vasocv4 as
(
select
icustay_id
, charttime
-- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta
, vaso
, vaso_rate
, vaso_amount
, vaso_stopped
, vaso_prevrate_ifnull
-- We define start time here
, case
when vaso = 0 then null
-- if this is the first instance of the vasoactive drug
-- (this LAG is additionally partitioned by vaso_null, unlike the ones below)
when vaso_rate > 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso, vaso_null
order by charttime
)
is null
then 1
-- you often get a string of 0s
-- we decide not to set these as 1, just because it makes vasonum sequential
when vaso_rate = 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 0
then 0
-- sometimes you get a string of NULL, associated with 0 volumes
-- same reason as before, we decide not to set these as 1
-- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null
when vaso_prevrate_ifnull = 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 0
then 0
-- If the last recorded rate was 0, newvaso = 1
when LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
) = 0
then 1
-- If the last recorded vaso was D/C'd, newvaso = 1
when
LAG(vaso_stopped,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 1 then 1
-- ** not sure if the below is needed
--when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1
else null
end as vaso_start
FROM
vasocv3
)
-- propagate start/stop flags forward in time
-- vaso_first is the running sum of vaso_start, which numbers the episodes of a stay
, vasocv5 as
(
select v.*
, SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first
FROM
vasocv4 v
)
-- vasocv6: vaso_stop holds the episode number (vaso_first) on rows that may end an episode
, vasocv6 as
(
select v.*
-- We define end time here
, case
when vaso = 0
then null
-- If the recorded vaso was D/C'd, this is an end time
when vaso_stopped = 1
then vaso_first
-- If the rate is zero, this is the end time
when vaso_rate = 0
then vaso_first
-- the last row in the table is always a potential end time
-- this captures patients who die/are discharged while on vasopressors
-- in principle, this could add an extra end time for the vasopressor
-- however, since we later group on vaso_start, any extra end times are ignored
when LEAD(CHARTTIME,1)
OVER
(
partition by icustay_id, vaso
order by charttime
) is null
then vaso_first
else null
end as vaso_stop
from vasocv5 v
)
-- -- if you want to look at the results of the table before grouping:
-- select
-- icustay_id, charttime, vaso, vaso_rate, vaso_amount
-- , vaso_stopped
-- , vaso_start
-- , vaso_first
-- , vaso_stop
-- from vasocv6 order by icustay_id, charttime;
-- vasocv7: pair each chart row with the next charttime of the same episode as its endtime
, vasocv7 as
(
select
icustay_id
, charttime as starttime
, lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv6
where
vaso_first is not null -- bogus data
and
vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. the drug is never actually delivered
and
icustay_id is not null -- there are data for "floating" admissions, we don't worry about these
)
-- table of start/stop times for event
-- keeps only intervals that have an end, a positive rate and a non-zero length
, vasocv8 as
(
select
icustay_id
, starttime, endtime
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv7
where endtime is not null
and vaso_rate > 0
and starttime != endtime
)
-- collapse these start/stop times down if the rate doesn't change
-- vaso_groups is 0 when the interval starts exactly where the previous one ended
-- and has the same rate, otherwise 1 (a new run begins)
, vasocv9 as
(
select
icustay_id
, starttime, endtime
, case
when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime
AND LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate
THEN 0
else 1
end as vaso_groups
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv8
-- NOTE: these predicates repeat the filter already applied in vasocv8
where endtime is not null
and vaso_rate > 0
and starttime != endtime
)
-- vasocv10: the running sum of vaso_groups gives every run its own id (vaso_groups_sum)
, vasocv10 as
(
select
icustay_id
, starttime, endtime
, vaso_groups
, SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv9
)
-- vasocv: one row per run - earliest start, latest end, summed amount
, vasocv as
(
select icustay_id
, min(starttime) as starttime
, max(endtime) as endtime
, vaso_groups_sum
, vaso_rate
, sum(vaso_amount) as vaso_amount
from vasocv10
group by icustay_id, vaso_groups_sum, vaso_rate
)
-- now we extract the associated data for metavision patients
-- (inputevents_mv already stores a starttime and endtime per row)
, vasomv as
(
select
icustay_id, linkorderid
, rate as vaso_rate
, amount as vaso_amount
, starttime
, endtime
from inputevents_mv
where itemid = 221289 -- epinephrine
and statusdescription != 'Rewritten' -- only valid orders
)
-- combine the CareVue and MetaVision intervals into the final table
-- vaso_amount for carevue is not accurate
SELECT icustay_id
, starttime, endtime
, vaso_rate, vaso_amount
from vasocv
UNION ALL
SELECT icustay_id
, starttime, endtime
, vaso_rate, vaso_amount
from vasomv
order by icustay_id, starttime;

View File

@@ -0,0 +1,270 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
-- Builds norepinephrine_dose with columns
-- (icustay_id, starttime, endtime, vaso_rate, vaso_amount).
-- CareVue intervals are reconstructed from the point-in-time rows of
-- inputevents_cv (CTEs vasocv1..vasocv); MetaVision intervals are read
-- directly from inputevents_mv (CTE vasomv). The two sets are concatenated.
DROP TABLE IF EXISTS norepinephrine_dose; CREATE TABLE norepinephrine_dose AS
-- This query extracts dose+durations of norepinephrine administration
-- Total time on the drug can be calculated from this table by grouping using ICUSTAY_ID
-- Requires the weight_durations table (joined below to weight-normalise
-- the rates of itemid 30047)
-- Get drug administration data from CareVue first
-- vasocv1: collapse the CareVue rows to one row per (icustay_id, charttime)
with vasocv1 as
(
select
cv.icustay_id, cv.charttime
-- case statement determining whether the ITEMID is an instance of vasopressor usage
, max(case when itemid in (30047,30120) then 1 else 0 end) as vaso -- norepinephrine
-- the 'stopped' column indicates if a vasopressor has been disconnected
, max(case when itemid in (30047,30120) and (stopped = 'Stopped' OR stopped like 'D/C%') then 1
else 0 end) as vaso_stopped
-- NOTE: despite its name, vaso_null is 1 when a rate IS recorded at this charttime
, max(case when itemid in (30047,30120) and rate is not null then 1 else 0 end) as vaso_null
-- itemid 30047 is divided by the patient weight (or by 80.0 when no weight row matches);
-- itemid 30120 is used as charted
, max(case
when itemid = 30047 and wd.weight is null then rate / 80.0 -- this is rare, only affects a total of ~400 rows
when itemid = 30047 then rate / wd.weight -- measured in mcgmin
when itemid = 30120 then rate -- measured in mcgkgmin ** there are clear errors, perhaps actually mcgmin
else null end) as vaso_rate
, max(case when itemid in (30047,30120) then amount else null end) as vaso_amount
FROM inputevents_cv cv
-- pick the weight row whose [starttime, endtime] window contains the chart time
left join weight_durations wd
on cv.icustay_id = wd.icustay_id
and cv.charttime between wd.starttime and wd.endtime
where itemid in (30047,30120) -- norepinephrine
and cv.icustay_id is not null
group by cv.icustay_id, cv.charttime
)
-- vasocv2: vaso_partition is a running count of chart times that carry a rate,
-- so a row with a missing rate shares its partition id with the most recent
-- earlier row that had one
, vasocv2 as
(
select v.*
, sum(vaso_null) over (partition by icustay_id order by charttime) as vaso_partition
from
vasocv1 v
)
-- vasocv3: vaso_prevrate_ifnull is the rate of the earliest row of the
-- partition, i.e. the last recorded rate carried forward over null-rate rows
, vasocv3 as
(
select v.*
, first_value(vaso_rate) over (partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull
from
vasocv2 v
)
-- vasocv4: flag the rows at which a new administration episode begins (vaso_start)
, vasocv4 as
(
select
icustay_id
, charttime
-- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta
, vaso
, vaso_rate
, vaso_amount
, vaso_stopped
, vaso_prevrate_ifnull
-- We define start time here
, case
when vaso = 0 then null
-- if this is the first instance of the vasoactive drug
-- (this LAG is additionally partitioned by vaso_null, unlike the ones below)
when vaso_rate > 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso, vaso_null
order by charttime
)
is null
then 1
-- you often get a string of 0s
-- we decide not to set these as 1, just because it makes vasonum sequential
when vaso_rate = 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 0
then 0
-- sometimes you get a string of NULL, associated with 0 volumes
-- same reason as before, we decide not to set these as 1
-- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null
when vaso_prevrate_ifnull = 0 and
LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 0
then 0
-- If the last recorded rate was 0, newvaso = 1
when LAG(vaso_prevrate_ifnull,1)
OVER
(
partition by icustay_id, vaso
order by charttime
) = 0
then 1
-- If the last recorded vaso was D/C'd, newvaso = 1
when
LAG(vaso_stopped,1)
OVER
(
partition by icustay_id, vaso
order by charttime
)
= 1 then 1
-- ** not sure if the below is needed
--when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1
else null
end as vaso_start
FROM
vasocv3
)
-- propagate start/stop flags forward in time
-- vaso_first is the running sum of vaso_start, which numbers the episodes of a stay
, vasocv5 as
(
select v.*
, SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first
FROM
vasocv4 v
)
-- vasocv6: vaso_stop holds the episode number (vaso_first) on rows that may end an episode
, vasocv6 as
(
select v.*
-- We define end time here
, case
when vaso = 0
then null
-- If the recorded vaso was D/C'd, this is an end time
when vaso_stopped = 1
then vaso_first
-- If the rate is zero, this is the end time
when vaso_rate = 0
then vaso_first
-- the last row in the table is always a potential end time
-- this captures patients who die/are discharged while on vasopressors
-- in principle, this could add an extra end time for the vasopressor
-- however, since we later group on vaso_start, any extra end times are ignored
when LEAD(CHARTTIME,1)
OVER
(
partition by icustay_id, vaso
order by charttime
) is null
then vaso_first
else null
end as vaso_stop
from vasocv5 v
)
-- -- if you want to look at the results of the table before grouping:
-- select
-- icustay_id, charttime, vaso, vaso_rate, vaso_amount
-- , vaso_stopped
-- , vaso_start
-- , vaso_first
-- , vaso_stop
-- from vasocv6 order by icustay_id, charttime;
-- vasocv7: pair each chart row with the next charttime of the same episode as its endtime
, vasocv7 as
(
select
icustay_id
, charttime as starttime
, lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv6
where
vaso_first is not null -- bogus data
and
vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. the drug is never actually delivered
and
icustay_id is not null -- there are data for "floating" admissions, we don't worry about these
)
-- table of start/stop times for event
-- keeps only intervals that have an end, a positive rate and a non-zero length
, vasocv8 as
(
select
icustay_id
, starttime, endtime
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv7
where endtime is not null
and vaso_rate > 0
and starttime != endtime
)
-- collapse these start/stop times down if the rate doesn't change
-- vaso_groups is 0 when the interval starts exactly where the previous one ended
-- and has the same rate, otherwise 1 (a new run begins)
, vasocv9 as
(
select
icustay_id
, starttime, endtime
, case
when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime
AND LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate
THEN 0
else 1
end as vaso_groups
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv8
-- NOTE: these predicates repeat the filter already applied in vasocv8
where endtime is not null
and vaso_rate > 0
and starttime != endtime
)
-- vasocv10: the running sum of vaso_groups gives every run its own id (vaso_groups_sum)
, vasocv10 as
(
select
icustay_id
, starttime, endtime
, vaso_groups
, SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum
, vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
from vasocv9
)
-- vasocv: one row per run - earliest start, latest end, summed amount
, vasocv as
(
select icustay_id
, min(starttime) as starttime
, max(endtime) as endtime
, vaso_groups_sum
, vaso_rate
, sum(vaso_amount) as vaso_amount
from vasocv10
group by icustay_id, vaso_groups_sum, vaso_rate
)
-- now we extract the associated data for metavision patients
-- (inputevents_mv already stores a starttime and endtime per row)
, vasomv as
(
select
icustay_id, linkorderid
, rate as vaso_rate
, amount as vaso_amount
, starttime
, endtime
from inputevents_mv
where itemid = 221906 -- norepinephrine
and statusdescription != 'Rewritten' -- only valid orders
)
-- combine the CareVue and MetaVision intervals into the final table
-- vaso_amount for carevue is not accurate
SELECT icustay_id
, starttime, endtime
, vaso_rate, vaso_amount
from vasocv
UNION ALL
SELECT icustay_id
, starttime, endtime
, vaso_rate, vaso_amount
from vasomv
order by icustay_id, starttime;

View File

@@ -0,0 +1,142 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
-- Builds ventilation_classification: one row per (icustay_id, charttime) with
-- four 0/1 flags, taken from chartevents settings plus the extubation
-- procedures recorded in procedureevents_mv.
--   MechVent      - a ventilator setting / mode was charted at this time
--   OxygenTherapy - a (non-ventilator) oxygen delivery device was charted
--   Extubated     - an extubation was documented
--   SelfExtubated - the documented extubation was patient-initiated
DROP TABLE IF EXISTS ventilation_classification;
CREATE TABLE ventilation_classification AS
SELECT
    icustay_id
  , charttime
  -- flag: is this an instance of mechanical ventilation?
  -- NOTE(review): itemid 459 is listed below but is absent from the WHERE
  -- filter at the bottom of this SELECT, so it can never match here - confirm
  -- whether it should be added to the filter.
  , MAX(
        CASE
            WHEN itemid IS NULL OR value IS NULL THEN 0 -- can't have null values
            WHEN itemid = 720 AND value <> 'Other/Remarks' THEN 1 -- VentTypeRecorded
            WHEN itemid = 223848 AND value <> 'Other' THEN 1
            WHEN itemid = 223849 THEN 1 -- ventilator mode
            WHEN itemid = 467 AND value = 'Ventilator' THEN 1 -- O2 delivery device == ventilator
            WHEN itemid IN
            (
                445, 448, 449, 450, 1340, 1486, 1600, 224687 -- minute volume
              , 639, 654, 681, 682, 683, 684, 224685, 224684, 224686 -- tidal volume
              , 218, 436, 535, 444, 459, 224697, 224695, 224696, 224746, 224747 -- High/Low/Peak/Mean/Neg insp force ("RespPressure")
              , 221, 1, 1211, 1655, 2000, 226873, 224738, 224419, 224750, 227187 -- Insp pressure
              , 543 -- PlateauPressure
              , 5865, 5866, 224707, 224709, 224705, 224706 -- APRV pressure
              , 60, 437, 505, 506, 686, 220339, 224700 -- PEEP
              , 3459 -- high pressure relief
              , 501, 502, 503, 224702 -- PCV
              , 223, 667, 668, 669, 670, 671, 672 -- TCPCV
              , 224701 -- PSVlevel
            ) THEN 1
            ELSE 0
        END
    ) AS MechVent
  -- flag: oxygen therapy charted (its initiation marks the end of ventilation)
  , MAX(
        CASE
            WHEN itemid = 226732 AND value IN
            (
                'Nasal cannula'            -- 153714 observations
              , 'Face tent'                -- 24601 observations
              , 'Aerosol-cool'             -- 24560 observations
              , 'Trach mask '              -- 16435 observations
              , 'High flow neb'            -- 10785 observations
              , 'Non-rebreather'           -- 5182 observations
              , 'Venti mask '              -- 1947 observations
              , 'Medium conc mask '        -- 1888 observations
              , 'T-piece'                  -- 1135 observations
              , 'High flow nasal cannula'  -- 925 observations
              , 'Ultrasonic neb'           -- 9 observations
              , 'Vapomist'                 -- 3 observations
            ) THEN 1
            WHEN itemid = 467 AND value IN
            (
                'Cannula'                  -- 278252 observations
              , 'Nasal Cannula'            -- 248299 observations
              -- 'None' (95498 observations) is deliberately not listed
              , 'Face Tent'                -- 35766 observations
              , 'Aerosol-Cool'             -- 33919 observations
              , 'Trach Mask'               -- 32655 observations
              , 'Hi Flow Neb'              -- 14070 observations
              , 'Non-Rebreather'           -- 10856 observations
              , 'Venti Mask'               -- 4279 observations
              , 'Medium Conc Mask'         -- 2114 observations
              , 'Vapotherm'                -- 1655 observations
              , 'T-Piece'                  -- 779 observations
              , 'Hood'                     -- 670 observations
              , 'Hut'                      -- 150 observations
              , 'TranstrachealCat'         -- 78 observations
              , 'Heated Neb'               -- 37 observations
              , 'Ultrasonic Neb'           -- 2 observations
            ) THEN 1
            ELSE 0
        END
    ) AS OxygenTherapy
  -- flag: extubation charted (planned or self) - ends the ventilation event
  , MAX(
        CASE
            WHEN itemid IS NULL OR value IS NULL THEN 0
            WHEN itemid = 640 AND value IN ('Extubated', 'Self Extubation') THEN 1
            ELSE 0
        END
    ) AS Extubated
  -- flag: the extubation was performed by the patient
  , MAX(
        CASE
            WHEN itemid IS NULL OR value IS NULL THEN 0
            WHEN itemid = 640 AND value = 'Self Extubation' THEN 1
            ELSE 0
        END
    ) AS SelfExtubated
FROM chartevents ce
WHERE ce.value IS NOT NULL
  -- exclude rows marked as error (keeps error = 0 and error IS NULL)
  AND ce.error IS DISTINCT FROM 1
  AND itemid IN
  (
      -- settings used to indicate ventilation
      720, 223849 -- vent mode
    , 223848 -- vent type
    , 445, 448, 449, 450, 1340, 1486, 1600, 224687 -- minute volume
    , 639, 654, 681, 682, 683, 684, 224685, 224684, 224686 -- tidal volume
    , 218, 436, 535, 444, 224697, 224695, 224696, 224746, 224747 -- High/Low/Peak/Mean ("RespPressure")
    , 221, 1, 1211, 1655, 2000, 226873, 224738, 224419, 224750, 227187 -- Insp pressure
    , 543 -- PlateauPressure
    , 5865, 5866, 224707, 224709, 224705, 224706 -- APRV pressure
    , 60, 437, 505, 506, 686, 220339, 224700 -- PEEP
    , 3459 -- high pressure relief
    , 501, 502, 503, 224702 -- PCV
    , 223, 667, 668, 669, 670, 671, 672 -- TCPCV
    , 224701 -- PSVlevel
      -- setting used to indicate extubation
    , 640 -- extubated
      -- oxygen/NIV items, i.e. the end of a mechanical vent event
      -- NOTE(review): 468-471, 227287 and 223834 pass this filter but are not
      -- referenced by any CASE above, so they only produce all-zero rows.
    , 468 -- O2 Delivery Device#2
    , 469 -- O2 Delivery Mode
    , 470 -- O2 Flow (lpm)
    , 471 -- O2 Flow (lpm) #2
    , 227287 -- O2 Flow (additional cannula)
    , 226732 -- O2 Delivery Device(s)
    , 223834 -- O2 Flow
      -- used in both the oxygen and the vent calculation
    , 467 -- O2 Delivery Device
  )
GROUP BY icustay_id, charttime
-- plain UNION removes duplicate rows
UNION
-- add in the extubation flags from procedureevents_mv
-- only the start time is needed for the extubation
-- (extubation is always charted as ending 1 minute after it started)
SELECT
    icustay_id
  , starttime AS charttime
  , 0 AS MechVent
  , 0 AS OxygenTherapy
  , 1 AS Extubated
  , CASE itemid WHEN 225468 THEN 1 ELSE 0 END AS SelfExtubated
FROM procedureevents_mv
WHERE itemid IN
(
    227194 -- "Extubation"
  , 225468 -- "Unplanned Extubation (patient-initiated)"
  , 225477 -- "Unplanned Extubation (non-patient initiated)"
);

View File

@@ -0,0 +1,112 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
DROP TABLE IF EXISTS ventilation_durations; CREATE TABLE ventilation_durations AS
-- This query extracts the duration of mechanical ventilation
-- The main goal of the query is to aggregate sequential ventilator settings
-- into single mechanical ventilation "events". The start and end time of these
-- events can then be used for various purposes: calculating the total duration
-- of mechanical ventilation, cross-checking values (e.g. PaO2:FiO2 on vent), etc
-- The query's logic is roughly:
-- 1) The presence of a mechanical ventilation setting starts a new ventilation event
-- 2) Any instance of a setting in the next 8 hours continues the event
-- 3) Certain elements end the current ventilation event
-- a) documented extubation ends the current ventilation
-- b) initiation of non-invasive vent and/or oxygen ends the current vent
-- See the ventilation_classification.sql query for step 1 of the above.
-- This query has the logic for converting events into durations.
-- Output: one row per (icustay_id, ventnum) with starttime, endtime, duration_hours.
-- NOTE(review): DATETIME_DIFF and DATETIME_ADD are not built-in PostgreSQL
-- functions; presumably they are the BigQuery-style shims loaded before this
-- script runs - confirm.
-- vd0: for every MechVent=1 row, look up the charttime of the previous
-- MechVent=1 row of the same stay (the LAG is partitioned by MechVent)
with vd0 as
(
select
icustay_id
-- this carries over the previous charttime which had a mechanical ventilation event
-- (NULL for non-vent rows and for the first vent row of a stay)
, case
when MechVent=1 then
LAG(CHARTTIME, 1) OVER (partition by icustay_id, MechVent order by charttime)
else null
end as charttime_lag
, charttime
, MechVent
, OxygenTherapy
, Extubated
, SelfExtubated
from ventilation_classification
)
-- vd1: decide, row by row, whether the row opens a new ventilation event (newvent = 1)
, vd1 as
(
select
icustay_id
, charttime_lag
, charttime
, MechVent
, OxygenTherapy
, Extubated
, SelfExtubated
-- if this is a mechanical ventilation event, we calculate the time since the last event
, case
-- if the current observation indicates mechanical ventilation is present
-- calculate the time since the last vent event
-- (minutes divided by 60, i.e. hours; not used by the final select below)
when MechVent=1 then
DATETIME_DIFF(CHARTTIME, charttime_lag, 'MINUTE')/60
else null
end as ventduration
-- Extubated flag of the previous row, where "previous" is taken among rows
-- that are either vent settings or extubations (second partition key = 1)
, LAG(Extubated,1)
OVER
(
partition by icustay_id, case when MechVent=1 or Extubated=1 then 1 else 0 end
order by charttime
) as ExtubatedLag
-- now we determine if the current mech vent event is a "new", i.e. they've just been intubated
, case
-- if there is an extubation flag, we mark any subsequent ventilation as a new ventilation event
--when Extubated = 1 then 0 -- extubation is *not* a new ventilation event, the *subsequent* row is
-- (same window expression as ExtubatedLag above)
when
LAG(Extubated,1)
OVER
(
partition by icustay_id, case when MechVent=1 or Extubated=1 then 1 else 0 end
order by charttime
)
= 1 then 1
-- if patient has initiated oxygen therapy, and is not currently vented, start a newvent
when MechVent = 0 and OxygenTherapy = 1 then 1
-- if more than 8 hours have passed since the previous vent setting, this is a new
-- ventilation event; settings less than 8 hours apart continue the current event
-- (when charttime_lag is NULL the comparison is presumably NULL, falling through to 0)
when CHARTTIME > DATETIME_ADD(charttime_lag, INTERVAL '8' HOUR)
then 1
else 0
end as newvent
-- use the staging table with only vent settings from chart events
FROM vd0 ventsettings
)
-- vd2: number the ventilation events within each stay
, vd2 as
(
select vd1.*
-- create a cumulative sum of the instances of new ventilation
-- this results in a monotonic integer assigned to each instance of ventilation
-- rows that are neither a vent setting nor an extubation get ventnum = NULL
, case when MechVent=1 or Extubated = 1 then
SUM( newvent )
OVER ( partition by icustay_id order by charttime )
else null end
as ventnum
--- now we convert CHARTTIME of ventilator settings into durations
from vd1
)
-- create the durations for each mechanical ventilation instance
select icustay_id
-- regenerate ventnum so it's sequential
, ROW_NUMBER() over (partition by icustay_id order by ventnum) as ventnum
, min(charttime) as starttime
, max(charttime) as endtime
, DATETIME_DIFF(max(charttime), min(charttime), 'MINUTE')/60 AS duration_hours
from vd2
group by icustay_id, vd2.ventnum
-- drop zero-length events (a single charted time)
having min(charttime) != max(charttime)
-- patient had to be mechanically ventilated at least once
-- i.e. max(mechvent) should be 1
-- this excludes a frequent situation of NIV/oxygen before intub
-- in these cases the group has max(mechvent)=0 (oxygen-only rows carry a NULL
-- ventnum in vd2), so they are ignored
and max(mechvent) = 1
order by icustay_id, ventnum

View File

@@ -0,0 +1,207 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
DROP TABLE IF EXISTS weight_durations; CREATE TABLE weight_durations AS
-- This query extracts weights for ICU patients with start/stop times.
-- Sources, in order of preference:
--   1) charted admit / daily weights and neonatal weights (chartevents)
--   2) weights parsed from echo reports (echo_data), used only for stays
--      that have no usable charted weight
-- The first admission weight of a stay is back-dated to intime - 2 hours; every
-- weight is valid until the next weight starts, and the last one until
-- 2 hours after the later of outtime and its own start.
-- Output columns: icustay_id, starttime, endtime, weight.
-- NOTE(review): neonatal and echo weights are converted from lb/oz to kg below;
-- the charted admit/daily items are presumably already in kg - confirm.
-- DATETIME_ADD / DATETIME_SUB / REGEXP_CONTAINS are not PostgreSQL built-ins and
-- are presumably provided by the shim script loaded before this file.
WITH wt_neonate AS
(
    -- neonatal weights, pivoted to one row per stay/charttime
    SELECT c.icustay_id, c.charttime
      , MAX(CASE WHEN c.itemid = 3580 THEN c.valuenum END) as wt_kg
      , MAX(CASE WHEN c.itemid = 3581 THEN c.valuenum END) as wt_lb
      , MAX(CASE WHEN c.itemid = 3582 THEN c.valuenum END) as wt_oz
    FROM chartevents c
    WHERE c.itemid in (3580, 3581, 3582)
      AND c.icustay_id IS NOT NULL
      -- exclude rows marked as error
      AND COALESCE(c.error, 0) = 0
      -- wt_oz/wt_lb/wt_kg are only 0 erroneously, so drop these rows
      AND c.valuenum > 0
    -- a separate query was run to manually verify only 1 value exists per
    -- icustay_id/charttime/itemid grouping
    -- therefore, we can use max() across itemid to collapse these values to 1 row per group
    GROUP BY c.icustay_id, c.charttime
)
, birth_wt AS
(
    SELECT c.icustay_id, c.charttime
      , MAX(
          CASE
          WHEN c.itemid = 4183 THEN
            -- clean free-text birth weight data
            CASE
              -- ignore the value unless it is a plain non-negative decimal number
              -- (digits with at most one decimal point). An anchored positive match
              -- guarantees the CASTs below cannot raise on input such as '', '.' or
              -- '1.2.3', which a "contains a non-numeric character" test lets through.
              -- [.] is used instead of a backslash escape so the pattern does not
              -- depend on the standard_conforming_strings setting.
              WHEN NOT REGEXP_CONTAINS(c.value, '^([0-9]+[.]?[0-9]*|[.][0-9]+)$') THEN NULL
              -- convert grams to kg
              WHEN CAST(c.value AS NUMERIC) > 100 THEN CAST(c.value AS NUMERIC)/1000
              -- keep kg as is, filtering bad values (largest baby ever born was conveniently 9.98kg)
              WHEN CAST(c.value AS NUMERIC) < 10 THEN CAST(c.value AS NUMERIC)
              -- ignore other values (those between 10-100) - junk data
              ELSE NULL END
          -- itemid 3723 happily has all numeric data - also doesn't store any grams data
          WHEN c.itemid = 3723 AND c.valuenum < 10 THEN c.valuenum
          ELSE NULL END) as wt_kg
    FROM chartevents c
    WHERE c.itemid in (3723, 4183)
      AND c.icustay_id IS NOT NULL
      -- exclude rows marked as error
      AND COALESCE(c.error, 0) = 0
    -- a separate query was run to manually verify only 1 value exists per
    -- icustay_id/charttime/itemid grouping
    -- therefore, we can use max() across itemid to collapse these values to 1 row per group
    GROUP BY c.icustay_id, c.charttime
)
-- all charted weights, tagged as 'admit' or 'daily'
, wt_stg as
(
    SELECT
        c.icustay_id
      , c.charttime
      , case when c.itemid in (762,226512) then 'admit'
          else 'daily' end as weight_type
      -- TODO: eliminate obvious outliers if there is a reasonable weight
      , c.valuenum as weight
    FROM chartevents c
    WHERE c.valuenum IS NOT NULL
      AND c.itemid in
      (
          762,226512 -- Admit Wt
        , 763,224639 -- Daily Weight
      )
      AND c.icustay_id IS NOT NULL
      AND c.valuenum > 0
      -- exclude rows marked as error
      AND COALESCE(c.error, 0) = 0
    UNION ALL
    SELECT
        n.icustay_id
      , n.charttime
      , 'daily' AS weight_type
      , CASE
          WHEN wt_kg IS NOT NULL THEN wt_kg
          -- pounds + ounces -> kg; a missing ounce reading counts as 0 oz so that
          -- a weight charted in pounds only is not turned into NULL and lost
          WHEN wt_lb IS NOT NULL THEN wt_lb*0.45359237 + COALESCE(wt_oz, 0)*0.0283495231
          ELSE NULL END AS weight
    FROM wt_neonate n
    UNION ALL
    SELECT
        b.icustay_id
      , b.charttime
      -- birth weight of neonates is treated as admission weight
      , 'admit' AS weight_type
      , wt_kg as weight
    FROM birth_wt b
)
-- get more weights from echo - completes data for ~2500 patients
-- we only use echo data if there is *no* usable charted data
-- each echo weight becomes its own row (lb converted to kg)
, echo as
(
    select
        ie.icustay_id
      , ec.charttime
      , 'echo' AS weight_type
      , 0.453592*ec.weight as weight
    from icustays ie
    inner join echo_data ec
        on ie.hadm_id = ec.hadm_id
    where ec.weight is not null
      -- NOT EXISTS is NULL-safe (unlike NOT IN), and only charted rows that
      -- actually carry a weight block the echo fallback
      and not exists
      (
          select 1
          from wt_stg w
          where w.icustay_id = ie.icustay_id
            and w.weight is not null
      )
)
, wt_stg0 AS
(
    SELECT icustay_id, charttime, weight_type, weight
    FROM wt_stg
    UNION ALL
    SELECT icustay_id, charttime, weight_type, weight
    FROM echo
)
-- assign ascending row number within each stay / weight type
, wt_stg1 as
(
    select
        icustay_id
      , charttime
      , weight_type
      , weight
      , ROW_NUMBER() OVER (partition by icustay_id, weight_type order by charttime) as rn
    from wt_stg0
    WHERE weight IS NOT NULL
)
-- change charttime to (intime - 2 hours) for the first admission weight recorded
, wt_stg2 AS
(
    SELECT
        wt_stg1.icustay_id
      , ie.intime, ie.outtime
      , case when wt_stg1.weight_type = 'admit' and wt_stg1.rn = 1
          then DATETIME_SUB(ie.intime, INTERVAL '2' HOUR)
          else wt_stg1.charttime end as starttime
      , wt_stg1.weight
    from wt_stg1
    INNER JOIN icustays ie
        on ie.icustay_id = wt_stg1.icustay_id
)
-- each weight ends when the next one starts; the last one ends 2 hours after
-- the later of ICU discharge and its own start time
, wt_stg3 as
(
    select
        icustay_id
      , intime, outtime
      , starttime
      , coalesce(
          LEAD(starttime) OVER (PARTITION BY icustay_id ORDER BY starttime),
          DATETIME_ADD(GREATEST(outtime, starttime), INTERVAL '2' HOUR)
        ) as endtime
      , weight
    from wt_stg2
)
-- this table is the start/stop times from admit/daily weight in charted data
, wt1 as
(
    select
        icustay_id
      , starttime
      -- endtime is normally already set by wt_stg3; the remaining arguments
      -- are fallbacks only
      , coalesce(endtime,
          LEAD(starttime) OVER (partition by icustay_id order by starttime),
          -- impute ICU discharge as the end of the final weight measurement
          -- plus a 2 hour "fuzziness" window
          DATETIME_ADD(outtime, INTERVAL '2' HOUR)
        ) as endtime
      , weight
    from wt_stg3
)
-- if the intime for the patient is < the first charted daily weight
-- then we will have a "gap" at the start of their stay
-- to prevent this, we look for these gaps and backfill the first weight
-- this adds (153255-149657)=3598 rows, meaning this fix helps for up to 3598 icustay_id
, wt_fix as
(
    select ie.icustay_id
      -- we add a 2 hour "fuzziness" window
      , DATETIME_SUB(ie.intime, INTERVAL '2' HOUR) as starttime
      , wt.starttime as endtime
      , wt.weight
    from icustays ie
    inner join
    -- the below subquery numbers the weights of each icustay_id by starttime;
    -- the join keeps only the first one (rn = 1)
    (
        SELECT wt1.icustay_id, wt1.starttime, wt1.weight
          , ROW_NUMBER() OVER (PARTITION BY wt1.icustay_id ORDER BY wt1.starttime) as rn
        FROM wt1
    ) wt
        ON ie.icustay_id = wt.icustay_id
        AND wt.rn = 1
        and ie.intime < wt.starttime
)
-- add the backfill rows to the main weight table
select
    wt1.icustay_id
  , wt1.starttime
  , wt1.endtime
  , wt1.weight
from wt1
UNION ALL
SELECT
    wt_fix.icustay_id
  , wt_fix.starttime
  , wt_fix.endtime
  , wt_fix.weight
from wt_fix;

48
sql/echo_data.sql Normal file
View File

@@ -0,0 +1,48 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
DROP TABLE IF EXISTS echo_data; CREATE TABLE echo_data AS
-- This code extracts structured data from echocardiographies
-- You can join it to the text notes using ROW_ID
-- Just note that ROW_ID will differ across versions of MIMIC-III.
--
-- Notes on the regular expressions (PostgreSQL semantics):
--   * literal parentheses are written as [(] and [)]. A '\\x28' escape only
--     works when the string literal collapses the double backslash; with
--     standard_conforming_strings = on (the PostgreSQL default) it would match
--     a literal backslash followed by "x28". Even a single-backslash \x28 is
--     unsafe before a hex letter ('\x28bpm' reads "28b" as the hex value).
--   * '.' also matches a newline in PostgreSQL, so "rest of the line" is
--     written as [^\n]+ rather than a greedy .+
-- NOTE(review): REGEXP_EXTRACT, PARSE_DATETIME and FORMAT_DATE are not
-- PostgreSQL built-ins; presumably they are shims loaded before this script
-- and REGEXP_EXTRACT returns the first capture group - confirm.
select ROW_ID
  , subject_id, hadm_id
  , chartdate
  -- charttime is always null for echoes..
  -- however, the time is available in the echo text, e.g.:
  -- , substring(ne.text, 'Date/Time: [\[\]0-9*-]+ at ([0-9:]+)') as TIMESTAMP
  -- we can therefore impute it and re-create charttime
  , PARSE_DATETIME
  (
      '%Y-%m-%d%H:%M:%S',
      FORMAT_DATE('%Y-%m-%d', chartdate)
      || REGEXP_EXTRACT(ne.text, 'Date/Time: .+? at ([0-9]+:[0-9]{2})')
      || ':00'
  ) AS charttime
  -- explanation of below pattern:
  -- 'Indication: ' - matched verbatim
  -- (.*?) - match any character
  -- \n - the end of the line
  -- only the item in ()s is returned
  -- note: the '?' makes it non-greedy. if you exclude it, it matches until it reaches the *last* \n
  , REGEXP_EXTRACT(ne.text, 'Indication: (.*?)\n') as Indication
  -- sometimes numeric values contain de-id text, e.g. [** Numeric Identifier **]
  -- such values do not match the numeric patterns below and come out as NULL
  , cast(REGEXP_EXTRACT(ne.text, 'Height: [(]in[)] ([0-9]+)') as numeric) as Height
  , cast(REGEXP_EXTRACT(ne.text, 'Weight [(]lb[)]: ([0-9]+)\n') as numeric) as Weight
  -- BSA is a decimal value (the integer-only pattern could not match e.g. "1.85 m2")
  , cast(REGEXP_EXTRACT(ne.text, 'BSA [(]m2[)]: ([0-9]+[.]?[0-9]*) m2\n') as numeric) as BSA -- ends in 'm2'
  , REGEXP_EXTRACT(ne.text, 'BP [(]mm Hg[)]: ([^\n]+)\n') as BP -- Sys/Dias
  , cast(REGEXP_EXTRACT(ne.text, 'BP [(]mm Hg[)]: ([0-9]+)/[0-9]+?\n') as numeric) as BPSys -- first part of fraction
  , cast(REGEXP_EXTRACT(ne.text, 'BP [(]mm Hg[)]: [0-9]+/([0-9]+?)\n') as numeric) as BPDias -- second part of fraction
  , cast(REGEXP_EXTRACT(ne.text, 'HR [(]bpm[)]: ([0-9]+?)\n') as numeric) as HR
  , REGEXP_EXTRACT(ne.text, 'Status: (.*?)\n') as Status
  , REGEXP_EXTRACT(ne.text, 'Test: (.*?)\n') as Test
  , REGEXP_EXTRACT(ne.text, 'Doppler: (.*?)\n') as Doppler
  , REGEXP_EXTRACT(ne.text, 'Contrast: (.*?)\n') as Contrast
  , REGEXP_EXTRACT(ne.text, 'Technical Quality: (.*?)\n') as TechnicalQuality
FROM noteevents ne
where category = 'Echo';

View File

@@ -0,0 +1,108 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
-- blood_gas_first_day: pivots the blood-gas / chemistry rows of LABEVENTS into
-- one row per (subject_id, hadm_id, icustay_id, charttime), restricted to the
-- window from 6 hours before ICU admission to 1 day after it.
-- Because the lab rows are LEFT JOINed, a stay without any matching lab still
-- yields one row (with a NULL charttime and NULL measurements).
-- things to check:
--   when a mixed venous/arterial blood sample are taken at the same time, is the store time different?
DROP TABLE IF EXISTS blood_gas_first_day;
CREATE TABLE blood_gas_first_day AS
WITH pvt AS
(
    -- extract the raw lab rows and give each ITEMID a readable label
    SELECT
        ie.subject_id
      , ie.hadm_id
      , ie.icustay_id
      -- ITEMIDs without a label below (50807, 51545) are extracted but map to NULL
      , CASE le.itemid
            WHEN 50800 THEN 'SPECIMEN'
            WHEN 50801 THEN 'AADO2'
            WHEN 50802 THEN 'BASEEXCESS'
            WHEN 50803 THEN 'BICARBONATE'
            WHEN 50804 THEN 'TOTALCO2'
            WHEN 50805 THEN 'CARBOXYHEMOGLOBIN'
            WHEN 50806 THEN 'CHLORIDE'
            WHEN 50808 THEN 'CALCIUM'
            WHEN 50809 THEN 'GLUCOSE'
            WHEN 50810 THEN 'HEMATOCRIT'
            WHEN 50811 THEN 'HEMOGLOBIN'
            WHEN 50812 THEN 'INTUBATED'
            WHEN 50813 THEN 'LACTATE'
            WHEN 50814 THEN 'METHEMOGLOBIN'
            WHEN 50815 THEN 'O2FLOW'
            WHEN 50816 THEN 'FIO2'
            WHEN 50817 THEN 'SO2' -- OXYGENSATURATION
            WHEN 50818 THEN 'PCO2'
            WHEN 50819 THEN 'PEEP'
            WHEN 50820 THEN 'PH'
            WHEN 50821 THEN 'PO2'
            WHEN 50822 THEN 'POTASSIUM'
            WHEN 50823 THEN 'REQUIREDO2'
            WHEN 50824 THEN 'SODIUM'
            WHEN 50825 THEN 'TEMPERATURE'
            WHEN 50826 THEN 'TIDALVOLUME'
            WHEN 50827 THEN 'VENTILATIONRATE'
            WHEN 50828 THEN 'VENTILATOR'
            ELSE NULL
        END AS label
      , le.charttime
      , le.value
      -- sanity checks: implausible numbers are replaced by NULL
      , CASE
            -- non-positive values are invalid, except for base excess
            WHEN le.valuenum <= 0 AND le.itemid != 50802 THEN NULL
            -- hematocrit
            WHEN le.itemid = 50810 AND le.valuenum > 100 THEN NULL
            -- FiO2 must be a valid number; mistakes are rare
            -- (<100 obs out of ~100,000). There are 862 obs of valuenum == 20
            -- (some people round down!), so 20 is still accepted.
            -- Rather than risk imputing garbage data, invalid values become NULL.
            WHEN le.itemid = 50816 AND le.valuenum < 20 THEN NULL
            WHEN le.itemid = 50816 AND le.valuenum > 100 THEN NULL
            -- O2 sat
            WHEN le.itemid = 50817 AND le.valuenum > 100 THEN NULL
            -- O2 flow
            WHEN le.itemid = 50815 AND le.valuenum > 70 THEN NULL
            -- PO2, conservative upper limit
            WHEN le.itemid = 50821 AND le.valuenum > 800 THEN NULL
            ELSE le.valuenum
        END AS valuenum
    FROM icustays ie
    LEFT JOIN labevents le
        ON le.subject_id = ie.subject_id
       AND le.hadm_id = ie.hadm_id
       -- from 6 hours before to 1 day after ICU admission (both ends inclusive)
       AND le.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
       AND le.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
       -- blood gases
       AND le.itemid IN
       (
           50800, 50801, 50802, 50803, 50804, 50805, 50806, 50807, 50808, 50809
         , 50810, 50811, 50812, 50813, 50814, 50815, 50816, 50817, 50818, 50819
         , 50820, 50821, 50822, 50823, 50824, 50825, 50826, 50827, 50828
         , 51545
       )
)
-- pivot: one column per label; MAX only collapses the rows of a group
SELECT
    pvt.subject_id
  , pvt.hadm_id
  , pvt.icustay_id
  , pvt.charttime
  , MAX(CASE WHEN label = 'SPECIMEN' THEN value END) AS specimen
  , MAX(CASE WHEN label = 'AADO2' THEN valuenum END) AS aado2
  , MAX(CASE WHEN label = 'BASEEXCESS' THEN valuenum END) AS baseexcess
  , MAX(CASE WHEN label = 'BICARBONATE' THEN valuenum END) AS bicarbonate
  , MAX(CASE WHEN label = 'TOTALCO2' THEN valuenum END) AS totalco2
  , MAX(CASE WHEN label = 'CARBOXYHEMOGLOBIN' THEN valuenum END) AS carboxyhemoglobin
  , MAX(CASE WHEN label = 'CHLORIDE' THEN valuenum END) AS chloride
  , MAX(CASE WHEN label = 'CALCIUM' THEN valuenum END) AS calcium
  , MAX(CASE WHEN label = 'GLUCOSE' THEN valuenum END) AS glucose
  , MAX(CASE WHEN label = 'HEMATOCRIT' THEN valuenum END) AS hematocrit
  , MAX(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum END) AS hemoglobin
  , MAX(CASE WHEN label = 'INTUBATED' THEN valuenum END) AS intubated
  , MAX(CASE WHEN label = 'LACTATE' THEN valuenum END) AS lactate
  , MAX(CASE WHEN label = 'METHEMOGLOBIN' THEN valuenum END) AS methemoglobin
  , MAX(CASE WHEN label = 'O2FLOW' THEN valuenum END) AS o2flow
  , MAX(CASE WHEN label = 'FIO2' THEN valuenum END) AS fio2
  , MAX(CASE WHEN label = 'SO2' THEN valuenum END) AS so2 -- OXYGENSATURATION
  , MAX(CASE WHEN label = 'PCO2' THEN valuenum END) AS pco2
  , MAX(CASE WHEN label = 'PEEP' THEN valuenum END) AS peep
  , MAX(CASE WHEN label = 'PH' THEN valuenum END) AS ph
  , MAX(CASE WHEN label = 'PO2' THEN valuenum END) AS po2
  , MAX(CASE WHEN label = 'POTASSIUM' THEN valuenum END) AS potassium
  , MAX(CASE WHEN label = 'REQUIREDO2' THEN valuenum END) AS requiredo2
  , MAX(CASE WHEN label = 'SODIUM' THEN valuenum END) AS sodium
  , MAX(CASE WHEN label = 'TEMPERATURE' THEN valuenum END) AS temperature
  , MAX(CASE WHEN label = 'TIDALVOLUME' THEN valuenum END) AS tidalvolume
  , MAX(CASE WHEN label = 'VENTILATIONRATE' THEN valuenum END) AS ventilationrate
  , MAX(CASE WHEN label = 'VENTILATOR' THEN valuenum END) AS ventilator
FROM pvt
GROUP BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id, pvt.charttime
ORDER BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id, pvt.charttime;

View File

@@ -0,0 +1,156 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
DROP TABLE IF EXISTS blood_gas_first_day_arterial; CREATE TABLE blood_gas_first_day_arterial AS
-- Restricts blood_gas_first_day to arterial samples.
-- A sample is kept when its specimen is recorded as 'ART', or when a logistic
-- model (SPECIMEN_PROB below) rates it as arterial with probability > 0.75.
-- The model inputs are the blood gas values plus the most recent charted SpO2
-- (within 2 hours before the sample) and charted FiO2 (within 4 hours before).
-- Also derives AADO2_calc and the PaO2/FiO2 ratio.
-- NOTE(review): DATETIME_SUB is not a PostgreSQL built-in; presumably it is a
-- shim loaded before this script - confirm.
with stg_spo2 as
(
  select subject_id, hadm_id, icustay_id, charttime
    -- max here is just used to group SpO2 by charttime
    -- values outside (0, 100] are treated as missing
    , max(case when valuenum <= 0 or valuenum > 100 then null else valuenum end) as SpO2
  FROM chartevents
  -- o2 sat
  where ITEMID in
  (
    646 -- SpO2
  , 220277 -- O2 saturation pulseoxymetry
  )
  -- exclude rows marked as error, consistent with the FiO2 extraction below
  AND (error IS NULL OR error = 0)
  group by subject_id, hadm_id, icustay_id, charttime
)
, stg_fio2 as
(
  select subject_id, hadm_id, icustay_id, charttime
    -- pre-process the FiO2s to ensure they are between 21-100%
    , max(
        case
          when itemid = 223835
            then case
              -- recorded as a fraction: convert to %
              when valuenum > 0 and valuenum <= 1
                then valuenum * 100
              -- improperly input data - looks like O2 flow in litres
              when valuenum > 1 and valuenum < 21
                then null
              when valuenum >= 21 and valuenum <= 100
                then valuenum
              else null end -- unphysiological
        when itemid in (3420, 3422)
        -- all these values are well formatted
        -- (no range check is applied to them here)
          then valuenum
        -- NOTE(review): the strict "< 1" drops a value of exactly 1 (100% O2),
        -- whereas itemid 223835 above accepts it - confirm this is intended
        when itemid = 190 and valuenum > 0.20 and valuenum < 1
        -- well formatted but not in %
          then valuenum * 100
      else null end
    ) as fio2_chartevents
  FROM chartevents
  where ITEMID in
  (
    3420 -- FiO2
  , 190 -- FiO2 set
  , 223835 -- Inspired O2 Fraction (FiO2)
  , 3422 -- FiO2 [measured]
  )
  -- exclude rows marked as error
  AND (error IS NULL OR error = 0)
  group by subject_id, hadm_id, icustay_id, charttime
)
-- attach candidate SpO2 readings to every blood gas that has a PO2
, stg2 as
(
select bg.*
  -- rank the candidates so that the most recent SpO2 gets lastRowSpO2 = 1
  , ROW_NUMBER() OVER (partition by bg.icustay_id, bg.charttime order by s1.charttime DESC) as lastRowSpO2
  , s1.spo2
from blood_gas_first_day bg
left join stg_spo2 s1
  -- same patient
  on bg.icustay_id = s1.icustay_id
  -- spo2 occurred at most 2 hours before this blood gas
  and s1.charttime >= DATETIME_SUB(bg.charttime, INTERVAL '2' HOUR)
  and s1.charttime <= bg.charttime
where bg.po2 is not null
)
-- attach candidate charted FiO2 readings and compute the specimen probability
, stg3 as
(
select bg.*
  , ROW_NUMBER() OVER (partition by bg.icustay_id, bg.charttime order by s2.charttime DESC) as lastRowFiO2
  , s2.fio2_chartevents
  -- create our specimen prediction: logistic function of a linear score.
  -- for every predictor except po2, a missing value is replaced by the second
  -- coalesce argument (a fixed constant) instead of coefficient * value
  , 1/(1+exp(-(-0.02544
  + 0.04598 * po2
  + coalesce(-0.15356 * spo2 , -0.15356 * 97.49420 + 0.13429)
  + coalesce( 0.00621 * fio2_chartevents , 0.00621 * 51.49550 + -0.24958)
  + coalesce( 0.10559 * hemoglobin , 0.10559 * 10.32307 + 0.05954)
  + coalesce( 0.13251 * so2 , 0.13251 * 93.66539 + -0.23172)
  + coalesce(-0.01511 * pco2 , -0.01511 * 42.08866 + -0.01630)
  + coalesce( 0.01480 * fio2 , 0.01480 * 63.97836 + -0.31142)
  + coalesce(-0.00200 * aado2 , -0.00200 * 442.21186 + -0.01328)
  + coalesce(-0.03220 * bicarbonate , -0.03220 * 22.96894 + -0.06535)
  + coalesce( 0.05384 * totalco2 , 0.05384 * 24.72632 + -0.01405)
  + coalesce( 0.08202 * lactate , 0.08202 * 3.06436 + 0.06038)
  + coalesce( 0.10956 * ph , 0.10956 * 7.36233 + -0.00617)
  + coalesce( 0.00848 * o2flow , 0.00848 * 7.59362 + -0.35803)
  ))) as SPECIMEN_PROB
from stg2 bg
left join stg_fio2 s2
  -- same patient
  on bg.icustay_id = s2.icustay_id
  -- fio2 occurred at most 4 hours before this blood gas
  and s2.charttime between DATETIME_SUB(bg.charttime, INTERVAL '4' HOUR) and bg.charttime
where bg.lastRowSpO2 = 1 -- only the row with the most recent SpO2 (if no SpO2 found lastRowSpO2 = 1)
)
select subject_id, hadm_id,
icustay_id, charttime
, specimen -- raw data indicating sample type, only present 80% of the time
-- prediction of specimen for missing data
, case
      when SPECIMEN is not null then SPECIMEN
      when SPECIMEN_PROB > 0.75 then 'ART'
    else null end as SPECIMEN_PRED
, specimen_prob
-- oxygen related parameters
, so2, spo2 -- note spo2 is FROM chartevents
, po2, pco2
, fio2_chartevents, fio2
, aado2
-- also calculate AADO2
, case
    when PO2 is not null
      and pco2 is not null
      and coalesce(fio2, fio2_chartevents) is not null
      -- divide by 100 because FiO2 is in a % but should be a fraction
      then (coalesce(fio2, fio2_chartevents)/100) * (760 - 47) - (pco2/0.8) - po2
    else null
  end as AADO2_calc
, case
    when PO2 is not null and coalesce(fio2, fio2_chartevents) is not null
      -- multiply by 100 because FiO2 is in a % but should be a fraction
      -- NULLIF guards against a charted FiO2 of 0 (division by zero would
      -- abort the whole build); the ratio is NULL in that case
      then 100*PO2/NULLIF(coalesce(fio2, fio2_chartevents), 0)
    else null
  end as PaO2FiO2
-- acid-base parameters
, ph, baseexcess
, bicarbonate, totalco2
-- blood count parameters
, hematocrit
, hemoglobin
, carboxyhemoglobin
, methemoglobin
-- chemistry
, chloride, calcium
, temperature
, potassium, sodium
, lactate
, glucose
-- ventilation stuff that's sometimes input
, intubated, tidalvolume, ventilationrate, ventilator
, peep, o2flow
, requiredo2
from stg3
where lastRowFiO2 = 1 -- only the most recent FiO2
-- restrict it to *only* arterial samples
and (specimen = 'ART' or specimen_prob > 0.75)
order by icustay_id, charttime;

View File

@@ -0,0 +1,143 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
DROP TABLE IF EXISTS gcs_first_day; CREATE TABLE gcs_first_day AS
-- Minimum Glasgow Coma Scale (GCS) in the first 24 hours of each ICU stay,
-- one row per icustay_id (stays without GCS data get NULL values).
-- ITEMIDs used:
-- CAREVUE
--    723 as GCSVerbal
--    454 as GCSMotor
--    184 as GCSEyes
-- METAVISION
--    223900 GCS - Verbal Response
--    223901 GCS - Motor Response
--    220739 GCS - Eye Opening
-- The code combines the ITEMIDs into the carevue itemids, then pivots those
-- So 223900 is changed to 723, then the ITEMID 723 is pivoted to form GCSVerbal
-- Note:
--  The GCS for sedated patients is defaulted to 15 in this code.
--  This is in line with how the data is meant to be collected.
--  e.g., from the SAPS II publication:
--    For sedated patients, the Glasgow Coma Score before sedation was used.
--    This was ascertained either from interviewing the physician who ordered the sedation,
--    or by reviewing the patient's medical record.
-- NOTE(review): DATETIME_ADD / DATETIME_SUB are not PostgreSQL built-ins;
-- presumably they are shims loaded before this script - confirm.
with base as
(
  -- one row per stay/charttime with the three GCS components side by side
  SELECT pvt.ICUSTAY_ID
  , pvt.charttime
  -- Easier names - note we coalesced Metavision and CareVue IDs below
  , max(case when pvt.itemid = 454 then pvt.valuenum else null end) as GCSMotor
  , max(case when pvt.itemid = 723 then pvt.valuenum else null end) as GCSVerbal
  , max(case when pvt.itemid = 184 then pvt.valuenum else null end) as GCSEyes
  -- If verbal was set to 0 in the below select, then this is an intubated patient
  , case
      when max(case when pvt.itemid = 723 then pvt.valuenum else null end) = 0
    then 1
    else 0
    end as EndoTrachFlag
  -- chronological position of this observation within the stay
  , ROW_NUMBER ()
          OVER (PARTITION BY pvt.ICUSTAY_ID ORDER BY pvt.charttime ASC) as rn
  FROM (
  select l.ICUSTAY_ID
  -- merge the ITEMIDs so that the pivot applies to both metavision/carevue data
  , case
      when l.ITEMID in (723,223900) then 723
      when l.ITEMID in (454,223901) then 454
      when l.ITEMID in (184,220739) then 184
      else l.ITEMID end
    as ITEMID
  -- convert the data into a number, reserving a value of 0 for ET/Trach
  , case
      -- endotrach/vent is assigned a value of 0, later parsed specially
      when l.ITEMID = 723 and l.VALUE = '1.0 ET/Trach' then 0 -- carevue
      when l.ITEMID = 223900 and l.VALUE = 'No Response-ETT' then 0 -- metavision
      else l.VALUENUM
      end
    as VALUENUM
  , l.CHARTTIME
  FROM chartevents l
  -- get intime for charttime subselection
  inner join icustays b
    on l.icustay_id = b.icustay_id
  -- Isolate the desired GCS variables
  where l.ITEMID in
  (
    -- 198 -- GCS
    -- GCS components, CareVue
    184, 454, 723
    -- GCS components, Metavision
    , 223900, 223901, 220739
  )
  -- Only get data for the first 24 hours
  and l.charttime between b.intime and DATETIME_ADD(b.intime, INTERVAL '1' DAY)
  -- exclude rows marked as error
  AND (l.error IS NULL OR l.error = 0)
  ) pvt
  group by pvt.ICUSTAY_ID, pvt.charttime
)
, gcs as (
  select b.*
  , b2.GCSVerbal as GCSVerbalPrev
  , b2.GCSMotor as GCSMotorPrev
  , b2.GCSEyes as GCSEyesPrev
  -- Calculate GCS, factoring in special case when they are intubated and prev vals
  -- note that the coalesce are used to implement the following if:
  --  if current value exists, use it
  --  if previous value exists, use it
  --  otherwise, default to normal
  , case
      -- replace GCS during sedation with 15
      when b.GCSVerbal = 0
        then 15
      when b.GCSVerbal is null and b2.GCSVerbal = 0
        then 15
      -- if previously they were intub, but they aren't now, do not use previous GCS values
      when b2.GCSVerbal = 0
        then
            coalesce(b.GCSMotor,6)
          + coalesce(b.GCSVerbal,5)
          + coalesce(b.GCSEyes,4)
      -- otherwise, add up score normally, imputing previous value if none available at current time
      else
            coalesce(b.GCSMotor,coalesce(b2.GCSMotor,6))
          + coalesce(b.GCSVerbal,coalesce(b2.GCSVerbal,5))
          + coalesce(b.GCSEyes,coalesce(b2.GCSEyes,4))
      end as GCS
  from base b
  -- join to itself to get the previous observation (rn - 1), but only when it
  -- was charted less than 6 hours before the current one
  left join base b2
    on b.ICUSTAY_ID = b2.ICUSTAY_ID and b.rn = b2.rn+1 and b2.charttime > DATETIME_SUB(b.charttime, INTERVAL '6' HOUR)
)
, gcs_final as (
  select gcs.*
  -- This sorts the data by GCS, so IsMinGCS = 1 is the lowest GCS value to keep.
  -- Several observations often share the minimum score; charttime breaks the
  -- tie (earliest first) so that the selected row - and with it the reported
  -- components and endotrachflag - is the same on every build.
  , ROW_NUMBER ()
          OVER (PARTITION BY gcs.ICUSTAY_ID
                ORDER BY gcs.GCS, gcs.charttime
               ) as IsMinGCS
  from gcs
)
select ie.subject_id, ie.hadm_id, ie.icustay_id
-- The minimum GCS is determined by the above row partition, we only join if IsMinGCS=1
, GCS as mingcs
, coalesce(GCSMotor,GCSMotorPrev) as gcsmotor
, coalesce(GCSVerbal,GCSVerbalPrev) as gcsverbal
, coalesce(GCSEyes,GCSEyesPrev) as gcseyes
, EndoTrachFlag as endotrachflag
-- subselect down to the cohort of eligible patients
FROM icustays ie
left join gcs_final gs
  on ie.icustay_id = gs.icustay_id and gs.IsMinGCS = 1
ORDER BY ie.icustay_id;

View File

@@ -0,0 +1,155 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
DROP TABLE IF EXISTS labs_first_day;
CREATE TABLE labs_first_day AS
-- Pivot of laboratory measurements around ICU admission: for every ICU stay,
-- the lowest and highest value of each lab concept charted between 6 hours
-- before and 24 hours after ICU intime.
-- Per the original author, the unit of measurement of each ITEMID was checked
-- to be either NULL or the expected unit, so values are used without conversion.
SELECT
    labs.subject_id,
    labs.hadm_id,
    labs.icustay_id,
    min(labs.valuenum) FILTER (WHERE labs.label = 'ANION GAP')   AS aniongap_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'ANION GAP')   AS aniongap_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'ALBUMIN')     AS albumin_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'ALBUMIN')     AS albumin_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'BANDS')       AS bands_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'BANDS')       AS bands_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'BICARBONATE') AS bicarbonate_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'BICARBONATE') AS bicarbonate_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'BILIRUBIN')   AS bilirubin_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'BILIRUBIN')   AS bilirubin_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'CREATININE')  AS creatinine_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'CREATININE')  AS creatinine_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'CHLORIDE')    AS chloride_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'CHLORIDE')    AS chloride_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'GLUCOSE')     AS glucose_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'GLUCOSE')     AS glucose_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'HEMATOCRIT')  AS hematocrit_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'HEMATOCRIT')  AS hematocrit_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'HEMOGLOBIN')  AS hemoglobin_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'HEMOGLOBIN')  AS hemoglobin_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'LACTATE')     AS lactate_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'LACTATE')     AS lactate_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'PLATELET')    AS platelet_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'PLATELET')    AS platelet_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'POTASSIUM')   AS potassium_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'POTASSIUM')   AS potassium_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'PTT')         AS ptt_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'PTT')         AS ptt_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'INR')         AS inr_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'INR')         AS inr_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'PT')          AS pt_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'PT')          AS pt_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'SODIUM')      AS sodium_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'SODIUM')      AS sodium_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'BUN')         AS bun_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'BUN')         AS bun_max,
    min(labs.valuenum) FILTER (WHERE labs.label = 'WBC')         AS wbc_min,
    max(labs.valuenum) FILTER (WHERE labs.label = 'WBC')         AS wbc_max
FROM
(
    -- One row per matching lab measurement. Because every filter lives in the
    -- join condition, a stay without any matching measurement still produces
    -- one row (label and valuenum both NULL) and therefore one output row.
    SELECT
        ie.subject_id,
        ie.hadm_id,
        ie.icustay_id,
        -- Concept label per ITEMID; ITEMIDs carrying the same measurement
        -- (e.g. chemistry and blood-gas sodium) share a label.
        CASE
            WHEN le.itemid = 50868           THEN 'ANION GAP'
            WHEN le.itemid = 50862           THEN 'ALBUMIN'
            WHEN le.itemid = 51144           THEN 'BANDS'
            WHEN le.itemid = 50882           THEN 'BICARBONATE'
            WHEN le.itemid = 50885           THEN 'BILIRUBIN'
            WHEN le.itemid = 50912           THEN 'CREATININE'
            WHEN le.itemid IN (50806, 50902) THEN 'CHLORIDE'
            WHEN le.itemid IN (50809, 50931) THEN 'GLUCOSE'
            WHEN le.itemid IN (50810, 51221) THEN 'HEMATOCRIT'
            WHEN le.itemid IN (50811, 51222) THEN 'HEMOGLOBIN'
            WHEN le.itemid = 50813           THEN 'LACTATE'
            WHEN le.itemid = 51265           THEN 'PLATELET'
            WHEN le.itemid IN (50822, 50971) THEN 'POTASSIUM'
            WHEN le.itemid = 51275           THEN 'PTT'
            WHEN le.itemid = 51237           THEN 'INR'
            WHEN le.itemid = 51274           THEN 'PT'
            WHEN le.itemid IN (50824, 50983) THEN 'SODIUM'
            WHEN le.itemid = 51006           THEN 'BUN'
            WHEN le.itemid IN (51300, 51301) THEN 'WBC'
        END AS label,
        -- Plausibility limits: a value beyond the limit for its ITEMID is
        -- replaced by NULL so it cannot become a min/max. The join condition
        -- already requires valuenum > 0, so these are effectively upper bounds.
        CASE
            -- albumin, g/dL
            WHEN le.itemid = 50862 AND le.valuenum > 10 THEN NULL
            -- anion gap, bicarbonate, chloride (mEq/L); glucose (mg/dL); platelets (K/uL)
            WHEN le.itemid IN (50868, 50882, 50806, 50902, 50809, 50931, 51265)
                 AND le.valuenum > 10000 THEN NULL
            -- immature band forms, %
            WHEN le.itemid = 51144 AND (le.valuenum < 0 OR le.valuenum > 100) THEN NULL
            -- bilirubin, creatinine (mg/dL); PTT, PT (sec)
            WHEN le.itemid IN (50885, 50912, 51275, 51274) AND le.valuenum > 150 THEN NULL
            -- hematocrit, %
            WHEN le.itemid IN (50810, 51221) AND le.valuenum > 100 THEN NULL
            -- hemoglobin (g/dL); lactate (mmol/L); INR
            WHEN le.itemid IN (50811, 51222, 50813, 51237) AND le.valuenum > 50 THEN NULL
            -- potassium, mEq/L
            WHEN le.itemid IN (50822, 50971) AND le.valuenum > 30 THEN NULL
            -- sodium, mEq/L == mmol/L
            WHEN le.itemid IN (50824, 50983) AND le.valuenum > 200 THEN NULL
            -- BUN
            WHEN le.itemid = 51006 AND le.valuenum > 300 THEN NULL
            -- WBC
            WHEN le.itemid IN (51300, 51301) AND le.valuenum > 1000 THEN NULL
            ELSE le.valuenum
        END AS valuenum
    FROM icustays ie
    LEFT JOIN labevents le
        ON  le.subject_id = ie.subject_id
        AND le.hadm_id = ie.hadm_id
        -- window: 6 hours before ICU admission through the first ICU day
        AND le.charttime BETWEEN (DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)) AND (DATETIME_ADD(ie.intime, INTERVAL '1' DAY))
        -- lab values cannot be 0 and cannot be negative
        AND le.valuenum IS NOT NULL
        AND le.valuenum > 0
        AND le.itemid IN
        (
            -- ITEMID -- LABEL | CATEGORY | FLUID
            50868, -- ANION GAP | CHEMISTRY | BLOOD
            50862, -- ALBUMIN | CHEMISTRY | BLOOD
            51144, -- BANDS | HEMATOLOGY
            50882, -- BICARBONATE | CHEMISTRY | BLOOD
            50885, -- BILIRUBIN, TOTAL | CHEMISTRY | BLOOD
            50912, -- CREATININE | CHEMISTRY | BLOOD
            50902, -- CHLORIDE | CHEMISTRY | BLOOD
            50806, -- CHLORIDE, WHOLE BLOOD | BLOOD GAS | BLOOD
            50931, -- GLUCOSE | CHEMISTRY | BLOOD
            50809, -- GLUCOSE | BLOOD GAS | BLOOD
            51221, -- HEMATOCRIT | HEMATOLOGY | BLOOD
            50810, -- HEMATOCRIT, CALCULATED | BLOOD GAS | BLOOD
            51222, -- HEMOGLOBIN | HEMATOLOGY | BLOOD
            50811, -- HEMOGLOBIN | BLOOD GAS | BLOOD
            50813, -- LACTATE | BLOOD GAS | BLOOD
            51265, -- PLATELET COUNT | HEMATOLOGY | BLOOD
            50971, -- POTASSIUM | CHEMISTRY | BLOOD
            50822, -- POTASSIUM, WHOLE BLOOD | BLOOD GAS | BLOOD
            51275, -- PTT | HEMATOLOGY | BLOOD
            51237, -- INR(PT) | HEMATOLOGY | BLOOD
            51274, -- PT | HEMATOLOGY | BLOOD
            50983, -- SODIUM | CHEMISTRY | BLOOD
            50824, -- SODIUM, WHOLE BLOOD | BLOOD GAS | BLOOD
            51006, -- UREA NITROGEN | CHEMISTRY | BLOOD
            51301, -- WHITE BLOOD CELLS | HEMATOLOGY | BLOOD
            51300  -- WBC COUNT | HEMATOLOGY | BLOOD
        )
) labs
GROUP BY labs.subject_id, labs.hadm_id, labs.icustay_id
ORDER BY labs.subject_id, labs.hadm_id, labs.icustay_id;

View File

@@ -0,0 +1,58 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
DROP TABLE IF EXISTS urine_output_first_day;
CREATE TABLE urine_output_first_day AS
-- ------------------------------------------------------------------
-- Purpose: Create a table with the total urine output of each ICUSTAY_ID
--          over the first 24 hours of the ICU stay.
--
-- One row is produced for every row of icustays. Stays with no matching
-- urine output event in the window get urineoutput = NULL. The ITEMID
-- filter is part of the join condition (not a WHERE clause) precisely so
-- that the LEFT JOIN keeps those stays, matching labs_first_day.
-- ------------------------------------------------------------------
SELECT
    -- patient identifiers
    ie.subject_id,
    ie.hadm_id,
    ie.icustay_id,
    -- volumes associated with urine output ITEMIDs
    SUM(
        -- we consider input of GU irrigant as a negative volume
        CASE
            WHEN oe.itemid = 227488 AND oe.value > 0 THEN -1 * oe.value
            ELSE oe.value
        END
    ) AS urineoutput
FROM icustays ie
-- Join to the outputevents table to get urine output
LEFT JOIN outputevents oe
    -- join on all patient identifiers
    ON  ie.subject_id = oe.subject_id
    AND ie.hadm_id = oe.hadm_id
    AND ie.icustay_id = oe.icustay_id
    -- and ensure the data occurs during the first ICU day
    AND oe.charttime BETWEEN ie.intime AND (DATETIME_ADD(ie.intime, INTERVAL '1' DAY))
    -- restrict to urine output ITEMIDs; kept in ON so unmatched stays survive
    AND oe.itemid IN
    (
        -- these are the most frequently occurring urine output observations in CareVue
        40055, -- "Urine Out Foley"
        43175, -- "Urine ."
        40069, -- "Urine Out Void"
        40094, -- "Urine Out Condom Cath"
        40715, -- "Urine Out Suprapubic"
        40473, -- "Urine Out IleoConduit"
        40085, -- "Urine Out Incontinent"
        40057, -- "Urine Out Rt Nephrostomy"
        40056, -- "Urine Out Lt Nephrostomy"
        40405, -- "Urine Out Other"
        40428, -- "Urine Out Straight Cath"
        40086, -- "Urine Out Incontinent"
        40096, -- "Urine Out Ureteral Stent #1"
        40651, -- "Urine Out Ureteral Stent #2"
        -- these are the most frequently occurring urine output observations in MetaVision
        226559, -- "Foley"
        226560, -- "Void"
        226561, -- "Condom Cath"
        226584, -- "Ileoconduit"
        226563, -- "Suprapubic"
        226564, -- "R Nephrostomy"
        226565, -- "L Nephrostomy"
        226567, -- "Straight Cath"
        226557, -- "R Ureteral Stent"
        226558, -- "L Ureteral Stent"
        227488, -- "GU Irrigant Volume In"
        227489  -- "GU Irrigant/Urine Volume Out"
    )
GROUP BY ie.subject_id, ie.hadm_id, ie.icustay_id
ORDER BY ie.subject_id, ie.hadm_id, ie.icustay_id;

View File

@@ -0,0 +1,120 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
DROP TABLE IF EXISTS vitals_first_day;
CREATE TABLE vitals_first_day AS
-- This query pivots the vital signs for the first 24 hours of a patient's stay.
-- Vital signs include heart rate, blood pressure, respiration rate, temperature,
-- SpO2 and glucose; each is summarised as min / max / mean per ICU stay.
--
-- One row is produced for every row of icustays. Stays without any matching
-- chartevents row get NULL in every summary column. The ITEMID filter is part
-- of the join condition (not a WHERE clause) so that the LEFT JOIN keeps
-- those stays, matching labs_first_day.
SELECT
    pvt.subject_id,
    pvt.hadm_id,
    pvt.icustay_id,
    -- Easier names
    min(CASE WHEN pvt.vitalid = 1 THEN pvt.valuenum ELSE NULL END) AS heartrate_min,
    max(CASE WHEN pvt.vitalid = 1 THEN pvt.valuenum ELSE NULL END) AS heartrate_max,
    avg(CASE WHEN pvt.vitalid = 1 THEN pvt.valuenum ELSE NULL END) AS heartrate_mean,
    min(CASE WHEN pvt.vitalid = 2 THEN pvt.valuenum ELSE NULL END) AS sysbp_min,
    max(CASE WHEN pvt.vitalid = 2 THEN pvt.valuenum ELSE NULL END) AS sysbp_max,
    avg(CASE WHEN pvt.vitalid = 2 THEN pvt.valuenum ELSE NULL END) AS sysbp_mean,
    min(CASE WHEN pvt.vitalid = 3 THEN pvt.valuenum ELSE NULL END) AS diasbp_min,
    max(CASE WHEN pvt.vitalid = 3 THEN pvt.valuenum ELSE NULL END) AS diasbp_max,
    avg(CASE WHEN pvt.vitalid = 3 THEN pvt.valuenum ELSE NULL END) AS diasbp_mean,
    min(CASE WHEN pvt.vitalid = 4 THEN pvt.valuenum ELSE NULL END) AS meanbp_min,
    max(CASE WHEN pvt.vitalid = 4 THEN pvt.valuenum ELSE NULL END) AS meanbp_max,
    avg(CASE WHEN pvt.vitalid = 4 THEN pvt.valuenum ELSE NULL END) AS meanbp_mean,
    min(CASE WHEN pvt.vitalid = 5 THEN pvt.valuenum ELSE NULL END) AS resprate_min,
    max(CASE WHEN pvt.vitalid = 5 THEN pvt.valuenum ELSE NULL END) AS resprate_max,
    avg(CASE WHEN pvt.vitalid = 5 THEN pvt.valuenum ELSE NULL END) AS resprate_mean,
    min(CASE WHEN pvt.vitalid = 6 THEN pvt.valuenum ELSE NULL END) AS tempc_min,
    max(CASE WHEN pvt.vitalid = 6 THEN pvt.valuenum ELSE NULL END) AS tempc_max,
    avg(CASE WHEN pvt.vitalid = 6 THEN pvt.valuenum ELSE NULL END) AS tempc_mean,
    min(CASE WHEN pvt.vitalid = 7 THEN pvt.valuenum ELSE NULL END) AS spo2_min,
    max(CASE WHEN pvt.vitalid = 7 THEN pvt.valuenum ELSE NULL END) AS spo2_max,
    avg(CASE WHEN pvt.vitalid = 7 THEN pvt.valuenum ELSE NULL END) AS spo2_mean,
    min(CASE WHEN pvt.vitalid = 8 THEN pvt.valuenum ELSE NULL END) AS glucose_min,
    max(CASE WHEN pvt.vitalid = 8 THEN pvt.valuenum ELSE NULL END) AS glucose_max,
    avg(CASE WHEN pvt.vitalid = 8 THEN pvt.valuenum ELSE NULL END) AS glucose_mean
FROM (
    SELECT
        ie.subject_id,
        ie.hadm_id,
        ie.icustay_id,
        -- Classify each measurement into a vital-sign id. Values outside the
        -- plausibility range for their ITEMID get a NULL id and are ignored
        -- by every aggregate above.
        CASE
            WHEN ce.itemid IN (211, 220045) AND ce.valuenum > 0 AND ce.valuenum < 300 THEN 1 -- HeartRate
            WHEN ce.itemid IN (51, 442, 455, 6701, 220179, 220050) AND ce.valuenum > 0 AND ce.valuenum < 400 THEN 2 -- SysBP
            WHEN ce.itemid IN (8368, 8440, 8441, 8555, 220180, 220051) AND ce.valuenum > 0 AND ce.valuenum < 300 THEN 3 -- DiasBP
            WHEN ce.itemid IN (456, 52, 6702, 443, 220052, 220181, 225312) AND ce.valuenum > 0 AND ce.valuenum < 300 THEN 4 -- MeanBP
            WHEN ce.itemid IN (615, 618, 220210, 224690) AND ce.valuenum > 0 AND ce.valuenum < 70 THEN 5 -- RespRate
            WHEN ce.itemid IN (223761, 678) AND ce.valuenum > 70 AND ce.valuenum < 120 THEN 6 -- TempF, converted to degC below
            WHEN ce.itemid IN (223762, 676) AND ce.valuenum > 10 AND ce.valuenum < 50 THEN 6 -- TempC
            WHEN ce.itemid IN (646, 220277) AND ce.valuenum > 0 AND ce.valuenum <= 100 THEN 7 -- SpO2
            WHEN ce.itemid IN (807, 811, 1529, 3745, 3744, 225664, 220621, 226537) AND ce.valuenum > 0 THEN 8 -- Glucose
            ELSE NULL
        END AS vitalid,
        -- convert F to C
        CASE
            WHEN ce.itemid IN (223761, 678) THEN (ce.valuenum - 32) / 1.8
            ELSE ce.valuenum
        END AS valuenum
    FROM icustays ie
    LEFT JOIN chartevents ce
        ON  ie.icustay_id = ce.icustay_id
        -- First ICU day, strictly after admission: intime < charttime <= intime + 1 day.
        -- This is the same window the previous BETWEEN plus two DATETIME_DIFF
        -- checks (> 0 seconds, <= 24 hours) selected, written as plain
        -- comparisons so no per-row function call is needed on chartevents.
        AND ce.charttime > ie.intime
        AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
        -- exclude rows marked as error
        AND (ce.error IS NULL OR ce.error = 0)
        -- restrict to vital-sign ITEMIDs; kept in ON so unmatched stays survive
        AND ce.itemid IN
        (
            -- HEART RATE
            211, -- "Heart Rate"
            220045, -- "Heart Rate"
            -- Systolic/diastolic
            51, -- Arterial BP [Systolic]
            442, -- Manual BP [Systolic]
            455, -- NBP [Systolic]
            6701, -- Arterial BP #2 [Systolic]
            220179, -- Non Invasive Blood Pressure systolic
            220050, -- Arterial Blood Pressure systolic
            8368, -- Arterial BP [Diastolic]
            8440, -- Manual BP [Diastolic]
            8441, -- NBP [Diastolic]
            8555, -- Arterial BP #2 [Diastolic]
            220180, -- Non Invasive Blood Pressure diastolic
            220051, -- Arterial Blood Pressure diastolic
            -- MEAN ARTERIAL PRESSURE
            456, -- "NBP Mean"
            52, -- "Arterial BP Mean"
            6702, -- Arterial BP Mean #2
            443, -- Manual BP Mean(calc)
            220052, -- "Arterial Blood Pressure mean"
            220181, -- "Non Invasive Blood Pressure mean"
            225312, -- "ART BP mean"
            -- RESPIRATORY RATE
            618, -- Respiratory Rate
            615, -- Resp Rate (Total)
            220210, -- Respiratory Rate
            224690, -- Respiratory Rate (Total)
            -- SPO2, peripheral
            646, 220277,
            -- GLUCOSE, both lab and fingerstick
            807, -- Fingerstick Glucose
            811, -- Glucose (70-105)
            1529, -- Glucose
            3745, -- BloodGlucose
            3744, -- Blood Glucose
            225664, -- Glucose finger stick
            220621, -- Glucose (serum)
            226537, -- Glucose (whole blood)
            -- TEMPERATURE
            223762, -- "Temperature Celsius"
            676, -- "Temperature C"
            223761, -- "Temperature Fahrenheit"
            678 -- "Temperature F"
        )
) pvt
GROUP BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id
ORDER BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id;

View File

@@ -0,0 +1,45 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
DROP TABLE IF EXISTS urine_output;
CREATE TABLE urine_output AS
-- Net urine output per ICU stay and chart time, over the whole stay.
-- Rows of outputevents are grouped by (icustay_id, charttime) and their
-- volumes summed; GU irrigant input counts as a negative volume.
SELECT
    oe.icustay_id,
    oe.charttime,
    SUM(
        CASE oe.itemid
            WHEN 227488 THEN -1 * oe.value -- GU irrigant volume in: subtract
            ELSE oe.value
        END
    ) AS value
FROM outputevents oe
WHERE oe.icustay_id IS NOT NULL
  AND oe.value < 5000 -- sanity check on urine value
  AND oe.itemid IN
  (
      -- most frequently occurring urine output observations in CareVue
      40055, -- "Urine Out Foley"
      43175, -- "Urine ."
      40069, -- "Urine Out Void"
      40094, -- "Urine Out Condom Cath"
      40715, -- "Urine Out Suprapubic"
      40473, -- "Urine Out IleoConduit"
      40085, -- "Urine Out Incontinent"
      40057, -- "Urine Out Rt Nephrostomy"
      40056, -- "Urine Out Lt Nephrostomy"
      40405, -- "Urine Out Other"
      40428, -- "Urine Out Straight Cath"
      40086, -- "Urine Out Incontinent"
      40096, -- "Urine Out Ureteral Stent #1"
      40651, -- "Urine Out Ureteral Stent #2"
      -- most frequently occurring urine output observations in MetaVision
      226559, -- "Foley"
      226560, -- "Void"
      226561, -- "Condom Cath"
      226584, -- "Ileoconduit"
      226563, -- "Suprapubic"
      226564, -- "R Nephrostomy"
      226565, -- "L Nephrostomy"
      226567, -- "Straight Cath"
      226557, -- "R Ureteral Stent"
      226558, -- "L Ureteral Stent"
      227488, -- "GU Irrigant Volume In"
      227489  -- "GU Irrigant/Urine Volume Out"
  )
GROUP BY oe.icustay_id, oe.charttime;

163
sql/postgres-functions.sql Normal file
View File

@@ -0,0 +1,163 @@
-- (Optional): set the search_path so all functions are generated on the mimiciii schema
-- SET search_path TO mimiciii;
-- REGEXP_EXTRACT(str, pattern): BigQuery-named shim.
-- Returns the part of str matched by the POSIX regular expression pattern
-- (the first parenthesised group if the pattern has one), or NULL when
-- nothing matches or either argument is NULL.
CREATE OR REPLACE FUNCTION REGEXP_EXTRACT(str TEXT, pattern TEXT)
RETURNS TEXT
LANGUAGE plpgsql
AS $$
DECLARE
    matched TEXT;
BEGIN
    -- two-argument substring() on text arguments is the regex form
    matched := substring(str, pattern);
    RETURN matched;
END;
$$;
-- REGEXP_CONTAINS(str, pattern): BigQuery-named shim.
-- TRUE when the POSIX regular expression pattern matches anywhere in str
-- (case-sensitive), FALSE when it does not, NULL when either argument is NULL.
CREATE OR REPLACE FUNCTION REGEXP_CONTAINS(str TEXT, pattern TEXT)
RETURNS BOOL
LANGUAGE plpgsql
AS $$
DECLARE
    is_match BOOL;
BEGIN
    is_match := (str ~ pattern);
    RETURN is_match;
END;
$$;
-- GENERATE_ARRAY(i, j): BigQuery-named alias for generate_series.
-- Returns the integers i..j (inclusive, step 1) as a SET of rows, not as an
-- array value; yields no rows when i > j or an argument is NULL.
CREATE OR REPLACE FUNCTION GENERATE_ARRAY(i INTEGER, j INTEGER)
RETURNS SETOF INTEGER
LANGUAGE sql
AS $$
    SELECT series.n
    FROM generate_series(i, j) AS series(n)
$$;
-- datetime functions
-- DATETIME(dt): BigQuery-named shim that turns a DATE into a timestamp
-- (without time zone) at midnight of that day.
CREATE OR REPLACE FUNCTION DATETIME(dt DATE)
RETURNS TIMESTAMP(3)
LANGUAGE plpgsql
AS $$
DECLARE
    midnight TIMESTAMP(3);
BEGIN
    midnight := dt::TIMESTAMP(3);
    RETURN midnight;
END;
$$;
-- DATETIME(year, month, day, hour, minute, second): BigQuery-named shim that
-- assembles a timestamp (without time zone) from its calendar fields.
--
-- Built with make_timestamp so the value never passes through
-- TIMESTAMP WITH TIME ZONE. The previous TO_TIMESTAMP-based version did, which
-- made the result depend on the session TimeZone: a wall-clock time that
-- falls into a daylight-saving gap of that zone came back shifted.
-- Out-of-range fields raise an error.
CREATE OR REPLACE FUNCTION DATETIME(year INTEGER, month INTEGER, day INTEGER, hour INTEGER, minute INTEGER, second INTEGER) RETURNS TIMESTAMP(3) AS $$
BEGIN
    RETURN make_timestamp(year, month, day, hour, minute, second);
END; $$
LANGUAGE PLPGSQL IMMUTABLE;
-- note: in bigquery, `INTERVAL 1 YEAR` is a valid interval
-- but in postgres, it must be `INTERVAL '1' YEAR`
-- DATETIME_ADD(datetime, INTERVAL 'n' DATEPART) -> datetime + INTERVAL 'n' DATEPART
-- DATETIME_ADD(datetime_val, intvl): returns datetime_val + intvl.
-- NULL if either argument is NULL.
--
-- Declared IMMUTABLE (timestamp-without-time-zone + interval depends on
-- nothing but its arguments) and written as a single-expression SQL function
-- so the planner can fold constant calls and inline the expression into the
-- calling query. The build scripts call this in join conditions over very
-- large tables, where a default VOLATILE PL/pgSQL call per row is costly.
CREATE OR REPLACE FUNCTION DATETIME_ADD(datetime_val TIMESTAMP(3), intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$
    SELECT datetime_val + intvl
$$
LANGUAGE SQL IMMUTABLE;
-- DATE_ADD(dt, intvl): midnight of dt plus intvl, as a timestamp without
-- time zone. NULL if either argument is NULL.
CREATE OR REPLACE FUNCTION DATE_ADD(dt DATE, intvl INTERVAL)
RETURNS TIMESTAMP(3)
LANGUAGE plpgsql
AS $$
DECLARE
    midnight TIMESTAMP(3) := dt;
BEGIN
    RETURN midnight + intvl;
END;
$$;
-- DATETIME_SUB(datetime, INTERVAL 'n' DATEPART) -> datetime - INTERVAL 'n' DATEPART
-- DATETIME_SUB(datetime_val, intvl): returns datetime_val - intvl.
-- NULL if either argument is NULL.
--
-- Declared IMMUTABLE (timestamp-without-time-zone - interval depends on
-- nothing but its arguments) and written as a single-expression SQL function
-- so the planner can fold constant calls and inline the expression into the
-- calling query instead of invoking a VOLATILE PL/pgSQL function per row.
CREATE OR REPLACE FUNCTION DATETIME_SUB(datetime_val TIMESTAMP(3), intvl INTERVAL) RETURNS TIMESTAMP(3) AS $$
    SELECT datetime_val - intvl
$$
LANGUAGE SQL IMMUTABLE;
-- DATE_SUB(dt, intvl): midnight of dt minus intvl, as a timestamp without
-- time zone. NULL if either argument is NULL.
CREATE OR REPLACE FUNCTION DATE_SUB(dt DATE, intvl INTERVAL)
RETURNS TIMESTAMP(3)
LANGUAGE plpgsql
AS $$
DECLARE
    midnight TIMESTAMP(3) := dt;
BEGIN
    RETURN midnight - intvl;
END;
$$;
-- TODO:
-- DATETIME_TRUNC(datetime, PART) -> DATE_TRUNC('datepart', datetime)
-- below requires a regex to convert datepart from primitive to a string
-- i.e. encapsulate it in single quotes
-- DATETIME_DIFF(endtime, starttime, datepart): elapsed time from starttime to
-- endtime expressed in units of datepart, as a (possibly fractional) NUMERIC.
--
--   datepart  one of 'SECOND', 'MINUTE', 'HOUR', 'DAY', 'YEAR', matched
--             case-insensitively. A YEAR is taken as 365.242 days.
--   returns   negative when endtime precedes starttime; NULL when any
--             argument is NULL or datepart is not one of the names above.
--
-- NOTE(review): the result is fractional (e.g. 90 minutes -> 1.5 HOUR).
-- BigQuery's DATETIME_DIFF is documented to return an integer count instead,
-- so callers that need integer semantics must round explicitly.
--
-- Declared IMMUTABLE and written as a single-expression SQL function so the
-- planner can fold and inline it; the build scripts call it per row in join
-- conditions over very large tables.
CREATE OR REPLACE FUNCTION DATETIME_DIFF(endtime TIMESTAMP(3), starttime TIMESTAMP(3), datepart TEXT) RETURNS NUMERIC AS $$
    SELECT CAST(
        EXTRACT(EPOCH FROM endtime - starttime) /
        CASE UPPER(datepart)
            WHEN 'SECOND' THEN 1.0
            WHEN 'MINUTE' THEN 60.0
            WHEN 'HOUR' THEN 3600.0
            WHEN 'DAY' THEN 24*3600.0
            WHEN 'YEAR' THEN 365.242*24*3600.0
            ELSE NULL
        END
        AS NUMERIC
    )
$$
LANGUAGE SQL IMMUTABLE;
-- BigQuery has a custom data type, PART
-- It's difficult to replicate this in postgresql, which recognizes the PART as a column name,
-- unless it is within an EXTRACT() function.
-- BIGQUERY_FORMAT_TO_PSQL(format_str): translate a BigQuery strftime-style
-- format string into the template syntax used by TO_CHAR / TO_DATE /
-- TO_TIMESTAMP. Only these elements are translated; anything else is left
-- untouched:
--   %S -> SS, %M -> MI, %H -> HH24, %d -> dd, %m -> mm, %Y -> yyyy
-- Returns NULL when format_str is NULL.
CREATE OR REPLACE FUNCTION BIGQUERY_FORMAT_TO_PSQL(format_str VARCHAR(255))
RETURNS TEXT
LANGUAGE plpgsql
AS $$
DECLARE
    -- parallel arrays: bq_tokens[k] is rewritten to pg_tokens[k], in this order
    bq_tokens CONSTANT TEXT[] := ARRAY['%S', '%M', '%H', '%d', '%m', '%Y'];
    pg_tokens CONSTANT TEXT[] := ARRAY['SS', 'MI', 'HH24', 'dd', 'mm', 'yyyy'];
    converted TEXT := format_str;
BEGIN
    FOR k IN 1 .. array_length(bq_tokens, 1) LOOP
        converted := REPLACE(converted, bq_tokens[k], pg_tokens[k]);
    END LOOP;
    RETURN converted;
END;
$$;
-- FORMAT_DATE(format_str, datetime_val): render datetime_val as text using a
-- BigQuery-style format string. The format is first translated by
-- BIGQUERY_FORMAT_TO_PSQL, which handles only a handful of format elements.
CREATE OR REPLACE FUNCTION FORMAT_DATE(format_str VARCHAR(255), datetime_val TIMESTAMP(3))
RETURNS TEXT
LANGUAGE plpgsql
AS $$
DECLARE
    pg_format TEXT := BIGQUERY_FORMAT_TO_PSQL(format_str);
BEGIN
    RETURN TO_CHAR(datetime_val, pg_format);
END;
$$;
-- PARSE_DATE(format_str, string_val): parse string_val into a DATE using a
-- BigQuery-style format string. The format is first translated by
-- BIGQUERY_FORMAT_TO_PSQL, which handles only a handful of format elements.
CREATE OR REPLACE FUNCTION PARSE_DATE(format_str VARCHAR(255), string_val VARCHAR(255))
RETURNS DATE
LANGUAGE plpgsql
AS $$
DECLARE
    pg_format TEXT := BIGQUERY_FORMAT_TO_PSQL(format_str);
BEGIN
    RETURN TO_DATE(string_val, pg_format);
END;
$$;
-- FORMAT_DATETIME(format_str, datetime_val): render datetime_val as text using
-- a BigQuery-style format string. The format is first translated by
-- BIGQUERY_FORMAT_TO_PSQL, which handles only a handful of format elements.
CREATE OR REPLACE FUNCTION FORMAT_DATETIME(format_str VARCHAR(255), datetime_val TIMESTAMP(3))
RETURNS TEXT
LANGUAGE plpgsql
AS $$
DECLARE
    pg_format TEXT := BIGQUERY_FORMAT_TO_PSQL(format_str);
BEGIN
    RETURN TO_CHAR(datetime_val, pg_format);
END;
$$;
-- PARSE_DATETIME(format_str, string_val): parse string_val into a timestamp
-- (without time zone) using a BigQuery-style format string. The format is
-- first translated by BIGQUERY_FORMAT_TO_PSQL, which handles only a handful
-- of format elements.
--
-- TO_TIMESTAMP returns TIMESTAMP WITH TIME ZONE, so the parsed wall-clock
-- value is interpreted in, and converted back from, the active TimeZone.
-- In a zone with daylight saving, a wall-clock time inside the spring-forward
-- gap does not survive that round trip unchanged. The function therefore pins
-- TimeZone to UTC (which has no such gaps) for the duration of the call, and
-- performs the conversion back explicitly inside the function body.
CREATE OR REPLACE FUNCTION PARSE_DATETIME(format_str VARCHAR(255), string_val VARCHAR(255)) RETURNS TIMESTAMP(3) AS $$
BEGIN
    RETURN CAST(
        TO_TIMESTAMP(
            string_val,
            -- convert BigQuery string format to postgres string format
            BIGQUERY_FORMAT_TO_PSQL(format_str)
        )
        AS TIMESTAMP
    );
END; $$
LANGUAGE PLPGSQL
SET timezone TO 'UTC';

605
sql/schemas.sql Normal file
View File

@@ -0,0 +1,605 @@
-- ------------------------------------------------------------------
-- Reference CREATE TABLE schemas for every derived table produced by
-- sql/build_sapsii.sql
-- sql/build_sepsis3.sql
--
-- This file is documentation only. The actual build scripts use
-- `DROP TABLE IF EXISTS ...; CREATE TABLE ... AS SELECT ...`, so
-- column types are inferred by PostgreSQL at build time from the
-- MIMIC-III v1.3 base schema and from the expressions in the SELECT.
-- The types below match what PostgreSQL infers when the build is run
-- on a stock MIMIC-III v1.3 PostgreSQL restore (where for example
-- chartevents.valuenum is DOUBLE PRECISION, outputevents.value is
-- DOUBLE PRECISION, *.charttime is TIMESTAMP(0), etc.).
--
-- Use this file as:
-- * a quick reference for column names and types of each derived
-- table (handy for downstream consumers that need to know the
-- output schema without grep'ing through the build SQL);
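-- * a stub for declaring empty derived tables ahead of time (e.g.
-- in a migration that just `CREATE TABLE IF NOT EXISTS ...`s
-- them, then later runs the build to populate them). Adapt the
-- statements first: as written, every table below is preceded
-- by `DROP TABLE IF EXISTS`, so executing this file unchanged
-- discards any derived tables that have already been built;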
-- * a checklist when porting these scripts to another flavor of
-- MIMIC (e.g. MIMIC-III v1.4 or MIMIC-IV).
-- ------------------------------------------------------------------
-- ==================================================================
-- SAPS-II
-- ==================================================================
-- 1. Helper: all-time urine output (from outputevents).
--    One row per (icustay_id, charttime): the build groups outputevents by
--    those two columns and sums the charted volumes, counting GU irrigant
--    input (itemid 227488) as a negative volume. Only events with a
--    non-NULL icustay_id and value < 5000 are included.
DROP TABLE IF EXISTS urine_output;
CREATE TABLE urine_output (
icustay_id INTEGER,
charttime TIMESTAMP(0),
value DOUBLE PRECISION -- net summed volume charted at this charttime
);
-- 2. Ventilation: classification (per charttime) and durations
-- (per ventilation episode).
--    ventilation_classification holds the per-charttime classification.
--    NOTE(review): the four INTEGER columns after charttime look like 0/1
--    indicator flags -- confirm against durations/ventilation_classification.sql.
DROP TABLE IF EXISTS ventilation_classification;
CREATE TABLE ventilation_classification (
icustay_id INTEGER,
charttime TIMESTAMP(0),
mechvent INTEGER,
oxygentherapy INTEGER,
extubated INTEGER,
selfextubated INTEGER
);
-- Ventilation durations: one row per ventilation episode (see the section
-- comment above), bounded by starttime / endtime.
-- NOTE(review): ventnum is presumably the episode sequence number within the
-- ICU stay and duration_hours the episode length -- confirm against
-- durations/ventilation_durations.sql.
DROP TABLE IF EXISTS ventilation_durations;
CREATE TABLE ventilation_durations (
icustay_id INTEGER,
ventnum BIGINT,
starttime TIMESTAMP(0),
endtime TIMESTAMP(0),
duration_hours NUMERIC
);
-- 3. First-day pivots feeding SAPS-II.
--    blood_gas_first_day: blood gas measurements keyed by the stay
--    identifiers plus charttime and specimen; every measurement column is a
--    nullable DOUBLE PRECISION. The build must run this table before
--    blood_gas_first_day_arterial.
--    NOTE(review): presumably one row per blood gas sample (charttime) in the
--    first ICU day -- confirm against firstday/blood_gas_first_day.sql.
DROP TABLE IF EXISTS blood_gas_first_day;
CREATE TABLE blood_gas_first_day (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
charttime TIMESTAMP(0),
specimen VARCHAR(200),
aado2 DOUBLE PRECISION,
baseexcess DOUBLE PRECISION,
bicarbonate DOUBLE PRECISION,
totalco2 DOUBLE PRECISION,
carboxyhemoglobin DOUBLE PRECISION,
chloride DOUBLE PRECISION,
calcium DOUBLE PRECISION,
glucose DOUBLE PRECISION,
hematocrit DOUBLE PRECISION,
hemoglobin DOUBLE PRECISION,
intubated DOUBLE PRECISION,
lactate DOUBLE PRECISION,
methemoglobin DOUBLE PRECISION,
o2flow DOUBLE PRECISION,
fio2 DOUBLE PRECISION,
so2 DOUBLE PRECISION,
pco2 DOUBLE PRECISION,
peep DOUBLE PRECISION,
ph DOUBLE PRECISION,
po2 DOUBLE PRECISION,
potassium DOUBLE PRECISION,
requiredo2 DOUBLE PRECISION,
sodium DOUBLE PRECISION,
temperature DOUBLE PRECISION,
tidalvolume DOUBLE PRECISION,
ventilationrate DOUBLE PRECISION,
ventilator DOUBLE PRECISION
);
-- blood_gas_first_day_arterial: carries every column of blood_gas_first_day
-- (in a different order) plus six additional ones: specimen_pred,
-- specimen_prob, spo2, fio2_chartevents, aado2_calc and pao2fio2.
-- Built after blood_gas_first_day (required build order).
-- NOTE(review): specimen_pred / specimen_prob look like a predicted specimen
-- type and its probability -- confirm against
-- firstday/blood_gas_first_day_arterial.sql.
DROP TABLE IF EXISTS blood_gas_first_day_arterial;
CREATE TABLE blood_gas_first_day_arterial (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
charttime TIMESTAMP(0),
specimen VARCHAR(200),
specimen_pred VARCHAR(200),
specimen_prob DOUBLE PRECISION,
so2 DOUBLE PRECISION,
spo2 DOUBLE PRECISION,
po2 DOUBLE PRECISION,
pco2 DOUBLE PRECISION,
fio2_chartevents DOUBLE PRECISION,
fio2 DOUBLE PRECISION,
aado2 DOUBLE PRECISION,
aado2_calc DOUBLE PRECISION,
pao2fio2 DOUBLE PRECISION,
ph DOUBLE PRECISION,
baseexcess DOUBLE PRECISION,
bicarbonate DOUBLE PRECISION,
totalco2 DOUBLE PRECISION,
hematocrit DOUBLE PRECISION,
hemoglobin DOUBLE PRECISION,
carboxyhemoglobin DOUBLE PRECISION,
methemoglobin DOUBLE PRECISION,
chloride DOUBLE PRECISION,
calcium DOUBLE PRECISION,
temperature DOUBLE PRECISION,
potassium DOUBLE PRECISION,
sodium DOUBLE PRECISION,
lactate DOUBLE PRECISION,
glucose DOUBLE PRECISION,
intubated DOUBLE PRECISION,
tidalvolume DOUBLE PRECISION,
ventilationrate DOUBLE PRECISION,
ventilator DOUBLE PRECISION,
peep DOUBLE PRECISION,
o2flow DOUBLE PRECISION,
requiredo2 DOUBLE PRECISION
);
-- gcs_first_day: one row per ICU stay. The build left-joins the single
-- lowest-GCS measurement of the stay onto icustays, so all score columns are
-- NULL for a stay without any GCS measurement.
DROP TABLE IF EXISTS gcs_first_day;
CREATE TABLE gcs_first_day (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
mingcs DOUBLE PRECISION, -- lowest total GCS of the stay's first-day measurements
gcsmotor DOUBLE PRECISION, -- motor component at that measurement, else the preceding measurement's
gcsverbal DOUBLE PRECISION, -- verbal component, same fallback rule
gcseyes DOUBLE PRECISION, -- eye component, same fallback rule
endotrachflag INTEGER -- NOTE(review): presumably 1 when the patient was intubated at that measurement -- confirm in firstday/gcs_first_day.sql
);
-- labs_first_day: one row per ICU stay holding, for each lab concept, the
-- minimum (<lab>_min) and maximum (<lab>_max) value charted between 6 hours
-- before and 24 hours after ICU intime. A pair is NULL when the stay has no
-- plausible measurement of that lab in the window (values <= 0 or above a
-- per-lab upper limit are discarded by the build).
DROP TABLE IF EXISTS labs_first_day;
CREATE TABLE labs_first_day (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
aniongap_min DOUBLE PRECISION,
aniongap_max DOUBLE PRECISION,
albumin_min DOUBLE PRECISION,
albumin_max DOUBLE PRECISION,
bands_min DOUBLE PRECISION,
bands_max DOUBLE PRECISION,
bicarbonate_min DOUBLE PRECISION,
bicarbonate_max DOUBLE PRECISION,
bilirubin_min DOUBLE PRECISION,
bilirubin_max DOUBLE PRECISION,
creatinine_min DOUBLE PRECISION,
creatinine_max DOUBLE PRECISION,
chloride_min DOUBLE PRECISION,
chloride_max DOUBLE PRECISION,
glucose_min DOUBLE PRECISION,
glucose_max DOUBLE PRECISION,
hematocrit_min DOUBLE PRECISION,
hematocrit_max DOUBLE PRECISION,
hemoglobin_min DOUBLE PRECISION,
hemoglobin_max DOUBLE PRECISION,
lactate_min DOUBLE PRECISION,
lactate_max DOUBLE PRECISION,
platelet_min DOUBLE PRECISION,
platelet_max DOUBLE PRECISION,
potassium_min DOUBLE PRECISION,
potassium_max DOUBLE PRECISION,
ptt_min DOUBLE PRECISION,
ptt_max DOUBLE PRECISION,
inr_min DOUBLE PRECISION,
inr_max DOUBLE PRECISION,
pt_min DOUBLE PRECISION,
pt_max DOUBLE PRECISION,
sodium_min DOUBLE PRECISION,
sodium_max DOUBLE PRECISION,
bun_min DOUBLE PRECISION,
bun_max DOUBLE PRECISION,
wbc_min DOUBLE PRECISION,
wbc_max DOUBLE PRECISION
);
-- urine_output_first_day: total urine output per ICU stay over the first
-- 24 hours after ICU intime (at most one row per stay).
DROP TABLE IF EXISTS urine_output_first_day;
CREATE TABLE urine_output_first_day (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
urineoutput DOUBLE PRECISION -- summed volume; positive GU irrigant input (itemid 227488) is subtracted
);
-- vitals_first_day: per ICU stay, the minimum, maximum and mean of each vital
-- sign charted during the first 24 hours after ICU intime (at most one row
-- per stay). Values outside a per-vital plausibility range are ignored by the
-- build, and chartevents rows flagged as errors are excluded.
-- tempc_* are in degrees Celsius: Fahrenheit readings are converted by the
-- build before aggregation.
DROP TABLE IF EXISTS vitals_first_day;
CREATE TABLE vitals_first_day (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
heartrate_min DOUBLE PRECISION,
heartrate_max DOUBLE PRECISION,
heartrate_mean DOUBLE PRECISION,
sysbp_min DOUBLE PRECISION,
sysbp_max DOUBLE PRECISION,
sysbp_mean DOUBLE PRECISION,
diasbp_min DOUBLE PRECISION,
diasbp_max DOUBLE PRECISION,
diasbp_mean DOUBLE PRECISION,
meanbp_min DOUBLE PRECISION,
meanbp_max DOUBLE PRECISION,
meanbp_mean DOUBLE PRECISION,
resprate_min DOUBLE PRECISION,
resprate_max DOUBLE PRECISION,
resprate_mean DOUBLE PRECISION,
tempc_min DOUBLE PRECISION,
tempc_max DOUBLE PRECISION,
tempc_mean DOUBLE PRECISION,
spo2_min DOUBLE PRECISION,
spo2_max DOUBLE PRECISION,
spo2_mean DOUBLE PRECISION,
glucose_min DOUBLE PRECISION,
glucose_max DOUBLE PRECISION,
glucose_mean DOUBLE PRECISION
);
-- 4. Final SAPS-II score table (one row per ICU stay).
--    NOTE(review): sapsii is presumably the total score, sapsii_prob the
--    derived mortality probability, and each *_score column the points
--    contributed by that component -- confirm against
--    severityscores/sapsii.sql.
DROP TABLE IF EXISTS sapsii;
CREATE TABLE sapsii (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
sapsii INTEGER,
sapsii_prob DOUBLE PRECISION,
age_score INTEGER,
hr_score INTEGER,
sysbp_score INTEGER,
temp_score INTEGER,
pao2fio2_score INTEGER,
uo_score INTEGER,
bun_score INTEGER,
wbc_score INTEGER,
potassium_score INTEGER,
sodium_score INTEGER,
bicarbonate_score INTEGER,
bilirubin_score INTEGER,
gcs_score INTEGER,
comorbidity_score INTEGER,
admissiontype_score INTEGER
);
-- ==================================================================
-- Sepsis-3
-- ==================================================================
--
-- Sepsis-3 reuses these SAPS-II tables:
-- urine_output, ventilation_classification, ventilation_durations
-- (defined above). The tables below are the ones added by
-- build_sepsis3.sql.
-- 1. Echo extraction (used to impute weight when chartevents weight
-- is missing; also keyed by ROW_ID to the noteevents row).
--    Note the differing precisions: chartdate is TIMESTAMP(0) while
--    charttime is TIMESTAMP(3).
--    NOTE(review): the remaining columns are presumably fields parsed out of
--    the echo report text, with bpsys / bpdias being the numeric parts of
--    bp -- confirm against the echo_data build script.
DROP TABLE IF EXISTS echo_data;
CREATE TABLE echo_data (
row_id INTEGER,
subject_id INTEGER,
hadm_id INTEGER,
chartdate TIMESTAMP(0),
charttime TIMESTAMP(3),
indication TEXT,
height NUMERIC,
weight NUMERIC,
bsa NUMERIC,
bp TEXT,
bpsys NUMERIC,
bpdias NUMERIC,
hr NUMERIC,
status TEXT,
test TEXT,
doppler TEXT,
contrast TEXT,
technicalquality TEXT
);
-- 2. Per-stay weight durations (admit + daily + neonate + echo
-- imputed); used for mcg/kg/min vasopressor unit conversion.
--    Each row gives the weight that applies to an ICU stay between
--    starttime and endtime.
--    NOTE(review): weight is presumably in kg, given its use for
--    mcg/kg/min conversion -- confirm against the build script.
DROP TABLE IF EXISTS weight_durations;
CREATE TABLE weight_durations (
icustay_id INTEGER,
starttime TIMESTAMP(0),
endtime TIMESTAMP(0),
weight DOUBLE PRECISION
);
-- 3. Vasopressor dose tables. All four have the same schema; rates
-- are merged CareVue + MetaVision and converted to mcg/kg/min.
--    Each row describes one dosing interval [starttime, endtime] of an
--    ICU stay. This first table holds dobutamine.
DROP TABLE IF EXISTS dobutamine_dose;
CREATE TABLE dobutamine_dose (
icustay_id INTEGER,
starttime TIMESTAMP(0),
endtime TIMESTAMP(0),
vaso_rate DOUBLE PRECISION, -- infusion rate, mcg/kg/min
vaso_amount DOUBLE PRECISION -- NOTE(review): presumably the amount given over the interval; unit not stated here
);
-- Dopamine dosing intervals; same column layout as the other vasopressor
-- dose tables in this section (vaso_rate in mcg/kg/min).
DROP TABLE IF EXISTS dopamine_dose;
CREATE TABLE dopamine_dose (
icustay_id INTEGER,
starttime TIMESTAMP(0),
endtime TIMESTAMP(0),
vaso_rate DOUBLE PRECISION,
vaso_amount DOUBLE PRECISION
);
-- Epinephrine dosing intervals; same column layout as the other vasopressor
-- dose tables in this section (vaso_rate in mcg/kg/min).
DROP TABLE IF EXISTS epinephrine_dose;
CREATE TABLE epinephrine_dose (
icustay_id INTEGER,
starttime TIMESTAMP(0),
endtime TIMESTAMP(0),
vaso_rate DOUBLE PRECISION,
vaso_amount DOUBLE PRECISION
);
-- Norepinephrine dosing intervals; same column layout as the other
-- vasopressor dose tables in this section (vaso_rate in mcg/kg/min).
DROP TABLE IF EXISTS norepinephrine_dose;
CREATE TABLE norepinephrine_dose (
icustay_id INTEGER,
starttime TIMESTAMP(0),
endtime TIMESTAMP(0),
vaso_rate DOUBLE PRECISION,
vaso_amount DOUBLE PRECISION
);
-- 4. All-time pivots feeding hourly SOFA.
--    blood_gas_arterial has exactly the same columns, in the same order, as
--    blood_gas_first_day_arterial in the SAPS-II section, but is an all-time
--    pivot rather than a first-day one.
DROP TABLE IF EXISTS blood_gas_arterial;
CREATE TABLE blood_gas_arterial (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
charttime TIMESTAMP(0),
specimen VARCHAR(200),
specimen_pred VARCHAR(200),
specimen_prob DOUBLE PRECISION,
so2 DOUBLE PRECISION,
spo2 DOUBLE PRECISION,
po2 DOUBLE PRECISION,
pco2 DOUBLE PRECISION,
fio2_chartevents DOUBLE PRECISION,
fio2 DOUBLE PRECISION,
aado2 DOUBLE PRECISION,
aado2_calc DOUBLE PRECISION,
pao2fio2 DOUBLE PRECISION,
ph DOUBLE PRECISION,
baseexcess DOUBLE PRECISION,
bicarbonate DOUBLE PRECISION,
totalco2 DOUBLE PRECISION,
hematocrit DOUBLE PRECISION,
hemoglobin DOUBLE PRECISION,
carboxyhemoglobin DOUBLE PRECISION,
methemoglobin DOUBLE PRECISION,
chloride DOUBLE PRECISION,
calcium DOUBLE PRECISION,
temperature DOUBLE PRECISION,
potassium DOUBLE PRECISION,
sodium DOUBLE PRECISION,
lactate DOUBLE PRECISION,
glucose DOUBLE PRECISION,
intubated DOUBLE PRECISION,
tidalvolume DOUBLE PRECISION,
ventilationrate DOUBLE PRECISION,
ventilator DOUBLE PRECISION,
peep DOUBLE PRECISION,
o2flow DOUBLE PRECISION,
requiredo2 DOUBLE PRECISION
);
DROP TABLE IF EXISTS gcs_all;
-- All-time GCS: one row per (icustay_id, charttime) with the total score.
-- gcs is 15 when the verbal component is charted as ET/Trach; otherwise
-- it is motor + verbal + eyes with missing components carried forward
-- from the previous observation (within 6 h) or defaulted to normal.
-- endotrachflag is 1 when this row's verbal component was ET/Trach.
CREATE TABLE gcs_all (
icustay_id INTEGER,
charttime TIMESTAMP(0),
gcs DOUBLE PRECISION,
endotrachflag INTEGER
);
-- 5. Hourly SOFA pipeline. Each measurement class is materialised
-- into a narrow staging table keyed by (icustay_id, hr); these
-- are kept (not dropped) so each stage can be inspected with
-- EXPLAIN ANALYZE.
-- 5a. Hourly grid (one row per ICU hour per stay). (icustay_id, hr) is
--     the key shared by every sofa_* staging table; starttime/endtime
--     presumably bound the hour -- TODO confirm against the build script.
DROP TABLE IF EXISTS sofa_grid;
CREATE TABLE sofa_grid (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
hr INTEGER,
starttime TIMESTAMP(0),
endtime TIMESTAMP(0)
);
-- 5b. MAP minimum within each hour. Keyed by (icustay_id, hr);
--     meanbp_min is the lowest mean arterial pressure seen in that hour.
DROP TABLE IF EXISTS sofa_vs;
CREATE TABLE sofa_vs (
icustay_id INTEGER,
hr INTEGER,
meanbp_min DOUBLE PRECISION
);
-- 5c. GCS minimum within each hour (from gcs_all, which already has
-- the carry-forward and ET-trach=15 rules applied).
-- Keyed by (icustay_id, hr).
DROP TABLE IF EXISTS sofa_gcs;
CREATE TABLE sofa_gcs (
icustay_id INTEGER,
hr INTEGER,
gcs_min DOUBLE PRECISION
);
-- 5d. Bilirubin maximum within each hour. Keyed by (icustay_id, hr).
--     Unit of bilirubin_max is not stated here -- TODO confirm.
DROP TABLE IF EXISTS sofa_bili;
CREATE TABLE sofa_bili (
icustay_id INTEGER,
hr INTEGER,
bilirubin_max DOUBLE PRECISION
);
-- 5e. Creatinine maximum within each hour. Keyed by (icustay_id, hr).
--     Unit of creatinine_max is not stated here -- TODO confirm.
DROP TABLE IF EXISTS sofa_cr;
CREATE TABLE sofa_cr (
icustay_id INTEGER,
hr INTEGER,
creatinine_max DOUBLE PRECISION
);
-- 5f. Platelet minimum within each hour. Keyed by (icustay_id, hr).
--     Unit of platelet_min is not stated here -- TODO confirm.
DROP TABLE IF EXISTS sofa_plt;
CREATE TABLE sofa_plt (
icustay_id INTEGER,
hr INTEGER,
platelet_min DOUBLE PRECISION
);
-- 5g. PaO2/FiO2: split into vent / no-vent based on whether an
-- active ventilation episode covered the blood gas.
-- Keyed by (icustay_id, hr). pao2fio2_vent holds the ratio for gases
-- taken during a ventilation episode, pao2fio2_novent for the others.
-- NOTE(review): whether each is a per-hour minimum is not visible here
-- -- confirm against the build script.
DROP TABLE IF EXISTS sofa_pf;
CREATE TABLE sofa_pf (
icustay_id INTEGER,
hr INTEGER,
pao2fio2_novent DOUBLE PRECISION,
pao2fio2_vent DOUBLE PRECISION
);
-- 5h. Urine output rolling sum + count of distinct charted hours
-- within the past 24 h.
-- Keyed by (icustay_id, hr). uo_24hr is the rolling sum; uo_tm_24hr is
-- the number of distinct hours with charted output in that window.
DROP TABLE IF EXISTS sofa_uo;
CREATE TABLE sofa_uo (
icustay_id INTEGER,
hr INTEGER,
uo_24hr DOUBLE PRECISION,
uo_tm_24hr BIGINT
);
-- 5i. Vasopressor rate snapshot at the hour boundary.
--     Keyed by (icustay_id, hr), one rate column per drug. Rates are
--     presumably in mcg/kg/min like the *_dose tables -- TODO confirm.
DROP TABLE IF EXISTS sofa_vaso;
CREATE TABLE sofa_vaso (
icustay_id INTEGER,
hr INTEGER,
rate_epinephrine DOUBLE PRECISION,
rate_norepinephrine DOUBLE PRECISION,
rate_dopamine DOUBLE PRECISION,
rate_dobutamine DOUBLE PRECISION
);
-- 5j. Wide assembly: grid LEFT JOINed onto every measurement table.
--     The first six columns are the sofa_grid row; the rest are the
--     value columns of sofa_vs, sofa_gcs, sofa_bili, sofa_cr, sofa_plt,
--     sofa_pf, sofa_uo and sofa_vaso. Because the join is a LEFT JOIN,
--     a measurement column is NULL for hours without a staging row.
DROP TABLE IF EXISTS sofa_wide;
CREATE TABLE sofa_wide (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
hr INTEGER,
starttime TIMESTAMP(0),
endtime TIMESTAMP(0),
meanbp_min DOUBLE PRECISION,
gcs_min DOUBLE PRECISION,
bilirubin_max DOUBLE PRECISION,
creatinine_max DOUBLE PRECISION,
platelet_min DOUBLE PRECISION,
pao2fio2_novent DOUBLE PRECISION,
pao2fio2_vent DOUBLE PRECISION,
uo_24hr DOUBLE PRECISION,
uo_tm_24hr BIGINT,
rate_epinephrine DOUBLE PRECISION,
rate_norepinephrine DOUBLE PRECISION,
rate_dopamine DOUBLE PRECISION,
rate_dobutamine DOUBLE PRECISION
);
-- 5k. Per-hour component scores (no rolling window yet).
--     Same columns as sofa_wide, followed by one INTEGER score per SOFA
--     component (respiration, coagulation, liver, cardiovascular, cns,
--     renal) computed from that hour's measurements only.
DROP TABLE IF EXISTS sofa_components;
CREATE TABLE sofa_components (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
hr INTEGER,
starttime TIMESTAMP(0),
endtime TIMESTAMP(0),
meanbp_min DOUBLE PRECISION,
gcs_min DOUBLE PRECISION,
bilirubin_max DOUBLE PRECISION,
creatinine_max DOUBLE PRECISION,
platelet_min DOUBLE PRECISION,
pao2fio2_novent DOUBLE PRECISION,
pao2fio2_vent DOUBLE PRECISION,
uo_24hr DOUBLE PRECISION,
uo_tm_24hr BIGINT,
rate_epinephrine DOUBLE PRECISION,
rate_norepinephrine DOUBLE PRECISION,
rate_dopamine DOUBLE PRECISION,
rate_dobutamine DOUBLE PRECISION,
-- Per-hour component scores.
respiration INTEGER,
coagulation INTEGER,
liver INTEGER,
cardiovascular INTEGER,
cns INTEGER,
renal INTEGER
);
-- 5l. Final hourly SOFA: 24-hour rolling MAX per component, summed.
--     One row per (icustay_id, hr). The plain component columns are the
--     per-hour scores; each *_24hours column is the rolling 24-hour
--     maximum of that component, and sofa_24hours is their sum.
DROP TABLE IF EXISTS sofa_hourly;
CREATE TABLE sofa_hourly (
subject_id INTEGER,
hadm_id INTEGER,
icustay_id INTEGER,
hr INTEGER,
starttime TIMESTAMP(0),
endtime TIMESTAMP(0),
-- Per-hour component scores.
respiration INTEGER,
coagulation INTEGER,
liver INTEGER,
cardiovascular INTEGER,
cns INTEGER,
renal INTEGER,
-- 24-hour rolling maxima and their total.
respiration_24hours INTEGER,
coagulation_24hours INTEGER,
liver_24hours INTEGER,
cardiovascular_24hours INTEGER,
cns_24hours INTEGER,
renal_24hours INTEGER,
sofa_24hours INTEGER
);
-- 6. Suspicion of infection.
DROP TABLE IF EXISTS antibiotic;
-- Antibiotic prescriptions: one row per prescription whose drug name
-- matched the antibiotic list and whose route is not topical/eye/ear.
CREATE TABLE antibiotic (
subject_id INTEGER,
hadm_id INTEGER,
-- icustay_id: taken from the prescription row, back-filled from
-- icustays when the prescription has none.
icustay_id INTEGER,
-- antibiotic: the prescription's drug name (not a flag).
antibiotic VARCHAR(255),
route VARCHAR(120),
-- MIMIC-III prescriptions has DATE-precision startdate / enddate
-- (stored as TIMESTAMP(0) but always at 00:00:00).
-- starttime / stoptime carry startdate / enddate through unchanged.
starttime TIMESTAMP(0),
stoptime TIMESTAMP(0)
);
DROP TABLE IF EXISTS suspicion_of_infection;
-- Suspicion-of-infection events built from the antibiotic table and
-- culture data (populated by sepsis/suspicion_of_infection.sql, which
-- treats the antibiotic time as the start-of-day of the prescription).
-- NOTE(review): ab_id looks like a per-antibiotic sequence number and
-- suspected_infection / positive_culture look like 0/1 flags -- confirm
-- against the build script. These time columns are plain TIMESTAMP,
-- unlike the TIMESTAMP(0) used by the other tables in this file.
CREATE TABLE suspicion_of_infection (
subject_id INTEGER,
icustay_id INTEGER,
hadm_id INTEGER,
ab_id BIGINT,
antibiotic VARCHAR(255),
antibiotic_time TIMESTAMP,
suspected_infection INTEGER,
suspected_infection_time TIMESTAMP,
culture_time TIMESTAMP,
specimen VARCHAR(100),
positive_culture INTEGER
);
-- 7. Final Sepsis-3 onset table (one row per ICU stay).
--    ICU stays with no row here are treated as sepsis3 = FALSE by the
--    mortality check script. The component columns mirror the six SOFA
--    components of sofa_hourly.
--    NOTE(review): sofa_time / sofa_score presumably hold the hourly
--    SOFA row matched to the suspected infection -- confirm against the
--    build script.
DROP TABLE IF EXISTS sepsis3;
CREATE TABLE sepsis3 (
subject_id INTEGER,
icustay_id INTEGER,
antibiotic_time TIMESTAMP,
culture_time TIMESTAMP,
suspected_infection_time TIMESTAMP,
sofa_time TIMESTAMP(0),
sofa_score INTEGER,
respiration INTEGER,
coagulation INTEGER,
liver INTEGER,
cardiovascular INTEGER,
cns INTEGER,
renal INTEGER,
sepsis3 BOOLEAN
);

215
sql/sepsis/antibiotic.sql Normal file
View File

@@ -0,0 +1,215 @@
-- ------------------------------------------------------------------
-- Title: Antibiotic prescriptions
--
-- Adapted from the MIMIC-IV upstream
-- concepts/medication/antibiotic.sql
-- and ported to MIMIC-III v1.3 vanilla PostgreSQL.
--
-- PORT NOTES:
-- 1. MIMIC-III `prescriptions` has DATE-precision `startdate` /
-- `enddate` instead of MIMIC-IV's TIMESTAMP `starttime` /
-- `stoptime`. We carry these through unchanged; downstream
-- consumers (suspicion_of_infection.sql) treat the antibiotic
-- time as the start-of-day timestamp of `startdate`.
-- 2. MIMIC-III `prescriptions` already has a populated `icustay_id`
-- column on most rows, so we can pick the ICU stay directly
-- rather than recomputing from `intime`/`outtime`. We still
-- back-fill it from `icustays` where missing, the same way
-- MIMIC-IV does.
-- ------------------------------------------------------------------
-- Rebuild the antibiotic prescription table from scratch (idempotent:
-- dropping the table also drops antibiotic_idx).
DROP TABLE IF EXISTS antibiotic;
CREATE TABLE antibiotic AS
WITH abx AS
(
  -- Distinct (drug, route) pairs seen on non-BASE, non-topical
  -- prescriptions, flagged 1 when the lower-cased drug name contains any
  -- of the antibiotic name fragments below.
  SELECT DISTINCT
      drug
    , route
    , CASE
        WHEN LOWER(drug) LIKE ANY (ARRAY[
            '%adoxa%', '%ala-tet%', '%alodox%', '%amikacin%', '%amikin%', '%amoxicill%'
          , '%amphotericin%', '%anidulafungin%', '%ancef%', '%clavulanate%', '%ampicillin%', '%augmentin%'
          , '%avelox%', '%avidoxy%', '%azactam%', '%azithromycin%', '%aztreonam%', '%axetil%'
          , '%bactocill%', '%bactrim%', '%bactroban%', '%bethkis%', '%biaxin%', '%bicillin l-a%'
          , '%cayston%', '%cefazolin%', '%cedax%', '%cefoxitin%', '%ceftazidime%', '%cefaclor%'
          , '%cefadroxil%', '%cefdinir%', '%cefditoren%', '%cefepime%', '%cefotan%', '%cefotetan%'
          , '%cefotaxime%', '%ceftaroline%', '%cefpodoxime%', '%cefpirome%', '%cefprozil%', '%ceftibuten%'
          , '%ceftin%', '%ceftriaxone%', '%cefuroxime%', '%cephalexin%', '%cephalothin%'
            -- 'cephapririn' is the historical (misspelled) upstream pattern;
            -- it is kept for parity and the correct spelling is added so
            -- cephapirin prescriptions are actually matched.
          , '%cephapririn%', '%cephapirin%'
          , '%chloramphenicol%', '%cipro%', '%ciprofloxacin%', '%claforan%', '%clarithromycin%', '%cleocin%'
          , '%clindamycin%', '%cubicin%', '%dicloxacillin%', '%dirithromycin%', '%doryx%', '%doxycy%'
          , '%duricef%', '%dynacin%', '%ery-tab%', '%eryped%', '%eryc%', '%erythrocin%'
          , '%erythromycin%', '%factive%', '%flagyl%', '%fortaz%', '%furadantin%', '%garamycin%'
          , '%gentamicin%', '%kanamycin%', '%keflex%', '%kefzol%', '%ketek%', '%levaquin%'
          , '%levofloxacin%', '%lincocin%', '%linezolid%', '%macrobid%', '%macrodantin%', '%maxipime%'
          , '%mefoxin%', '%metronidazole%', '%meropenem%', '%methicillin%', '%minocin%', '%minocycline%'
          , '%monodox%', '%monurol%', '%morgidox%', '%moxatag%', '%moxifloxacin%', '%mupirocin%'
          , '%myrac%', '%nafcillin%', '%neomycin%', '%nicazel doxy 30%', '%nitrofurantoin%', '%norfloxacin%'
          , '%noroxin%', '%ocudox%', '%ofloxacin%', '%omnicef%', '%oracea%', '%oraxyl%'
          , '%oxacillin%', '%pc pen vk%', '%pce dispertab%', '%panixine%', '%pediazole%', '%penicillin%'
          , '%periostat%', '%pfizerpen%', '%piperacillin%', '%tazobactam%', '%primsol%', '%proquin%'
          , '%raniclor%', '%rifadin%', '%rifampin%', '%rocephin%', '%smz-tmp%', '%septra%'
          , '%septra ds%', '%solodyn%', '%spectracef%', '%streptomycin%', '%sulfadiazine%', '%sulfamethoxazole%'
          , '%trimethoprim%', '%sulfatrim%', '%sulfisoxazole%', '%suprax%', '%synercid%', '%tazicef%'
          , '%tetracycline%', '%timentin%', '%tobramycin%', '%unasyn%', '%vancocin%', '%vancomycin%'
          , '%vantin%', '%vibativ%', '%vibra-tabs%', '%vibramycin%', '%zinacef%', '%zithromax%'
          , '%zosyn%', '%zyvox%'
        ]) THEN 1
        ELSE 0
      END AS antibiotic
  FROM prescriptions
  WHERE drug_type NOT IN ('BASE')
    -- match upstream: NULL routes are excluded by the NOT IN.
    AND route NOT IN ('OU','OS','OD','AU','AS','AD','TP')
    AND LOWER(route) NOT LIKE '%ear%'
    AND LOWER(route) NOT LIKE '%eye%'
    AND LOWER(drug) NOT LIKE '%cream%'
    AND LOWER(drug) NOT LIKE '%desensitization%'
    AND LOWER(drug) NOT LIKE '%ophth oint%'
    AND LOWER(drug) NOT LIKE '%gel%'
)
SELECT pr.subject_id
     , pr.hadm_id
     , COALESCE(pr.icustay_id, ie.icustay_id) AS icustay_id
     , pr.drug AS antibiotic
     , pr.route
       -- DATE-precision in MIMIC-III; downstream treats this as the
       -- start-of-day timestamp.
     , pr.startdate AS starttime
     , pr.enddate AS stoptime
FROM prescriptions pr
-- Every prescription row whose (drug, route) pair was flagged, including
-- rows the abx filters themselves would have skipped (e.g. BASE rows).
INNER JOIN abx
  ON pr.drug = abx.drug
  AND pr.route = abx.route
-- Back-fill only: look up an ICU stay solely for prescriptions that do
-- not already carry one. Joining unconditionally duplicated a prescription
-- whenever its date touched more than one ICU stay of the admission, even
-- though COALESCE then picked the same pr.icustay_id for every copy.
-- NOTE: with DATE precision a back-filled prescription dated on a transfer
-- day can still match two stays and yield one row per stay.
LEFT JOIN icustays ie
  ON pr.icustay_id IS NULL
  AND pr.hadm_id = ie.hadm_id
  AND pr.startdate >= CAST(ie.intime AS DATE)
  AND pr.startdate <= CAST(ie.outtime AS DATE)
WHERE abx.antibiotic = 1;
CREATE INDEX IF NOT EXISTS antibiotic_idx
  ON antibiotic (subject_id, hadm_id, starttime);

View File

@@ -0,0 +1,230 @@
-- ------------------------------------------------------------------
-- All-time arterial blood-gas pivot (PaO2 / FiO2 ratio at every gas).
--
-- This script is a fusion of the upstream MIMIC-III concepts_postgres
-- files
-- firstday/blood_gas_first_day.sql
-- firstday/blood_gas_first_day_arterial.sql
-- with their day-1 time predicate removed, so we get one row per
-- (icustay_id, charttime) for the entire ICU stay.
--
-- Output table: blood_gas_arterial
-- Output cols : subject_id, hadm_id, icustay_id, charttime,
-- specimen, specimen_pred, specimen_prob,
-- so2, spo2, po2, pco2, fio2_chartevents, fio2,
-- aado2, aado2_calc, pao2fio2, ph, baseexcess,
-- bicarbonate, totalco2, hematocrit, hemoglobin,
-- carboxyhemoglobin, methemoglobin, chloride, calcium,
-- temperature, potassium, sodium, lactate, glucose,
-- intubated, tidalvolume, ventilationrate, ventilator,
-- peep, o2flow, requiredo2
--
-- Restricted to *arterial* samples (specimen = 'ART' or
-- specimen_prob > 0.75).
-- ------------------------------------------------------------------
-- Rebuild the all-time arterial blood-gas pivot (idempotent: drop, then
-- CREATE TABLE AS; the index is recreated at the end).
DROP TABLE IF EXISTS blood_gas_arterial;
CREATE TABLE blood_gas_arterial AS
WITH bg_pvt AS
(
  -- Long format: one row per blood-gas lab event charted between ICU
  -- intime and outtime, labelled by itemid. Itemids 50807 and 51545 are
  -- fetched but get a NULL label, so they never reach a pivoted column.
  SELECT ie.subject_id, ie.hadm_id, ie.icustay_id
    , CASE
        WHEN itemid = 50800 THEN 'SPECIMEN'
        WHEN itemid = 50801 THEN 'AADO2'
        WHEN itemid = 50802 THEN 'BASEEXCESS'
        WHEN itemid = 50803 THEN 'BICARBONATE'
        WHEN itemid = 50804 THEN 'TOTALCO2'
        WHEN itemid = 50805 THEN 'CARBOXYHEMOGLOBIN'
        WHEN itemid = 50806 THEN 'CHLORIDE'
        WHEN itemid = 50808 THEN 'CALCIUM'
        WHEN itemid = 50809 THEN 'GLUCOSE'
        WHEN itemid = 50810 THEN 'HEMATOCRIT'
        WHEN itemid = 50811 THEN 'HEMOGLOBIN'
        WHEN itemid = 50812 THEN 'INTUBATED'
        WHEN itemid = 50813 THEN 'LACTATE'
        WHEN itemid = 50814 THEN 'METHEMOGLOBIN'
        WHEN itemid = 50815 THEN 'O2FLOW'
        WHEN itemid = 50816 THEN 'FIO2'
        WHEN itemid = 50817 THEN 'SO2'
        WHEN itemid = 50818 THEN 'PCO2'
        WHEN itemid = 50819 THEN 'PEEP'
        WHEN itemid = 50820 THEN 'PH'
        WHEN itemid = 50821 THEN 'PO2'
        WHEN itemid = 50822 THEN 'POTASSIUM'
        WHEN itemid = 50823 THEN 'REQUIREDO2'
        WHEN itemid = 50824 THEN 'SODIUM'
        WHEN itemid = 50825 THEN 'TEMPERATURE'
        WHEN itemid = 50826 THEN 'TIDALVOLUME'
        WHEN itemid = 50827 THEN 'VENTILATIONRATE'
        WHEN itemid = 50828 THEN 'VENTILATOR'
        ELSE NULL
      END AS label
    , le.charttime
    , le.value
      -- Plausibility filter: non-positive values are discarded except for
      -- base excess (50802); the remaining rules cap hematocrit, FiO2,
      -- SO2, O2 flow and PO2 at their upper (and, for FiO2, lower) bounds.
    , CASE
        WHEN valuenum <= 0 AND itemid != 50802 THEN NULL
        WHEN itemid = 50810 AND valuenum > 100 THEN NULL
        WHEN itemid = 50816 AND valuenum < 20 THEN NULL
        WHEN itemid = 50816 AND valuenum > 100 THEN NULL
        WHEN itemid = 50817 AND valuenum > 100 THEN NULL
        WHEN itemid = 50815 AND valuenum > 70 THEN NULL
        WHEN itemid = 50821 AND valuenum > 800 THEN NULL
        ELSE valuenum
      END AS valuenum
  FROM icustays ie
  INNER JOIN labevents le
    ON le.subject_id = ie.subject_id
    AND le.hadm_id = ie.hadm_id
    AND le.charttime BETWEEN ie.intime AND ie.outtime
    AND le.itemid IN (
        50800,50801,50802,50803,50804,50805,50806,50807,50808,50809
      , 50810,50811,50812,50813,50814,50815,50816,50817,50818,50819
      , 50820,50821,50822,50823,50824,50825,50826,50827,50828
      , 51545
    )
)
, bg AS
(
  -- Wide format: one row per (icustay_id, charttime). MAX only collapses
  -- the long rows of one charttime into a single value per label.
  SELECT subject_id, hadm_id, icustay_id, charttime
    , MAX(CASE WHEN label = 'SPECIMEN' THEN value END) AS specimen
    , MAX(CASE WHEN label = 'AADO2' THEN valuenum END) AS aado2
    , MAX(CASE WHEN label = 'BASEEXCESS' THEN valuenum END) AS baseexcess
    , MAX(CASE WHEN label = 'BICARBONATE' THEN valuenum END) AS bicarbonate
    , MAX(CASE WHEN label = 'TOTALCO2' THEN valuenum END) AS totalco2
    , MAX(CASE WHEN label = 'CARBOXYHEMOGLOBIN' THEN valuenum END) AS carboxyhemoglobin
    , MAX(CASE WHEN label = 'CHLORIDE' THEN valuenum END) AS chloride
    , MAX(CASE WHEN label = 'CALCIUM' THEN valuenum END) AS calcium
    , MAX(CASE WHEN label = 'GLUCOSE' THEN valuenum END) AS glucose
    , MAX(CASE WHEN label = 'HEMATOCRIT' THEN valuenum END) AS hematocrit
    , MAX(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum END) AS hemoglobin
    , MAX(CASE WHEN label = 'INTUBATED' THEN valuenum END) AS intubated
    , MAX(CASE WHEN label = 'LACTATE' THEN valuenum END) AS lactate
    , MAX(CASE WHEN label = 'METHEMOGLOBIN' THEN valuenum END) AS methemoglobin
    , MAX(CASE WHEN label = 'O2FLOW' THEN valuenum END) AS o2flow
    , MAX(CASE WHEN label = 'FIO2' THEN valuenum END) AS fio2
    , MAX(CASE WHEN label = 'SO2' THEN valuenum END) AS so2
    , MAX(CASE WHEN label = 'PCO2' THEN valuenum END) AS pco2
    , MAX(CASE WHEN label = 'PEEP' THEN valuenum END) AS peep
    , MAX(CASE WHEN label = 'PH' THEN valuenum END) AS ph
    , MAX(CASE WHEN label = 'PO2' THEN valuenum END) AS po2
    , MAX(CASE WHEN label = 'POTASSIUM' THEN valuenum END) AS potassium
    , MAX(CASE WHEN label = 'REQUIREDO2' THEN valuenum END) AS requiredo2
    , MAX(CASE WHEN label = 'SODIUM' THEN valuenum END) AS sodium
    , MAX(CASE WHEN label = 'TEMPERATURE' THEN valuenum END) AS temperature
    , MAX(CASE WHEN label = 'TIDALVOLUME' THEN valuenum END) AS tidalvolume
    , MAX(CASE WHEN label = 'VENTILATIONRATE' THEN valuenum END) AS ventilationrate
    , MAX(CASE WHEN label = 'VENTILATOR' THEN valuenum END) AS ventilator
  FROM bg_pvt
  GROUP BY subject_id, hadm_id, icustay_id, charttime
)
, stg_spo2 AS
(
  -- Charted pulse-oximetry SpO2, one value per charttime; readings
  -- outside (0, 100] are dropped.
  SELECT subject_id, hadm_id, icustay_id, charttime
    , MAX(CASE WHEN valuenum <= 0 OR valuenum > 100 THEN NULL ELSE valuenum END) AS spo2
  FROM chartevents
  WHERE itemid IN (646, 220277)
  GROUP BY subject_id, hadm_id, icustay_id, charttime
)
, stg_fio2 AS
(
  -- Charted FiO2 normalised to percent, one value per charttime.
  --   223835: fractions in (0, 1] are scaled by 100, 21..100 is kept,
  --           anything else is discarded.
  --   3420, 3422: passed through unchanged (so 0 is possible here).
  --   190: fractions in (0.20, 1) are scaled by 100.
  SELECT subject_id, hadm_id, icustay_id, charttime
    , MAX(
        CASE
          WHEN itemid = 223835 THEN
            CASE
              WHEN valuenum > 0 AND valuenum <= 1 THEN valuenum * 100
              WHEN valuenum > 1 AND valuenum < 21 THEN NULL
              WHEN valuenum >= 21 AND valuenum <= 100 THEN valuenum
              ELSE NULL
            END
          WHEN itemid IN (3420, 3422) THEN valuenum
          WHEN itemid = 190 AND valuenum > 0.20 AND valuenum < 1
            THEN valuenum * 100
          ELSE NULL
        END
      ) AS fio2_chartevents
  FROM chartevents
  WHERE itemid IN (3420, 190, 223835, 3422)
    AND COALESCE(error, 0) = 0
  GROUP BY subject_id, hadm_id, icustay_id, charttime
)
, stg2 AS
(
  -- Attach SpO2 readings from the 2 hours up to each blood gas that has
  -- a PO2; lastrowspo2 = 1 marks the most recent reading.
  SELECT bg.*
    , ROW_NUMBER() OVER (
        PARTITION BY bg.icustay_id, bg.charttime
        ORDER BY s1.charttime DESC
      ) AS lastrowspo2
    , s1.spo2
  FROM bg
  LEFT JOIN stg_spo2 s1
    ON bg.icustay_id = s1.icustay_id
    AND s1.charttime >= DATETIME_SUB(bg.charttime, INTERVAL '2' HOUR)
    AND s1.charttime <= bg.charttime
  WHERE bg.po2 IS NOT NULL
)
, stg3 AS
(
  -- Attach charted FiO2 from the 4 hours up to each blood gas
  -- (lastrowfio2 = 1 marks the most recent) and score the sample with a
  -- logistic model. Each COALESCE substitutes a fixed fallback term when
  -- the corresponding measurement is missing.
  SELECT stg2.*
    , ROW_NUMBER() OVER (
        PARTITION BY stg2.icustay_id, stg2.charttime
        ORDER BY s2.charttime DESC
      ) AS lastrowfio2
    , s2.fio2_chartevents
    , 1 / (1 + EXP(-(-0.02544
        + 0.04598 * po2
        + COALESCE(-0.15356 * spo2 , -0.15356 * 97.49420 + 0.13429)
        + COALESCE( 0.00621 * s2.fio2_chartevents, 0.00621 * 51.49550 + -0.24958)
        + COALESCE( 0.10559 * hemoglobin , 0.10559 * 10.32307 + 0.05954)
        + COALESCE( 0.13251 * so2 , 0.13251 * 93.66539 + -0.23172)
        + COALESCE(-0.01511 * pco2 , -0.01511 * 42.08866 + -0.01630)
        + COALESCE( 0.01480 * fio2 , 0.01480 * 63.97836 + -0.31142)
        + COALESCE(-0.00200 * aado2 , -0.00200 * 442.21186 + -0.01328)
        + COALESCE(-0.03220 * bicarbonate , -0.03220 * 22.96894 + -0.06535)
        + COALESCE( 0.05384 * totalco2 , 0.05384 * 24.72632 + -0.01405)
        + COALESCE( 0.08202 * lactate , 0.08202 * 3.06436 + 0.06038)
        + COALESCE( 0.10956 * ph , 0.10956 * 7.36233 + -0.00617)
        + COALESCE( 0.00848 * o2flow , 0.00848 * 7.59362 + -0.35803)
      ))) AS specimen_prob
  FROM stg2
  LEFT JOIN stg_fio2 s2
    ON stg2.icustay_id = s2.icustay_id
    AND s2.charttime BETWEEN DATETIME_SUB(stg2.charttime, INTERVAL '4' HOUR)
                         AND stg2.charttime
  WHERE stg2.lastrowspo2 = 1
)
SELECT subject_id, hadm_id, icustay_id, charttime
  , specimen
    -- Reported specimen wins; otherwise call it arterial when the model
    -- probability exceeds 0.75.
  , CASE
      WHEN specimen IS NOT NULL THEN specimen
      WHEN specimen_prob > 0.75 THEN 'ART'
      ELSE NULL
    END AS specimen_pred
  , specimen_prob
  , so2, spo2, po2, pco2
  , fio2_chartevents, fio2
  , aado2
  , CASE
      WHEN po2 IS NOT NULL
       AND pco2 IS NOT NULL
       AND COALESCE(fio2, fio2_chartevents) IS NOT NULL
        THEN (COALESCE(fio2, fio2_chartevents) / 100) * (760 - 47) - (pco2 / 0.8) - po2
      ELSE NULL
    END AS aado2_calc
    -- NULLIF guards the division: a charted FiO2 of 0 (itemids 3420/3422
    -- are not range-checked) would otherwise raise "division by zero" and
    -- abort the whole build under ON_ERROR_STOP. Such rows get NULL.
  , CASE
      WHEN po2 IS NOT NULL AND COALESCE(fio2, fio2_chartevents) IS NOT NULL
        THEN 100 * po2 / NULLIF(COALESCE(fio2, fio2_chartevents), 0)
      ELSE NULL
    END AS pao2fio2
  , ph, baseexcess, bicarbonate, totalco2
  , hematocrit, hemoglobin, carboxyhemoglobin, methemoglobin
  , chloride, calcium, temperature, potassium, sodium, lactate, glucose
  , intubated, tidalvolume, ventilationrate, ventilator
  , peep, o2flow, requiredo2
FROM stg3
WHERE lastrowfio2 = 1
  AND (specimen = 'ART' OR specimen_prob > 0.75);
CREATE INDEX IF NOT EXISTS blood_gas_arterial_idx
  ON blood_gas_arterial (icustay_id, charttime);

78
sql/sepsis/gcs_all.sql Normal file
View File

@@ -0,0 +1,78 @@
-- ------------------------------------------------------------------
-- All-time GCS pivot.
--
-- Adapted from the upstream MIMIC-III concepts_postgres file
-- firstday/gcs_first_day.sql
-- with the day-1 time predicate removed and the output reduced to one
-- row per (icustay_id, charttime), carrying the total GCS, for the
-- entire ICU stay.
--
-- The carry-forward logic (impute missing components from the
-- immediately preceding charttime within 6 h) is preserved. GCS is set
-- to 15 when the verbal component is charted as ET/Trach (intubated
-- patient), matching upstream.
-- ------------------------------------------------------------------
-- Rebuild the all-time GCS table from scratch (drop, CTAS, re-index).
DROP TABLE IF EXISTS gcs_all;
CREATE TABLE gcs_all AS
WITH components AS
(
  -- One row per charted GCS component inside the ICU stay window, with
  -- MetaVision itemids folded onto their CareVue counterparts
  -- (223900 -> 723 verbal, 223901 -> 454 motor, 220739 -> 184 eyes).
  -- A verbal score charted as ET/Trach is encoded as 0.
  SELECT ce.icustay_id
       , ce.charttime
       , CASE ce.itemid
           WHEN 223900 THEN 723
           WHEN 223901 THEN 454
           WHEN 220739 THEN 184
           ELSE ce.itemid
         END AS itemid
       , CASE
           WHEN (ce.itemid = 723    AND ce.value = '1.0 ET/Trach')
             OR (ce.itemid = 223900 AND ce.value = 'No Response-ETT')
             THEN 0
           ELSE ce.valuenum
         END AS valuenum
  FROM icustays stay
  INNER JOIN chartevents ce
    ON ce.icustay_id = stay.icustay_id
   AND ce.charttime BETWEEN stay.intime AND stay.outtime
  WHERE ce.itemid IN (184, 454, 723, 223900, 223901, 220739)
    AND (ce.error IS NULL OR ce.error = 0)
)
, pivoted AS
(
  -- Collapse the components of one charttime into a single row.
  SELECT icustay_id
       , charttime
       , MAX(valuenum) FILTER (WHERE itemid = 454) AS gcsmotor
       , MAX(valuenum) FILTER (WHERE itemid = 723) AS gcsverbal
       , MAX(valuenum) FILTER (WHERE itemid = 184) AS gcseyes
  FROM components
  GROUP BY icustay_id, charttime
)
, with_previous AS
(
  -- Look one observation back within the same stay.
  SELECT cur.icustay_id
       , cur.charttime
       , cur.gcsmotor
       , cur.gcsverbal
       , cur.gcseyes
       , LAG(cur.charttime) OVER stay_order AS prev_charttime
       , LAG(cur.gcsmotor)  OVER stay_order AS prev_motor_raw
       , LAG(cur.gcsverbal) OVER stay_order AS prev_verbal_raw
       , LAG(cur.gcseyes)   OVER stay_order AS prev_eyes_raw
  FROM pivoted cur
  WINDOW stay_order AS (PARTITION BY cur.icustay_id ORDER BY cur.charttime ASC)
)
, carried AS
(
  -- The previous observation only counts when it is less than 6 hours
  -- old; otherwise its components are treated as missing.
  SELECT icustay_id
       , charttime
       , gcsmotor
       , gcsverbal
       , gcseyes
       , CASE WHEN prev_charttime > DATETIME_SUB(charttime, INTERVAL '6' HOUR)
              THEN prev_motor_raw END AS prev_motor
       , CASE WHEN prev_charttime > DATETIME_SUB(charttime, INTERVAL '6' HOUR)
              THEN prev_verbal_raw END AS prev_verbal
       , CASE WHEN prev_charttime > DATETIME_SUB(charttime, INTERVAL '6' HOUR)
              THEN prev_eyes_raw END AS prev_eyes
  FROM with_previous
)
SELECT icustay_id
     , charttime
     , CASE
         -- Verbal charted as ET/Trach now: score as normal (15).
         WHEN gcsverbal = 0 THEN 15
         -- No verbal now, previous one was ET/Trach: still 15.
         WHEN gcsverbal IS NULL AND prev_verbal = 0 THEN 15
         -- Previous observation was ET/Trach: do not carry it forward,
         -- fall back to normal values for anything missing now.
         WHEN prev_verbal = 0 THEN
             COALESCE(gcsmotor , 6)
           + COALESCE(gcsverbal, 5)
           + COALESCE(gcseyes  , 4)
         -- Otherwise: current value, else previous value, else normal.
         ELSE
             COALESCE(gcsmotor , prev_motor , 6)
           + COALESCE(gcsverbal, prev_verbal, 5)
           + COALESCE(gcseyes  , prev_eyes  , 4)
       END AS gcs
     , CASE WHEN gcsverbal = 0 THEN 1 ELSE 0 END AS endotrachflag
FROM carried;
CREATE INDEX IF NOT EXISTS gcs_all_idx ON gcs_all (icustay_id, charttime);

View File

@@ -0,0 +1,265 @@
-- ------------------------------------------------------------------
-- Mortality verification for Sepsis-3 in MIMIC-III v1.3.
--
-- Usage:
-- psql -d mimic -v ON_ERROR_STOP=1 \
-- -c 'SET search_path TO mimiciii, public;' \
-- -f sql/sepsis/mortality_checks.sql
--
-- Purpose:
-- `sanity_checks.sql` reported a 14.6% in-hospital mortality among
-- Sepsis-3 = TRUE patients, well below the 25-35% range in the
-- literature. The hypothesis was that the broad cohort (neonates,
-- re-admissions, short stays included) drags the number down.
--
-- This script walks an exclusion funnel and shows mortality at
-- each step so you can confirm. It also shows 30-day mortality,
-- stratification by age band, and a direct comparison with the
-- numbers published in:
--
-- Johnson AEW et al., Crit Care Med 2018.
-- "A Comparative Analysis of Sepsis Identification Methods
-- in an Electronic Database."
-- Reported on MIMIC-III v1.4 with adult, first-ICU-stay,
-- LOS >= 4 h cohort:
-- n = 21 927 sepsis-3 stays
-- in-hospital mortality = 21.0%
-- 30-day mortality = 25.4%
--
-- All ages use a clamp at 91 (MIMIC-III shifts DOB by 300 y for
-- patients > 89; we treat them as 91 for stratification).
-- ------------------------------------------------------------------
\set ON_ERROR_STOP on
\timing on
-- Working cohort, built once: one row per ICU stay carrying every field
-- the checks below filter or aggregate on.
--   los_hours        ICU length of stay in hours
--   age_at_intime    age in years at ICU admission, clamped at 91
--   icustay_seq      1 for the patient's earliest ICU stay, 2 for the next, ...
--   died_in_hospital admissions.hospital_expire_flag
--   died_within_30d  1 when a date of death exists within 30 days of intime
--   sepsis3          FALSE for stays with no row in the sepsis3 table
DROP TABLE IF EXISTS sepsis3_cohort_check;
CREATE TEMP TABLE sepsis3_cohort_check AS
SELECT stay.subject_id
     , stay.hadm_id
     , stay.icustay_id
     , stay.intime
     , stay.outtime
     , stay.first_careunit
     , EXTRACT(EPOCH FROM (stay.outtime - stay.intime)) / 3600.0 AS los_hours
     , LEAST(
           91.0,
           EXTRACT(EPOCH FROM (stay.intime - p.dob)) / (365.242 * 86400.0)
       ) AS age_at_intime
     , ROW_NUMBER() OVER per_patient AS icustay_seq
     , adm.hospital_expire_flag AS died_in_hospital
     , CAST(
           p.dod IS NOT NULL AND p.dod <= stay.intime + INTERVAL '30 days'
           AS INTEGER
       ) AS died_within_30d
     , COALESCE(sep.sepsis3, FALSE) AS sepsis3
FROM icustays AS stay
INNER JOIN admissions AS adm
    ON stay.hadm_id = adm.hadm_id
INNER JOIN patients AS p
    ON stay.subject_id = p.subject_id
LEFT JOIN sepsis3 AS sep
    ON stay.icustay_id = sep.icustay_id
WINDOW per_patient AS (PARTITION BY stay.subject_id ORDER BY stay.intime);
CREATE INDEX ON sepsis3_cohort_check (icustay_id);
ANALYZE sepsis3_cohort_check;
\echo
\echo '=================================================================='
\echo ' 1. Cohort exclusion funnel (incremental filtering)'
\echo '=================================================================='
\echo "Each row applies an additional restriction. The 'sepsis3' columns"
\echo "report stats among rows where sepsis3 = TRUE within that cohort."
\echo
\echo "EXPECTED progression: as we narrow to the canonical adult/first-stay/"
\echo "LOS >= 24h cohort, in-hospital mortality among Sepsis-3 should rise"
\echo "from ~14% toward ~25-30%."
\echo
-- Each funnel level keeps the restrictions of every lower level and adds
-- one more: 1 = not NICU, 2 = adult, 3 = first ICU stay, 4 = LOS >= 24 h
-- (the canonical Seymour cohort). A stay appears once per level it passes.
WITH funnel (lvl, step) AS (
    VALUES (0, '0. all icustays')
         , (1, '1. + exclude NICU')
         , (2, '2. + age >= 18')
         , (3, '3. + first ICU stay only')
         , (4, '4. + LOS >= 24 h (canonical)')
)
, levels AS (
    SELECT f.lvl
         , f.step
         , c.sepsis3
         , c.died_in_hospital
         , c.died_within_30d
    FROM sepsis3_cohort_check AS c
    CROSS JOIN funnel AS f
    WHERE (f.lvl < 1 OR c.first_careunit != 'NICU')
      AND (f.lvl < 2 OR c.age_at_intime >= 18)
      AND (f.lvl < 3 OR c.icustay_seq = 1)
      AND (f.lvl < 4 OR c.los_hours >= 24)
)
SELECT lvl
     , step
     , count(*) AS n_total
     , count(*) FILTER (WHERE sepsis3) AS n_sepsis3
     , round(100.0 * count(*) FILTER (WHERE sepsis3) / count(*), 1) AS pct_sepsis3
       -- in-hospital mortality among Sepsis-3 = TRUE
     , round(100.0 * count(*) FILTER (WHERE sepsis3 AND died_in_hospital = 1)
             / NULLIF(count(*) FILTER (WHERE sepsis3), 0), 1) AS sep_inhosp_pct
       -- 30-day mortality among Sepsis-3 = TRUE
     , round(100.0 * count(*) FILTER (WHERE sepsis3 AND died_within_30d = 1)
             / NULLIF(count(*) FILTER (WHERE sepsis3), 0), 1) AS sep_30d_pct
       -- in-hospital mortality among NOT Sepsis-3
     , round(100.0 * count(*) FILTER (WHERE NOT sepsis3 AND died_in_hospital = 1)
             / NULLIF(count(*) FILTER (WHERE NOT sepsis3), 0), 1) AS nonsep_inhosp_pct
FROM levels
GROUP BY lvl, step
ORDER BY lvl;
\echo
\echo '=================================================================='
\echo ' 2. Mortality stratified by age band, canonical cohort only'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " Mortality among Sepsis-3 = TRUE rises monotonically with age."
\echo " Adults < 30: ~10-15%"
\echo " 30 - 50: ~15-20%"
\echo " 50 - 70: ~20-25%"
\echo " 70 - 90+: ~30-40%"
\echo
-- Sepsis-3 stays of the canonical cohort (non-NICU, adult, first ICU
-- stay, LOS >= 24 h), bucketed by age; the numeric prefix on each band
-- label makes the text sort follow the age order.
WITH canonical_sepsis AS (
    SELECT died_in_hospital
         , died_within_30d
         , CASE
               WHEN age_at_intime < 30 THEN '1. <30'
               WHEN age_at_intime < 50 THEN '2. 30-49'
               WHEN age_at_intime < 70 THEN '3. 50-69'
               WHEN age_at_intime < 90 THEN '4. 70-89'
               ELSE '5. 90+'
           END AS age_band
    FROM sepsis3_cohort_check
    WHERE sepsis3
      AND first_careunit != 'NICU'
      AND age_at_intime >= 18
      AND icustay_seq = 1
      AND los_hours >= 24
)
SELECT age_band
     , count(*) AS n_sepsis3
     , round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
     , round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
FROM canonical_sepsis
GROUP BY age_band
ORDER BY age_band;
\echo
\echo '=================================================================='
\echo ' 3. Mortality stratified by max-SOFA day-1 (canonical cohort)'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " Mortality should rise monotonically with SOFA. This is the"
\echo " classic dose-response curve of organ dysfunction vs death,"
\echo " and is the strongest semantic check that the SOFA pipeline"
\echo " itself is computing the right thing."
\echo " SOFA 0-3: ~5-10%"
\echo " SOFA 4-7: ~15-25%"
\echo " SOFA 8-11: ~30-40%"
\echo " SOFA 12+: ~50-65%"
\echo
-- Highest rolling SOFA seen up to hour 24 of each stay, joined to the
-- canonical cohort (all stays, not only Sepsis-3 ones) and bucketed.
-- Stays without any sofa_hourly row at hr <= 24 drop out of the join.
WITH first_day_sofa AS (
    SELECT icustay_id
         , max(sofa_24hours) AS d1_sofa
    FROM sofa_hourly
    WHERE hr <= 24
    GROUP BY icustay_id
)
, banded AS (
    SELECT c.died_in_hospital
         , c.died_within_30d
         , CASE
               WHEN s.d1_sofa <= 3 THEN '1. 0-3'
               WHEN s.d1_sofa <= 7 THEN '2. 4-7'
               WHEN s.d1_sofa <= 11 THEN '3. 8-11'
               ELSE '4. 12+'
           END AS sofa_band
    FROM sepsis3_cohort_check AS c
    INNER JOIN first_day_sofa AS s
        ON s.icustay_id = c.icustay_id
    WHERE c.first_careunit != 'NICU'
      AND c.age_at_intime >= 18
      AND c.icustay_seq = 1
      AND c.los_hours >= 24
)
SELECT sofa_band
     , count(*) AS n
     , round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
     , round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
FROM banded
GROUP BY sofa_band
ORDER BY sofa_band;
\echo
\echo '=================================================================='
\echo ' 4. Direct comparison with Johnson 2018 (canonical cohort)'
\echo '=================================================================='
\echo "Johnson 2018 published numbers for Sepsis-3 on MIMIC-III v1.4"
\echo "with the cohort: adult, first ICU stay only, LOS >= 4 h."
\echo "They reported:"
\echo " n_sepsis3 = 21 927"
\echo " in-hospital pct_died = 21.0%"
\echo " 30-day pct_died = 25.4%"
\echo
\echo "We use LOS >= 24 h here (the more common Seymour 2016 cutoff),"
\echo "so our n will be a bit smaller and our mortality slightly"
\echo "higher than Johnson's."
\echo
-- Headline figures: size and mortality of the Sepsis-3 stays in the
-- canonical cohort, reported as a single row.
WITH canonical_sepsis AS (
    SELECT c.died_in_hospital
         , c.died_within_30d
    FROM sepsis3_cohort_check AS c
    WHERE c.sepsis3
      AND c.first_careunit != 'NICU'
      AND c.age_at_intime >= 18
      AND c.icustay_seq = 1
      AND c.los_hours >= 24
)
SELECT count(*) AS n_sepsis3
     , round(100.0 * sum(died_in_hospital) / count(*), 1) AS pct_inhosp
     , round(100.0 * sum(died_within_30d) / count(*), 1) AS pct_30d
FROM canonical_sepsis;
\echo
\echo '=================================================================='
\echo ' 5. Sanity: where did the missing mortality "go"?'
\echo '=================================================================='
\echo "Decompose the gap between the broad-cohort 14.6% and the"
\echo "canonical-cohort number from section 4. This shows how much"
\echo "of the gap is explained by each filter individually."
\echo
-- All Sepsis-3 stays, then each excluded slice on its own (the slices
-- overlap). One pass computes every count and death total; the lateral
-- VALUES list turns them into one output row per slice, so a slice with
-- no stays still prints (n = 0, pct NULL).
WITH s3 AS (
    SELECT first_careunit
         , age_at_intime
         , icustay_seq
         , los_hours
         , died_in_hospital
    FROM sepsis3_cohort_check
    WHERE sepsis3
)
, totals AS (
    SELECT count(*) AS n_all
         , sum(died_in_hospital) AS dead_all
         , count(*) FILTER (WHERE first_careunit = 'NICU') AS n_nicu
         , sum(died_in_hospital) FILTER (WHERE first_careunit = 'NICU') AS dead_nicu
         , count(*) FILTER (WHERE age_at_intime < 18) AS n_minor
         , sum(died_in_hospital) FILTER (WHERE age_at_intime < 18) AS dead_minor
         , count(*) FILTER (WHERE icustay_seq > 1) AS n_readmit
         , sum(died_in_hospital) FILTER (WHERE icustay_seq > 1) AS dead_readmit
         , count(*) FILTER (WHERE los_hours < 24) AS n_short
         , sum(died_in_hospital) FILTER (WHERE los_hours < 24) AS dead_short
    FROM s3
)
SELECT v.slice
     , v.n
     , round(100.0 * v.deaths / v.n, 1) AS pct_inhosp
FROM totals AS t
CROSS JOIN LATERAL (
    VALUES ('all sepsis3',       t.n_all,     t.dead_all)
         , ('NICU only',         t.n_nicu,    t.dead_nicu)
         , ('age < 18 only',     t.n_minor,   t.dead_minor)
         , ('re-admission only', t.n_readmit, t.dead_readmit)
         , ('LOS < 24h only',    t.n_short,   t.dead_short)
) AS v (slice, n, deaths);
\echo
\echo 'Done. Compare the section-4 result to Johnson 2018 (~21% in-hospital,'
\echo '~25% 30-day) for the headline check.'

View File

@@ -0,0 +1,393 @@
-- ------------------------------------------------------------------
-- Sepsis-3 sanity checks for MIMIC-III v1.3.
--
-- Usage:
-- psql -d mimic -v ON_ERROR_STOP=1 \
-- -c 'SET search_path TO mimiciii, public;' \
-- -f sql/sepsis/sanity_checks.sql
--
-- Each section prints a short result set. Compare against the
-- "EXPECTED" comment. None of these are pass/fail tests; they are
-- bounds-style checks designed to catch obvious upstream breakage
-- (an empty staging table, an off-by-one in the hourly grid, a
-- vasopressor unit-conversion error, etc.).
--
-- Reference numbers come from:
-- Seymour CW et al., JAMA 2016 (the Sepsis-3 paper)
-- Johnson AEW et al., Crit Care Med 2018 ("A Comparative Analysis
-- of Sepsis Identification Methods in an Electronic Database",
-- which reproduces Sepsis-3 on MIMIC-III)
-- ------------------------------------------------------------------
-- Abort on the first SQL error and print per-statement timings.
\set ON_ERROR_STOP on
\timing on
\echo
\echo '=================================================================='
\echo ' 1. Row counts of every table in the pipeline'
\echo '=================================================================='
\echo "EXPECTED (MIMIC-III v1.3 full restore, all 61.5k ICU stays):"
\echo " icustays ~ 61 532"
\echo " sofa_grid ~ 6 - 8 M (60k stays * ~4d mean LOS * 24h)"
\echo " sofa_hourly same as sofa_grid"
\echo " blood_gas_arterial ~ 500 k - 1 M"
\echo " gcs_all ~ 4 - 6 M"
\echo " antibiotic ~ 500 k - 700 k prescription rows"
\echo " suspicion_of_infection same as antibiotic"
\echo " sepsis3 ~ 20 k - 30 k rows (one row per ICU stay"
\echo " that ever had any abx + qualifying SOFA)"
\echo
-- One (table_name, n) row per pipeline table, sorted alphabetically by
-- table name.
SELECT 'icustays' AS table_name, count(*) AS n FROM icustays
UNION ALL
SELECT 'sofa_grid', count(*) FROM sofa_grid
UNION ALL
SELECT 'sofa_vs', count(*) FROM sofa_vs
UNION ALL
SELECT 'sofa_gcs', count(*) FROM sofa_gcs
UNION ALL
SELECT 'sofa_bili', count(*) FROM sofa_bili
UNION ALL
SELECT 'sofa_cr', count(*) FROM sofa_cr
UNION ALL
SELECT 'sofa_plt', count(*) FROM sofa_plt
UNION ALL
SELECT 'sofa_pf', count(*) FROM sofa_pf
UNION ALL
SELECT 'sofa_uo', count(*) FROM sofa_uo
UNION ALL
SELECT 'sofa_vaso', count(*) FROM sofa_vaso
UNION ALL
SELECT 'sofa_wide', count(*) FROM sofa_wide
UNION ALL
SELECT 'sofa_components', count(*) FROM sofa_components
UNION ALL
SELECT 'sofa_hourly', count(*) FROM sofa_hourly
UNION ALL
SELECT 'blood_gas_arterial', count(*) FROM blood_gas_arterial
UNION ALL
SELECT 'gcs_all', count(*) FROM gcs_all
UNION ALL
SELECT 'antibiotic', count(*) FROM antibiotic
UNION ALL
SELECT 'suspicion_of_infection', count(*) FROM suspicion_of_infection
UNION ALL
SELECT 'sepsis3', count(*) FROM sepsis3
ORDER BY table_name;
\echo
\echo '=================================================================='
\echo ' 2. Hourly grid integrity'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " bad_hr_seq = 0 (hours per stay must be 1..N consecutive)"
\echo " bad_endtime = 0 (endtime > starttime)"
\echo " duplicate_grid = 0 (no (icustay_id, hr) duplicates)"
\echo " grid_eq_hourly = 0 (sofa_grid and sofa_hourly row counts match)"
\echo
-- Four independent counters returned as a single row. stay_span holds the
-- first hour, last hour and number of grid rows for each ICU stay.
WITH stay_span AS (
    SELECT
        icustay_id,
        min(hr)  AS first_hr,
        max(hr)  AS last_hr,
        count(*) AS n_hours
    FROM sofa_grid
    GROUP BY icustay_id
)
SELECT
    -- stays whose hours do not start at 1 or do not end at the row count
    (SELECT count(*)
     FROM stay_span
     WHERE first_hr <> 1
        OR last_hr <> n_hours)                          AS bad_hr_seq,
    -- grid rows with a non-positive duration
    (SELECT count(*)
     FROM sofa_grid
     WHERE endtime <= starttime)                        AS bad_endtime,
    -- surplus rows beyond one per (icustay_id, hr)
    (SELECT count(*) - count(DISTINCT (icustay_id, hr))
     FROM sofa_grid)                                    AS duplicate_grid,
    -- row-count difference between the grid and the final hourly table
    (SELECT count(*) FROM sofa_grid)
      - (SELECT count(*) FROM sofa_hourly)              AS grid_eq_hourly;
\echo
\echo '=================================================================='
\echo ' 3. Per-component SOFA score ranges'
\echo '=================================================================='
\echo "EXPECTED: every per-hour component score is in [0, 4] or NULL."
\echo " Any value outside that range indicates a logic bug."
\echo
-- Minimum and maximum of each per-hour component score, one row per
-- component, sorted by component name.
SELECT
    'respiration'    AS component,
    min(respiration) AS min,
    max(respiration) AS max
FROM sofa_components
UNION ALL
SELECT 'coagulation', min(coagulation), max(coagulation)
FROM sofa_components
UNION ALL
SELECT 'liver', min(liver), max(liver)
FROM sofa_components
UNION ALL
SELECT 'cardiovascular', min(cardiovascular), max(cardiovascular)
FROM sofa_components
UNION ALL
SELECT 'cns', min(cns), max(cns)
FROM sofa_components
UNION ALL
SELECT 'renal', min(renal), max(renal)
FROM sofa_components
ORDER BY component;
\echo
\echo '=================================================================='
\echo ' 4. 24-hour rolling SOFA distribution'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " total_sofa min = 0, max ~ 20-24"
\echo " median per-hour total_sofa ~ 2-4"
\echo " Distribution should be heavy-tailed; ~70-80% of hours <= 5,"
\echo " ~5-10% of hours >= 10."
\echo
-- Summary statistics of sofa_24hours across every row of sofa_hourly.
SELECT
    min(sofa_24hours)                                             AS sofa_min,
    max(sofa_24hours)                                             AS sofa_max,
    round(CAST(avg(sofa_24hours) AS numeric), 2)                  AS sofa_mean,
    percentile_disc(0.50) WITHIN GROUP (ORDER BY sofa_24hours)    AS sofa_p50,
    percentile_disc(0.90) WITHIN GROUP (ORDER BY sofa_24hours)    AS sofa_p90,
    percentile_disc(0.99) WITHIN GROUP (ORDER BY sofa_24hours)    AS sofa_p99
FROM sofa_hourly;
\echo
\echo '=================================================================='
\echo ' 5. Day-1 max SOFA per stay (compare with SAPS-II severity)'
\echo '=================================================================='
\echo "EXPECTED for adult ICU (per Singer 2016, Vincent 1996):"
\echo " median day-1 SOFA ~ 4-6"
\echo " ~60-70% of stays have day-1 SOFA >= 2 (Sepsis-3 organ-dys threshold)"
\echo
-- first_day: highest sofa_24hours seen in hours 1..24 of each stay.
WITH first_day AS (
    SELECT
        icustay_id,
        max(sofa_24hours) AS day1_sofa
    FROM sofa_hourly
    WHERE hr <= 24
    GROUP BY icustay_id
)
SELECT
    count(*)                                                   AS n_stays,
    round(CAST(avg(day1_sofa) AS numeric), 2)                  AS mean_d1_sofa,
    percentile_disc(0.50) WITHIN GROUP (ORDER BY day1_sofa)    AS p50,
    percentile_disc(0.90) WITHIN GROUP (ORDER BY day1_sofa)    AS p90,
    -- share of stays at or above each threshold, in percent
    round(
        100.0 * sum(CASE WHEN day1_sofa >= 2 THEN 1 ELSE 0 END) / count(*),
        1
    )                                                          AS pct_ge2,
    round(
        100.0 * sum(CASE WHEN day1_sofa >= 6 THEN 1 ELSE 0 END) / count(*),
        1
    )                                                          AS pct_ge6
FROM first_day;
\echo
\echo '=================================================================='
\echo ' 6. Component-input sanity (raw ranges)'
\echo '=================================================================='
\echo "EXPECTED ranges (after our valuenum filters):"
\echo " meanbp_min 30 - 200 mmHg"
\echo " gcs_min 3 - 15"
\echo " bilirubin_max 0 - 80 mg/dL"
\echo " creatinine_max 0 - 30 mg/dL (capped at 150 in pipeline)"
\echo " platelet_min 0 - 1500 K/uL"
\echo " pao2fio2_* 50 - 700"
\echo " uo_24hr 0 - 20000 mL"
\echo " rate_norepi etc. 0 - 5 mcg/kg/min (rates above ~3 are very rare)"
\echo
-- min / max / mean of each raw input column over its non-NULL rows.
-- Every value is cast to text so columns of different numeric types can
-- share one UNION ALL result; rows are sorted by metric name.
SELECT
    'meanbp_min'                                    AS metric,
    min(meanbp_min)::text                           AS min,
    max(meanbp_min)::text                           AS max,
    round(avg(meanbp_min)::numeric, 1)::text        AS mean
FROM sofa_components
WHERE meanbp_min IS NOT NULL
UNION ALL
SELECT
    'gcs_min',
    min(gcs_min)::text,
    max(gcs_min)::text,
    avg(gcs_min)::numeric(10,1)::text
FROM sofa_components
WHERE gcs_min IS NOT NULL
UNION ALL
SELECT
    'bilirubin_max',
    min(bilirubin_max)::text,
    max(bilirubin_max)::text,
    avg(bilirubin_max)::numeric(10,2)::text
FROM sofa_components
WHERE bilirubin_max IS NOT NULL
UNION ALL
SELECT
    'creatinine_max',
    min(creatinine_max)::text,
    max(creatinine_max)::text,
    avg(creatinine_max)::numeric(10,2)::text
FROM sofa_components
WHERE creatinine_max IS NOT NULL
UNION ALL
SELECT
    'platelet_min',
    min(platelet_min)::text,
    max(platelet_min)::text,
    avg(platelet_min)::numeric(10,1)::text
FROM sofa_components
WHERE platelet_min IS NOT NULL
UNION ALL
SELECT
    'pao2fio2_vent',
    min(pao2fio2_vent)::text,
    max(pao2fio2_vent)::text,
    avg(pao2fio2_vent)::numeric(10,1)::text
FROM sofa_components
WHERE pao2fio2_vent IS NOT NULL
UNION ALL
SELECT
    'pao2fio2_novent',
    min(pao2fio2_novent)::text,
    max(pao2fio2_novent)::text,
    avg(pao2fio2_novent)::numeric(10,1)::text
FROM sofa_components
WHERE pao2fio2_novent IS NOT NULL
UNION ALL
SELECT
    'uo_24hr',
    min(uo_24hr)::text,
    max(uo_24hr)::text,
    avg(uo_24hr)::numeric(10,1)::text
FROM sofa_components
WHERE uo_24hr IS NOT NULL
UNION ALL
SELECT
    'rate_norepinephrine',
    min(rate_norepinephrine)::text,
    max(rate_norepinephrine)::text,
    avg(rate_norepinephrine)::numeric(10,3)::text
FROM sofa_components
WHERE rate_norepinephrine IS NOT NULL
ORDER BY metric;
\echo
\echo '=================================================================='
\echo ' 7. Vasopressor coverage'
\echo '=================================================================='
\echo "EXPECTED: ~25-35% of adult ICU stays receive at least one"
\echo " vasopressor (norepi most common, then epi/dop/dob)."
\echo
-- Number of distinct ICU stays with at least one sofa_vaso row, overall
-- and per drug. The expectation above is stated as a percentage, so the
-- count is also reported as a share of ALL rows in icustays.
-- NOTE(review): the denominator is every ICU stay, not only adult stays,
-- so the percentage will read somewhat lower than an adult-only figure.
-- `ord` only pins the output order to the order the groups are listed in.
WITH coverage AS (
    SELECT 1 AS ord, 'any vaso' AS grp, count(DISTINCT icustay_id) AS n_stays
    FROM sofa_vaso
    UNION ALL
    SELECT 2, 'norepi', count(DISTINCT icustay_id)
    FROM sofa_vaso
    WHERE rate_norepinephrine IS NOT NULL
    UNION ALL
    SELECT 3, 'epi', count(DISTINCT icustay_id)
    FROM sofa_vaso
    WHERE rate_epinephrine IS NOT NULL
    UNION ALL
    SELECT 4, 'dop', count(DISTINCT icustay_id)
    FROM sofa_vaso
    WHERE rate_dopamine IS NOT NULL
    UNION ALL
    SELECT 5, 'dob', count(DISTINCT icustay_id)
    FROM sofa_vaso
    WHERE rate_dobutamine IS NOT NULL
)
, denom AS (
    -- NULLIF keeps an empty icustays table from raising division by zero.
    SELECT NULLIF(count(*), 0) AS n_icustays
    FROM icustays
)
SELECT
    c.grp                                           AS "group",
    c.n_stays,
    round(100.0 * c.n_stays / d.n_icustays, 1)      AS pct_of_all_icustays
FROM coverage c
CROSS JOIN denom d
ORDER BY c.ord;
\echo
\echo '=================================================================='
\echo ' 8. Antibiotic prescriptions: top 15 drugs'
\echo '=================================================================='
\echo "EXPECTED: vancomycin, piperacillin/tazobactam (zosyn),"
\echo " ceftriaxone, levofloxacin, metronidazole near the top."
\echo
-- The 15 most frequent values of antibiotic.antibiotic, most common first.
SELECT
    antibiotic,
    count(*) AS n
FROM antibiotic
GROUP BY antibiotic
ORDER BY count(*) DESC
LIMIT 15;
\echo
\echo '=================================================================='
\echo ' 9. Suspicion of infection: matching rate'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " ~50-70% of antibiotic rows are matched to a culture"
\echo " (i.e. suspected_infection = 1). Top specimens should be:"
\echo " BLOOD CULTURE, URINE, MRSA SCREEN, SPUTUM, SWAB."
\echo
-- Overall matching rate: suspected_infection is summed as a 0/1 flag.
SELECT
    count(*)                                                AS n_total,
    sum(suspected_infection)                                AS n_suspected,
    round(100.0 * sum(suspected_infection) / count(*), 1)   AS pct_suspected
FROM suspicion_of_infection;
-- The ten most frequent specimen types among matched rows.
SELECT
    specimen,
    count(*) AS n
FROM suspicion_of_infection
WHERE suspected_infection = 1
GROUP BY specimen
ORDER BY count(*) DESC
LIMIT 10;
\echo
\echo '=================================================================='
\echo '10. Sepsis-3 prevalence at the ICU-stay level'
\echo '=================================================================='
\echo "EXPECTED (Johnson 2018, MIMIC-III all-cohort):"
\echo " total stays in sepsis3 table : 25 - 35 k"
\echo " (every stay with any abx and a qualifying SOFA window)"
\echo " sepsis3 = TRUE : 18 - 24 k (~30-40% of all ICU stays)"
\echo
-- The two counts are computed once in the inner query; the outer query
-- turns them into percentages of (a) rows in sepsis3 and (b) all rows in
-- icustays. NULLIF turns a zero denominator into NULL instead of an error.
SELECT
    t.n_rows,
    t.n_sepsis3,
    round(100.0 * t.n_sepsis3 / NULLIF(t.n_rows, 0), 1)
        AS pct_sepsis3_among_rows,
    round(
        100.0 * t.n_sepsis3 / NULLIF((SELECT count(*) FROM icustays), 0),
        1
    )   AS pct_sepsis3_of_all_icustays
FROM (
    SELECT
        count(*)                                     AS n_rows,
        sum(CASE WHEN sepsis3 THEN 1 ELSE 0 END)     AS n_sepsis3
    FROM sepsis3
) t;
\echo
\echo '=================================================================='
\echo '11. Sepsis-3 onset timing'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " Most onsets occur early in the stay; median onset is on"
\echo " day 0-1 (~0-24h after intime). A long right tail exists"
\echo " for ICU-acquired sepsis."
\echo " sofa_time should be within [-48h, +24h] of"
\echo " suspected_infection_time by construction."
\echo
-- onset: for each septic stay, two offsets in hours:
--   hours_to_onset = suspected_infection_time - ICU intime
--   sofa_offset_h  = sofa_time - suspected_infection_time
WITH onset AS (
    SELECT
        EXTRACT(EPOCH FROM (s.suspected_infection_time - ie.intime)) / 3600
            AS hours_to_onset,
        EXTRACT(EPOCH FROM (s.sofa_time - s.suspected_infection_time)) / 3600
            AS sofa_offset_h
    FROM sepsis3 s
    INNER JOIN icustays ie
        ON ie.icustay_id = s.icustay_id
    WHERE s.sepsis3
)
SELECT
    round(avg(hours_to_onset)::numeric, 1)                         AS mean_hours_to_onset,
    percentile_disc(0.50) WITHIN GROUP (ORDER BY hours_to_onset)   AS p50_hours,
    percentile_disc(0.90) WITHIN GROUP (ORDER BY hours_to_onset)   AS p90_hours,
    min(sofa_offset_h)                                             AS min_sofa_offset_h,
    max(sofa_offset_h)                                             AS max_sofa_offset_h
FROM onset;
\echo
\echo '=================================================================='
\echo '12. Mortality stratified by Sepsis-3 status'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " In-hospital mortality among Sepsis-3 = TRUE: ~25-35%"
\echo " Among Sepsis-3 = FALSE / no row in sepsis3: ~5-10%"
\echo
-- labelled: every ICU stay tagged 'sepsis3' or 'not sepsis3'. The LEFT
-- JOIN keeps stays with no row in sepsis3; IS TRUE sends both FALSE and
-- NULL to the 'not sepsis3' label.
WITH labelled AS (
    SELECT
        ie.icustay_id,
        ie.hadm_id,
        CASE
            WHEN s.sepsis3 IS TRUE THEN 'sepsis3'
            ELSE 'not sepsis3'
        END AS sepsis_status
    FROM icustays ie
    LEFT JOIN sepsis3 s
        ON s.icustay_id = ie.icustay_id
)
SELECT
    l.sepsis_status,
    count(*) AS n_stays,
    sum(CASE WHEN adm.hospital_expire_flag = 1 THEN 1 ELSE 0 END) AS n_died,
    round(
        100.0 * sum(CASE WHEN adm.hospital_expire_flag = 1 THEN 1 ELSE 0 END)
            / count(*),
        1
    ) AS pct_died
FROM labelled l
INNER JOIN admissions adm
    ON adm.hadm_id = l.hadm_id
GROUP BY l.sepsis_status
ORDER BY l.sepsis_status DESC;
\echo
\echo '=================================================================='
\echo '13. Sepsis-3 vs SAPS-II (cross-score validation)'
\echo '=================================================================='
\echo "EXPECTED:"
\echo " Septic patients should have higher mean SAPS-II than non-septic"
\echo " (typically by ~10-15 points)."
\echo " This sanity check requires that you have already run"
\echo " build_sapsii.sql. If sapsii does not exist, this section"
\echo " is skipped automatically."
\echo
-- This script sets ON_ERROR_STOP on at its top, which overrides any
-- `-v ON_ERROR_STOP=0` given on the command line. A missing sapsii table
-- would therefore abort the run before section 14. Probe for the table
-- first (to_regclass returns NULL when the name does not resolve on the
-- current search_path) and run the comparison only when it is present.
-- Requires psql >= 10 for \if.
SELECT to_regclass('sapsii') IS NOT NULL AS has_sapsii \gset
\if :has_sapsii
-- Mean SAPS-II and mean predicted mortality per sepsis status. Both LEFT
-- JOINs keep every ICU stay; avg() ignores stays without a sapsii row,
-- while n counts all stays in the group.
SELECT
    CASE
        WHEN s.sepsis3 IS TRUE THEN 'sepsis3'
        ELSE 'not sepsis3'
    END                                         AS sepsis_status,
    count(*)                                    AS n,
    round(avg(sa.sapsii)::numeric, 1)           AS mean_sapsii,
    round(avg(sa.sapsii_prob)::numeric, 3)      AS mean_predicted_mortality
FROM icustays ie
LEFT JOIN sepsis3 s
    ON s.icustay_id = ie.icustay_id
LEFT JOIN sapsii sa
    ON sa.icustay_id = ie.icustay_id
GROUP BY (s.sepsis3 IS TRUE)
ORDER BY 1 DESC;
\else
\echo 'sapsii table not found -- skipping section 13 (run build_sapsii.sql first).'
\endif
\echo
\echo '=================================================================='
\echo '14. Spot-check a few stays end-to-end'
\echo '=================================================================='
\echo "Pulls 5 random Sepsis-3 = TRUE stays and shows you the trajectory"
\echo "of sofa_24hours alongside the suspected_infection_time. Eyeball:"
\echo " - sofa_24hours should be >= 2 at hours surrounding the onset"
\echo " - sofa_24hours should plausibly rise then fall over the stay"
\echo " - hour numbering should be consecutive"
\echo
-- sample_stays: five septic stays chosen by ordering on md5 of the stay
-- id. The choice looks arbitrary but is the same on every run against
-- the same data.
WITH sample_stays AS (
    SELECT
        icustay_id,
        suspected_infection_time
    FROM sepsis3
    WHERE sepsis3
    ORDER BY md5(icustay_id::text)
    LIMIT 5
)
-- Hourly rolling scores whose endtime lies within 6 hours either side of
-- the suspected-infection time (both bounds inclusive).
SELECT
    ss.icustay_id,
    ss.suspected_infection_time,
    h.hr,
    h.endtime,
    h.respiration_24hours       AS resp,
    h.coagulation_24hours       AS coag,
    h.liver_24hours             AS liv,
    h.cardiovascular_24hours    AS cardio,
    h.cns_24hours               AS cns,
    h.renal_24hours             AS ren,
    h.sofa_24hours              AS sofa
FROM sample_stays ss
INNER JOIN sofa_hourly h
    ON h.icustay_id = ss.icustay_id
WHERE h.endtime >= ss.suspected_infection_time - INTERVAL '6 hours'
  AND h.endtime <= ss.suspected_infection_time + INTERVAL '6 hours'
ORDER BY ss.icustay_id, h.hr;
\echo
\echo 'All sanity checks complete. Anything way off the expected ranges'
\echo 'above is worth investigating before relying on the sepsis-3 cohort.'

90
sql/sepsis/sepsis3.sql Normal file
View File

@@ -0,0 +1,90 @@
-- ------------------------------------------------------------------
-- Title: Sepsis-3 onset
--
-- Adapted from the MIMIC-IV upstream
-- concepts/sepsis/sepsis3.sql
-- and ported to MIMIC-III v1.3 vanilla PostgreSQL.
--
-- Definition (Singer et al., JAMA 2016):
-- Sepsis-3 = SOFA >= 2 AND suspicion of infection,
-- where the hourly SOFA row's end time must fall no earlier than 48 h
-- before and no later than 24 h after the suspected-infection time.
-- The "onset time" is the suspected-infection-time of the earliest
-- row that satisfies these criteria for each ICU stay.
--
-- Implicitly assumes baseline SOFA = 0 prior to ICU admission, since
-- we do not have premorbid organ-dysfunction data.
--
-- Dependencies:
-- sepsis/suspicion_of_infection.sql
-- sepsis/sofa_hourly.sql
-- ------------------------------------------------------------------
-- Rebuild the sepsis3 table from scratch: one row per ICU stay, taken
-- from the earliest suspicion-of-infection event that has a qualifying
-- SOFA hour nearby.
DROP TABLE IF EXISTS sepsis3;
CREATE TABLE sepsis3 AS
-- sofa: hourly rows whose 24-hour rolling SOFA is at least 2, with the
-- *_24hours component columns renamed to their short names.
WITH sofa AS (
    SELECT
        icustay_id,
        starttime,
        endtime,
        respiration_24hours     AS respiration,
        coagulation_24hours     AS coagulation,
        liver_24hours           AS liver,
        cardiovascular_24hours  AS cardiovascular,
        cns_24hours             AS cns,
        renal_24hours           AS renal,
        sofa_24hours            AS sofa_score
    FROM sofa_hourly
    WHERE sofa_24hours >= 2
),
-- ranked: every (suspicion event, qualifying SOFA hour) pair in which the
-- SOFA hour ends between 48 h before and 24 h after the suspected
-- infection time, numbered per stay from the earliest onwards.
ranked AS (
    SELECT
        soi.subject_id,
        soi.icustay_id,
        soi.ab_id,
        soi.antibiotic,
        soi.antibiotic_time,
        soi.culture_time,
        soi.suspected_infection,
        soi.suspected_infection_time,
        soi.specimen,
        soi.positive_culture,
        sofa.starttime,
        sofa.endtime,
        sofa.respiration,
        sofa.coagulation,
        sofa.liver,
        sofa.cardiovascular,
        sofa.cns,
        sofa.renal,
        sofa.sofa_score,
        (sofa.sofa_score >= 2 AND soi.suspected_infection = 1) AS sepsis3,
        ROW_NUMBER() OVER (
            PARTITION BY soi.icustay_id
            ORDER BY
                soi.suspected_infection_time,
                soi.antibiotic_time,
                soi.culture_time,
                sofa.endtime
        ) AS rn_sus
    FROM suspicion_of_infection soi
    INNER JOIN sofa
        ON soi.icustay_id = sofa.icustay_id
       AND sofa.endtime BETWEEN
               DATETIME_SUB(soi.suspected_infection_time, INTERVAL '48' HOUR)
           AND DATETIME_ADD(soi.suspected_infection_time, INTERVAL '24' HOUR)
    -- keep only events that belong to an ICU stay and have an onset time
    WHERE soi.icustay_id IS NOT NULL
      AND soi.suspected_infection_time IS NOT NULL
)
-- Keep the first-ranked pair of each stay; its SOFA end time becomes
-- sofa_time.
SELECT
    subject_id,
    icustay_id,
    antibiotic_time,
    culture_time,
    suspected_infection_time,
    endtime AS sofa_time,
    sofa_score,
    respiration,
    coagulation,
    liver,
    cardiovascular,
    cns,
    renal,
    sepsis3
FROM ranked
WHERE rn_sus = 1;
CREATE INDEX IF NOT EXISTS sepsis3_idx ON sepsis3 (icustay_id);

397
sql/sepsis/sofa_hourly.sql Normal file
View File

@@ -0,0 +1,397 @@
-- ------------------------------------------------------------------
-- Title: Hourly Sequential Organ Failure Assessment (SOFA)
--
-- Adapted from the MIMIC-IV upstream
-- concepts/score/sofa.sql
-- and ported to MIMIC-III v1.3 vanilla PostgreSQL.
--
-- Produces one row per (icustay_id, hr) for every hour of the ICU
-- stay, with both the per-component score AT that hour and the
-- 24-hour rolling MAX of each component (which is the value used by
-- Sepsis-3). Final column `sofa_24hours` is the sum of the six
-- 24-hour rolling maxes.
--
-- Differences vs. MIMIC-IV upstream (search this file for "PORT NOTE"):
-- 1. ID column is `icustay_id`, not `stay_id`.
-- 2. There is no `icustay_hourly` derived table in MIMIC-III; we
-- build the hourly grid inline with `generate_series`.
-- 3. There is no `mimic_derived.ventilation` with a fine-grained
-- `ventilation_status='InvasiveVent'` flag in MIMIC-III; we use
-- the lumped `ventilation_durations` table, so any active
-- ventilation row is treated as invasive ventilation for the
-- purpose of the PaO2:FiO2 vent/novent split.
-- 4. We replicate MIMIC-IV's `urine_output_rate` adjustment inline
-- in `sofa_uo`: we materialise both `uo_24hr` (sum) and
-- `uo_tm_24hr` (count of distinct hours that actually had a UO
-- observation in the past 24 h), and the renal CASE in (11)
-- uses
-- GREATEST(uo_24hr, 0) * 24.0 / uo_tm_24hr
-- only when uo_tm_24hr BETWEEN 22 AND 30, falling back to
-- creatinine alone otherwise. The GREATEST(_, 0) clip prevents
-- patients on continuous bladder irrigation (which the upstream
-- `urine_output.sql` subtracts as a negative volume) from being
-- mis-scored as oliguric.
-- 5. Vasopressor rates come from the upstream
-- durations/{epinephrine,norepinephrine,dopamine,dobutamine}_dose.sql
-- tables, which already merge CareVue + MetaVision and convert
-- to mcg/kg/min.
--
-- Implementation note: each measurement class is materialised into
-- its own narrow staging table. This avoids forcing the planner to
-- optimise a single ~10-way CTE join, lets each scan of the giant raw
-- tables (`chartevents`, `labevents`, `outputevents`,
-- `inputevents_*`) run independently, and lets you `EXPLAIN ANALYZE`
-- each step in isolation.
--
-- Dependencies:
-- postgres-functions.sql
-- durations/ventilation_durations.sql
-- durations/{dobutamine,dopamine,epinephrine,norepinephrine}_dose.sql
-- fluid_balance/urine_output.sql
-- sepsis/blood_gas_arterial.sql
-- sepsis/gcs_all.sql
-- ------------------------------------------------------------------
-- 1. Hourly grid. For every ICU stay with a known outtime later than its
--    intime, emit rows hr = 1..N where N is the stay length in hours
--    rounded up (never less than 1). Row hr covers the interval from
--    intime + (hr - 1) hours to intime + hr hours.
DROP TABLE IF EXISTS sofa_grid;
CREATE TABLE sofa_grid AS
SELECT
    ie.subject_id,
    ie.hadm_id,
    ie.icustay_id,
    gs.hr,
    ie.intime + ((gs.hr - 1) * INTERVAL '1 hour')   AS starttime,
    ie.intime + (gs.hr * INTERVAL '1 hour')         AS endtime
FROM icustays ie
CROSS JOIN LATERAL generate_series(
    1,
    GREATEST(
        1,
        CEIL(EXTRACT(EPOCH FROM (ie.outtime - ie.intime)) / 3600.0)::int
    )
) AS gs(hr)
WHERE ie.outtime IS NOT NULL
  AND ie.outtime > ie.intime;
CREATE INDEX IF NOT EXISTS sofa_grid_idx
    ON sofa_grid (icustay_id, hr);
CREATE INDEX IF NOT EXISTS sofa_grid_time_idx
    ON sofa_grid (icustay_id, starttime, endtime);
ANALYZE sofa_grid;
-- 2. Mean arterial pressure: lowest charted value in each grid hour
--    (starttime exclusive, endtime inclusive). Only the listed itemids
--    are read, values must lie strictly between 0 and 300, and rows
--    whose error flag is set are ignored. Hours with no qualifying
--    measurement keep a row with meanbp_min = NULL.
DROP TABLE IF EXISTS sofa_vs;
CREATE TABLE sofa_vs AS
SELECT
    grid.icustay_id,
    grid.hr,
    MIN(ce.valuenum) AS meanbp_min
FROM sofa_grid grid
LEFT JOIN chartevents ce
    ON ce.icustay_id = grid.icustay_id
   AND ce.charttime > grid.starttime
   AND ce.charttime <= grid.endtime
   AND ce.itemid IN (456, 52, 6702, 443, 220052, 220181, 225312)
   AND ce.valuenum > 0
   AND ce.valuenum < 300
   AND COALESCE(ce.error, 0) = 0
GROUP BY
    grid.icustay_id,
    grid.hr;
CREATE INDEX IF NOT EXISTS sofa_vs_idx
    ON sofa_vs (icustay_id, hr);
ANALYZE sofa_vs;
-- 3. GCS: lowest gcs_all.gcs value charted in each grid hour (starttime
--    exclusive, endtime inclusive). Hours without a GCS row keep a row
--    with gcs_min = NULL.
DROP TABLE IF EXISTS sofa_gcs;
CREATE TABLE sofa_gcs AS
SELECT
    grid.icustay_id,
    grid.hr,
    MIN(gc.gcs) AS gcs_min
FROM sofa_grid grid
LEFT JOIN gcs_all gc
    ON gc.icustay_id = grid.icustay_id
   AND gc.charttime > grid.starttime
   AND gc.charttime <= grid.endtime
GROUP BY
    grid.icustay_id,
    grid.hr;
CREATE INDEX IF NOT EXISTS sofa_gcs_idx
    ON sofa_gcs (icustay_id, hr);
ANALYZE sofa_gcs;
-- 4. Bilirubin: highest labevents value (itemid 50885, positive and
--    non-NULL) in each grid hour (starttime exclusive, endtime
--    inclusive). Labs are matched to the stay through subject_id and
--    hadm_id. sofa_grid already carries both columns, copied from
--    icustays when the grid was built, so no extra join back to icustays
--    is needed. Hours without a result keep a row with
--    bilirubin_max = NULL.
DROP TABLE IF EXISTS sofa_bili;
CREATE TABLE sofa_bili AS
SELECT
    g.icustay_id,
    g.hr,
    MAX(le.valuenum) AS bilirubin_max
FROM sofa_grid g
LEFT JOIN labevents le
    ON le.subject_id = g.subject_id
   AND le.hadm_id = g.hadm_id
   AND le.charttime > g.starttime
   AND le.charttime <= g.endtime
   AND le.itemid = 50885
   AND le.valuenum IS NOT NULL
   AND le.valuenum > 0
GROUP BY
    g.icustay_id,
    g.hr;
CREATE INDEX IF NOT EXISTS sofa_bili_idx ON sofa_bili (icustay_id, hr);
ANALYZE sofa_bili;
-- 5. Creatinine: highest labevents value (itemid 50912) in each grid hour
--    (starttime exclusive, endtime inclusive). Values must be positive
--    and below 150 mg/dL (sanity bound). Labs are matched to the stay
--    through subject_id and hadm_id. sofa_grid already carries both
--    columns, copied from icustays when the grid was built, so no extra
--    join back to icustays is needed. Hours without a result keep a row
--    with creatinine_max = NULL.
DROP TABLE IF EXISTS sofa_cr;
CREATE TABLE sofa_cr AS
SELECT
    g.icustay_id,
    g.hr,
    MAX(le.valuenum) AS creatinine_max
FROM sofa_grid g
LEFT JOIN labevents le
    ON le.subject_id = g.subject_id
   AND le.hadm_id = g.hadm_id
   AND le.charttime > g.starttime
   AND le.charttime <= g.endtime
   AND le.itemid = 50912
   AND le.valuenum IS NOT NULL
   AND le.valuenum > 0
   AND le.valuenum < 150 -- sanity (mg/dL)
GROUP BY
    g.icustay_id,
    g.hr;
CREATE INDEX IF NOT EXISTS sofa_cr_idx ON sofa_cr (icustay_id, hr);
ANALYZE sofa_cr;
-- 6. Platelets: lowest labevents value (itemid 51265, positive and
--    non-NULL) in each grid hour (starttime exclusive, endtime
--    inclusive). Labs are matched to the stay through subject_id and
--    hadm_id. sofa_grid already carries both columns, copied from
--    icustays when the grid was built, so no extra join back to icustays
--    is needed. Hours without a result keep a row with
--    platelet_min = NULL.
DROP TABLE IF EXISTS sofa_plt;
CREATE TABLE sofa_plt AS
SELECT
    g.icustay_id,
    g.hr,
    MIN(le.valuenum) AS platelet_min
FROM sofa_grid g
LEFT JOIN labevents le
    ON le.subject_id = g.subject_id
   AND le.hadm_id = g.hadm_id
   AND le.charttime > g.starttime
   AND le.charttime <= g.endtime
   AND le.itemid = 51265
   AND le.valuenum IS NOT NULL
   AND le.valuenum > 0
GROUP BY
    g.icustay_id,
    g.hr;
CREATE INDEX IF NOT EXISTS sofa_plt_idx ON sofa_plt (icustay_id, hr);
ANALYZE sofa_plt;
-- 7. PaO2/FiO2, split by ventilation status at the time of the blood gas.
--    A gas counts as "vent" when its charttime lies inside any
--    ventilation_durations interval of the same stay (both ends
--    inclusive), otherwise as "novent". Per grid hour we keep the lowest
--    ratio of each kind.
DROP TABLE IF EXISTS sofa_pf;
CREATE TABLE sofa_pf AS
WITH gas_by_vent AS (
    -- Each arterial gas with a ratio, placed in exactly one of the two
    -- columns; the other column is NULL.
    SELECT
        bg.icustay_id,
        bg.charttime,
        CASE WHEN vd.icustay_id IS NULL     THEN bg.pao2fio2 END AS pao2fio2_novent,
        CASE WHEN vd.icustay_id IS NOT NULL THEN bg.pao2fio2 END AS pao2fio2_vent
    FROM blood_gas_arterial bg
    LEFT JOIN ventilation_durations vd
        ON bg.icustay_id = vd.icustay_id
       AND bg.charttime >= vd.starttime
       AND bg.charttime <= vd.endtime
    WHERE bg.pao2fio2 IS NOT NULL
)
SELECT
    grid.icustay_id,
    grid.hr,
    MIN(gv.pao2fio2_novent) AS pao2fio2_novent,
    MIN(gv.pao2fio2_vent)   AS pao2fio2_vent
FROM sofa_grid grid
LEFT JOIN gas_by_vent gv
    ON gv.icustay_id = grid.icustay_id
   AND gv.charttime > grid.starttime
   AND gv.charttime <= grid.endtime
GROUP BY
    grid.icustay_id,
    grid.hr;
CREATE INDEX IF NOT EXISTS sofa_pf_idx
    ON sofa_pf (icustay_id, hr);
ANALYZE sofa_pf;
-- 8. Urine output over the 24 h ending at each grid row's endtime
--    (window start exclusive, endtime inclusive):
--      uo_24hr    = sum of urine_output.value in the window
--      uo_tm_24hr = number of distinct clock hours in the window that
--                   have at least one urine_output row
--    The second column lets the renal CASE in step 11 tell "no data"
--    apart from "really oliguric": the UO branch fires only when
--    uo_tm_24hr is in a plausible range, otherwise the score falls back
--    to creatinine alone. See item 4 of the "Differences vs. MIMIC-IV
--    upstream" list in the file header.
DROP TABLE IF EXISTS sofa_uo;
CREATE TABLE sofa_uo AS
SELECT
    grid.icustay_id,
    grid.hr,
    SUM(uo.value)                                       AS uo_24hr,
    COUNT(DISTINCT date_trunc('hour', uo.charttime))    AS uo_tm_24hr
FROM sofa_grid grid
LEFT JOIN urine_output uo
    ON uo.icustay_id = grid.icustay_id
   AND uo.charttime > DATETIME_SUB(grid.endtime, INTERVAL '24' HOUR)
   AND uo.charttime <= grid.endtime
GROUP BY
    grid.icustay_id,
    grid.hr;
CREATE INDEX IF NOT EXISTS sofa_uo_idx
    ON sofa_uo (icustay_id, hr);
ANALYZE sofa_uo;
-- 9. Vasopressor rates. For each grid row, take the highest vaso_rate of
--    every pressor whose dose interval contains the row's endtime
--    (interval start exclusive, end inclusive). Only grid rows with at
--    least one active pressor are written, so sofa_vaso is sparse
--    relative to sofa_grid.
DROP TABLE IF EXISTS sofa_vaso;
CREATE TABLE sofa_vaso AS
SELECT
    grid.icustay_id,
    grid.hr,
    MAX(epi.vaso_rate) AS rate_epinephrine,
    MAX(nor.vaso_rate) AS rate_norepinephrine,
    MAX(dop.vaso_rate) AS rate_dopamine,
    MAX(dob.vaso_rate) AS rate_dobutamine
FROM sofa_grid grid
LEFT JOIN epinephrine_dose epi
    ON epi.icustay_id = grid.icustay_id
   AND grid.endtime > epi.starttime
   AND grid.endtime <= epi.endtime
LEFT JOIN norepinephrine_dose nor
    ON nor.icustay_id = grid.icustay_id
   AND grid.endtime > nor.starttime
   AND grid.endtime <= nor.endtime
LEFT JOIN dopamine_dose dop
    ON dop.icustay_id = grid.icustay_id
   AND grid.endtime > dop.starttime
   AND grid.endtime <= dop.endtime
LEFT JOIN dobutamine_dose dob
    ON dob.icustay_id = grid.icustay_id
   AND grid.endtime > dob.starttime
   AND grid.endtime <= dob.endtime
-- at least one of the four joins found a matching dose row
WHERE COALESCE(
          epi.icustay_id,
          nor.icustay_id,
          dop.icustay_id,
          dob.icustay_id
      ) IS NOT NULL
GROUP BY
    grid.icustay_id,
    grid.hr;
CREATE INDEX IF NOT EXISTS sofa_vaso_idx
    ON sofa_vaso (icustay_id, hr);
ANALYZE sofa_vaso;
-- 10. Wide assembly: one row per grid row, with each staging table
--     attached by (icustay_id, hr). Every join is a LEFT JOIN, so a grid
--     row survives even when a staging table has nothing for that hour.
DROP TABLE IF EXISTS sofa_wide;
CREATE TABLE sofa_wide AS
SELECT
    grid.subject_id,
    grid.hadm_id,
    grid.icustay_id,
    grid.hr,
    grid.starttime,
    grid.endtime,
    map.meanbp_min,
    gcs.gcs_min,
    bili.bilirubin_max,
    creat.creatinine_max,
    plt.platelet_min,
    pafi.pao2fio2_novent,
    pafi.pao2fio2_vent,
    urine.uo_24hr,
    urine.uo_tm_24hr,
    vaso.rate_epinephrine,
    vaso.rate_norepinephrine,
    vaso.rate_dopamine,
    vaso.rate_dobutamine
FROM sofa_grid grid
LEFT JOIN sofa_vs map
    ON map.icustay_id = grid.icustay_id AND map.hr = grid.hr
LEFT JOIN sofa_gcs gcs
    ON gcs.icustay_id = grid.icustay_id AND gcs.hr = grid.hr
LEFT JOIN sofa_bili bili
    ON bili.icustay_id = grid.icustay_id AND bili.hr = grid.hr
LEFT JOIN sofa_cr creat
    ON creat.icustay_id = grid.icustay_id AND creat.hr = grid.hr
LEFT JOIN sofa_plt plt
    ON plt.icustay_id = grid.icustay_id AND plt.hr = grid.hr
LEFT JOIN sofa_pf pafi
    ON pafi.icustay_id = grid.icustay_id AND pafi.hr = grid.hr
LEFT JOIN sofa_uo urine
    ON urine.icustay_id = grid.icustay_id AND urine.hr = grid.hr
LEFT JOIN sofa_vaso vaso
    ON vaso.icustay_id = grid.icustay_id AND vaso.hr = grid.hr;
CREATE INDEX IF NOT EXISTS sofa_wide_idx
    ON sofa_wide (icustay_id, hr);
ANALYZE sofa_wide;
-- 11. Per-hour component scores (no rolling window yet).
--     Copies every sofa_wide column and appends six scores, each 0-4 or
--     NULL. A component is NULL only when all of its inputs are missing
--     for that hour. CASE branches are evaluated top to bottom, so the
--     most severe matching band wins.
DROP TABLE IF EXISTS sofa_components;
CREATE TABLE sofa_components AS
SELECT w.*
    -- Respiration: the bands below 200 require a ventilated PaO2:FiO2;
    -- the milder bands accept either the vent or the novent value.
    , CASE
        WHEN pao2fio2_vent < 100 THEN 4
        WHEN pao2fio2_vent < 200 THEN 3
        WHEN pao2fio2_novent < 300 THEN 2
        WHEN pao2fio2_vent < 300 THEN 2
        WHEN pao2fio2_novent < 400 THEN 1
        WHEN pao2fio2_vent < 400 THEN 1
        WHEN COALESCE(pao2fio2_vent, pao2fio2_novent) IS NULL THEN NULL
        ELSE 0
    END AS respiration
    -- Coagulation (platelet count)
    , CASE
        WHEN platelet_min < 20 THEN 4
        WHEN platelet_min < 50 THEN 3
        WHEN platelet_min < 100 THEN 2
        WHEN platelet_min < 150 THEN 1
        WHEN platelet_min IS NULL THEN NULL
        ELSE 0
    END AS coagulation
    -- Liver (bilirubin, mg/dL)
    , CASE
        WHEN bilirubin_max >= 12.0 THEN 4
        WHEN bilirubin_max >= 6.0 THEN 3
        WHEN bilirubin_max >= 2.0 THEN 2
        WHEN bilirubin_max >= 1.2 THEN 1
        WHEN bilirubin_max IS NULL THEN NULL
        ELSE 0
    END AS liver
    -- Cardiovascular. The 4-band is tested first, so by the time the
    -- 3-band runs, any non-NULL epinephrine / norepinephrine rate is
    -- already known to be <= 0.1; that branch therefore fires for any
    -- recorded epi / norepi rate at or below 0.1.
    , CASE
        WHEN rate_dopamine > 15 OR rate_epinephrine > 0.1 OR rate_norepinephrine > 0.1 THEN 4
        WHEN rate_dopamine > 5 OR rate_epinephrine <= 0.1 OR rate_norepinephrine <= 0.1 THEN 3
        WHEN rate_dopamine > 0 OR rate_dobutamine > 0 THEN 2
        WHEN meanbp_min < 70 THEN 1
        WHEN COALESCE(meanbp_min, rate_dopamine, rate_dobutamine,
                      rate_epinephrine, rate_norepinephrine) IS NULL THEN NULL
        ELSE 0
    END AS cardiovascular
    -- CNS (GCS): a value of 15 falls through to 0.
    , CASE
        WHEN gcs_min >= 13 AND gcs_min <= 14 THEN 1
        WHEN gcs_min >= 10 AND gcs_min <= 12 THEN 2
        WHEN gcs_min >= 6 AND gcs_min <= 9 THEN 3
        WHEN gcs_min < 6 THEN 4
        WHEN gcs_min IS NULL THEN NULL
        ELSE 0
    END AS cns
    -- Renal.
    -- The urine-output branches are used only when the 24 h window has
    -- between 22 and 30 distinct hours with an observation
    -- (uo_tm_24hr). In that case uo_24hr is rescaled to a 24 h
    -- equivalent (uo_24hr * 24 / uo_tm_24hr) before it is compared with
    -- the 200 / 500 mL thresholds. Negative net volumes are clipped to
    -- zero first, so that a patient on continuous bladder irrigation is
    -- not mis-scored as oliguric. Outside that range the score depends
    -- on creatinine alone.
    , CASE
        WHEN creatinine_max >= 5.0 THEN 4
        WHEN uo_tm_24hr BETWEEN 22 AND 30
            AND GREATEST(uo_24hr, 0) * 24.0 / uo_tm_24hr < 200 THEN 4
        WHEN creatinine_max >= 3.5 AND creatinine_max < 5.0 THEN 3
        WHEN uo_tm_24hr BETWEEN 22 AND 30
            AND GREATEST(uo_24hr, 0) * 24.0 / uo_tm_24hr < 500 THEN 3
        WHEN creatinine_max >= 2.0 AND creatinine_max < 3.5 THEN 2
        WHEN creatinine_max >= 1.2 AND creatinine_max < 2.0 THEN 1
        -- No creatinine and no usable urine output means the score is
        -- unknown. IS NOT TRUE (rather than NOT) also covers a NULL
        -- uo_tm_24hr, which would otherwise make this test NULL and let
        -- the row fall through to 0.
        WHEN creatinine_max IS NULL
            AND (uo_tm_24hr BETWEEN 22 AND 30) IS NOT TRUE THEN NULL
        ELSE 0
    END AS renal
FROM sofa_wide w;
CREATE INDEX IF NOT EXISTS sofa_components_idx
    ON sofa_components (icustay_id, hr);
ANALYZE sofa_components;
-- 12. Final hourly SOFA. For each component, take the maximum over the
--     current row and the 23 preceding rows of the same stay (ordered by
--     hr), treating "no score anywhere in the window" as 0. sofa_24hours
--     is the sum of those six rolling maxima.
DROP TABLE IF EXISTS sofa_hourly;
CREATE TABLE sofa_hourly AS
SELECT
    r.subject_id,
    r.hadm_id,
    r.icustay_id,
    r.hr,
    r.starttime,
    r.endtime,
    r.respiration,
    r.coagulation,
    r.liver,
    r.cardiovascular,
    r.cns,
    r.renal,
    r.respiration_24hours,
    r.coagulation_24hours,
    r.liver_24hours,
    r.cardiovascular_24hours,
    r.cns_24hours,
    r.renal_24hours,
    r.respiration_24hours
        + r.coagulation_24hours
        + r.liver_24hours
        + r.cardiovascular_24hours
        + r.cns_24hours
        + r.renal_24hours AS sofa_24hours
FROM (
    -- rolling maxima, computed once per component
    SELECT
        c.subject_id,
        c.hadm_id,
        c.icustay_id,
        c.hr,
        c.starttime,
        c.endtime,
        c.respiration,
        c.coagulation,
        c.liver,
        c.cardiovascular,
        c.cns,
        c.renal,
        COALESCE(MAX(c.respiration) OVER last_24h, 0)       AS respiration_24hours,
        COALESCE(MAX(c.coagulation) OVER last_24h, 0)       AS coagulation_24hours,
        COALESCE(MAX(c.liver) OVER last_24h, 0)             AS liver_24hours,
        COALESCE(MAX(c.cardiovascular) OVER last_24h, 0)    AS cardiovascular_24hours,
        COALESCE(MAX(c.cns) OVER last_24h, 0)               AS cns_24hours,
        COALESCE(MAX(c.renal) OVER last_24h, 0)             AS renal_24hours
    FROM sofa_components c
    WINDOW last_24h AS (
        PARTITION BY c.icustay_id
        ORDER BY c.hr
        ROWS BETWEEN 23 PRECEDING AND CURRENT ROW
    )
) r;
CREATE INDEX IF NOT EXISTS sofa_hourly_idx
    ON sofa_hourly (icustay_id, hr);
CREATE INDEX IF NOT EXISTS sofa_hourly_time_idx
    ON sofa_hourly (icustay_id, endtime);
ANALYZE sofa_hourly;

View File

@@ -0,0 +1,153 @@
-- ------------------------------------------------------------------
-- Title: Suspicion of Infection
--
-- Adapted from the MIMIC-IV upstream
-- concepts/sepsis/suspicion_of_infection.sql
-- and ported to MIMIC-III v1.3 vanilla PostgreSQL.
--
-- Definition (from the original Sepsis-3 paper, Seymour 2016):
-- a patient is "suspected of infection" if a culture and an
-- antibiotic are ordered close in time:
-- - culture <= 72 h before antibiotic, OR
-- - culture <= 24 h after antibiotic.
-- The antibiotic time is taken as the suspected-infection time when
-- a culture comes second; the culture time when it comes first.
--
-- PORT NOTES:
-- 1. ID column is `icustay_id`, not `stay_id`.
-- 2. MIMIC-III has no `micro_specimen_id`; specimens are identified
-- by the tuple (subject_id, hadm_id, chartdate, charttime,
-- spec_itemid, spec_type_desc) and we deduplicate organism
-- rows by aggregating with that tuple.
-- 3. MIMIC-III `prescriptions.startdate` is DATE-precision only.
-- Consequently `antibiotic_time` always lands on midnight; the
-- MIMIC-IV branches that compare to `me.charttime` still work
-- (DATE auto-casts to TIMESTAMP at 00:00) but give day-level
-- onset precision.
-- ------------------------------------------------------------------
DROP TABLE IF EXISTS suspicion_of_infection;
CREATE TABLE suspicion_of_infection AS
-- ab_tbl: one row per antibiotic record, numbered per patient (ab_id) in
-- order of start time, stop time and drug name so that later steps can
-- refer back to an individual record.
WITH ab_tbl AS
(
  SELECT rx.subject_id
       , rx.hadm_id
       , rx.icustay_id
       , rx.antibiotic
       , CAST(rx.starttime AS TIMESTAMP) AS antibiotic_time
       -- date-only copy, compared against cultures that have no charttime
       , CAST(rx.starttime AS DATE) AS antibiotic_date
       , CAST(rx.stoptime AS TIMESTAMP) AS antibiotic_stoptime
       , ROW_NUMBER() OVER per_patient AS ab_id
  FROM antibiotic rx
  WINDOW per_patient AS (
    PARTITION BY rx.subject_id
    ORDER BY rx.starttime, rx.stoptime, rx.antibiotic
  )
)
-- me: microbiology rows collapsed to one row per specimen, a specimen being
-- the tuple (subject_id, hadm_id, chartdate, charttime, spec_itemid,
-- spec_type_desc). positiveculture is 1 when any row of the specimen has a
-- non-empty organism name, otherwise 0.
, me AS
(
  SELECT mb.subject_id
       , mb.hadm_id
       , mb.spec_itemid
       , mb.spec_type_desc
       -- chartdate and charttime are grouping keys, so MAX only collapses duplicates
       , MAX(CAST(mb.chartdate AS DATE)) AS chartdate
       , MAX(mb.charttime) AS charttime
       , MAX(CASE
               WHEN mb.org_name IS NULL THEN 0
               WHEN mb.org_name = '' THEN 0
               ELSE 1
             END) AS positiveculture
  FROM microbiologyevents mb
  GROUP BY mb.subject_id
         , mb.hadm_id
         , mb.chartdate
         , mb.charttime
         , mb.spec_itemid
         , mb.spec_type_desc
)
-- me_then_ab: culture first, antibiotic second. Each antibiotic is paired
-- with the patient's cultures taken strictly before it and at most 72 hours
-- earlier; cultures with no charttime are matched on date instead (same day
-- up to 3 days earlier). micro_seq = 1 marks the earliest matching culture.
-- Antibiotics without a match keep a single row with NULL culture columns.
, me_then_ab AS
(
  SELECT ab.subject_id
       , ab.hadm_id
       , ab.icustay_id
       , ab.ab_id
       -- fall back to midnight of chartdate when no charttime was recorded
       , COALESCE(cx.charttime, CAST(cx.chartdate AS TIMESTAMP)) AS last72_charttime
       , cx.positiveculture AS last72_positiveculture
       , cx.spec_type_desc AS last72_specimen
       , ROW_NUMBER() OVER (
           PARTITION BY ab.subject_id, ab.ab_id
           ORDER BY cx.chartdate, cx.charttime NULLS LAST
         ) AS micro_seq
  FROM ab_tbl ab
  LEFT JOIN me cx
    ON cx.subject_id = ab.subject_id
   AND (
         -- timestamp precision available
         (
               cx.charttime IS NOT NULL
           AND cx.charttime < ab.antibiotic_time
           AND DATETIME_ADD(cx.charttime, INTERVAL '72' HOUR) >= ab.antibiotic_time
         )
         OR
         -- date precision only
         (
               cx.charttime IS NULL
           AND ab.antibiotic_date BETWEEN cx.chartdate
                                      AND cx.chartdate + INTERVAL '3 day'
         )
       )
)
-- ab_then_me: antibiotic first, culture second. Each antibiotic is paired
-- with the patient's cultures taken strictly after it and at most 24 hours
-- later; cultures with no charttime are matched on date instead (same day
-- up to 1 day later). micro_seq = 1 marks the earliest matching culture.
-- Antibiotics without a match keep a single row with NULL culture columns.
, ab_then_me AS
(
  SELECT ab.subject_id
       , ab.hadm_id
       , ab.icustay_id
       , ab.ab_id
       -- fall back to midnight of chartdate when no charttime was recorded
       , COALESCE(cx.charttime, CAST(cx.chartdate AS TIMESTAMP)) AS next24_charttime
       , cx.positiveculture AS next24_positiveculture
       , cx.spec_type_desc AS next24_specimen
       , ROW_NUMBER() OVER (
           PARTITION BY ab.subject_id, ab.ab_id
           ORDER BY cx.chartdate, cx.charttime NULLS LAST
         ) AS micro_seq
  FROM ab_tbl ab
  LEFT JOIN me cx
    ON cx.subject_id = ab.subject_id
   AND (
         -- timestamp precision available
         (
               cx.charttime IS NOT NULL
           AND cx.charttime > ab.antibiotic_time
           AND DATETIME_SUB(cx.charttime, INTERVAL '24' HOUR) <= ab.antibiotic_time
         )
         OR
         -- date precision only
         (
               cx.charttime IS NULL
           AND ab.antibiotic_date BETWEEN cx.chartdate - INTERVAL '1 day'
                                      AND cx.chartdate
         )
       )
)
-- Final result: one row per antibiotic record. The record is flagged as a
-- suspected infection when a culture specimen was matched in either
-- direction; a culture that preceded the antibiotic takes priority for the
-- reported time, specimen and positivity.
SELECT ab.subject_id
     , ab.icustay_id
     , ab.hadm_id
     , ab.ab_id
     , ab.antibiotic
     , ab.antibiotic_time
     , CASE
         WHEN me2ab.last72_specimen IS NOT NULL
           OR ab2me.next24_specimen IS NOT NULL
         THEN 1
         ELSE 0
       END AS suspected_infection
     -- culture time when the culture came first, otherwise the antibiotic
     -- time; NULL when no culture was matched at all
     , CASE
         WHEN me2ab.last72_specimen IS NOT NULL
           OR ab2me.next24_specimen IS NOT NULL
         THEN COALESCE(me2ab.last72_charttime, ab.antibiotic_time)
       END AS suspected_infection_time
     , COALESCE(me2ab.last72_charttime, ab2me.next24_charttime) AS culture_time
     , COALESCE(me2ab.last72_specimen, ab2me.next24_specimen) AS specimen
     , COALESCE(me2ab.last72_positiveculture,
                ab2me.next24_positiveculture) AS positive_culture
FROM ab_tbl ab
LEFT JOIN me_then_ab me2ab
  ON me2ab.subject_id = ab.subject_id
 AND me2ab.ab_id = ab.ab_id
 AND me2ab.micro_seq = 1
LEFT JOIN ab_then_me ab2me
  ON ab2me.subject_id = ab.subject_id
 AND ab2me.ab_id = ab.ab_id
 AND ab2me.micro_seq = 1;
-- lookup index by ICU stay and suspected-infection time
CREATE INDEX IF NOT EXISTS suspicion_of_infection_idx ON suspicion_of_infection (icustay_id, suspected_infection_time);

View File

@@ -0,0 +1,384 @@
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
-- Rebuild the sapsii table from scratch on every run.
DROP TABLE IF EXISTS sapsii;
CREATE TABLE sapsii AS
-- ------------------------------------------------------------------
-- Title: Simplified Acute Physiology Score II (SAPS II)
-- This query extracts the simplified acute physiology score II.
-- This score is a measure of patient severity of illness.
-- The score is calculated on the first day of each ICU patients' stay.
-- ------------------------------------------------------------------
-- Reference for SAPS II:
-- Le Gall, Jean-Roger, Stanley Lemeshow, and Fabienne Saulnier.
-- "A new simplified acute physiology score (SAPS II) based on a European/North American multicenter study."
-- JAMA 270, no. 24 (1993): 2957-2963.
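-- Variables used in SAPS II:
--  Age, GCS, admission type (scheduled surgical / unscheduled surgical / medical)
--  COMORBIDITIES: AIDS, hematologic malignancy, metastatic cancer (from ICD-9 codes)
--  VITALS: Heart rate, systolic blood pressure, temperature
--  FLAGS: ventilation/cpap
--  IO: urine output
--  LABS: PaO2/FiO2 ratio, blood urea nitrogen, WBC, potassium, sodium, HCO3, bilirubin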
-- The following tables/views are required to run this query:
--  1) urine_output_first_day - generated by firstday/urine_output_first_day.sql
--  2) ventilation_durations - generated by durations/ventilation_durations.sql
--  3) vitals_first_day - generated by firstday/vitals_first_day.sql
--  4) gcs_first_day - generated by firstday/gcs_first_day.sql
--  5) labs_first_day - generated by firstday/labs_first_day.sql
--  6) blood_gas_first_day_arterial - generated by firstday/blood_gas_first_day_arterial.sql
-- Note:
-- The score is calculated for *all* ICU patients, with the assumption that the user will subselect appropriate ICUSTAY_IDs.
-- For example, the score is calculated for neonates, but it is likely inappropriate to actually use the score values for these patients.
-- cpap: per ICU stay, the time span covered by CPAP / BiPAP-mask entries in
-- the "Oxygen Delivery Device" chart items charted within one day of ICU
-- admission. The span starts 1 hour before the first matching observation
-- and ends 4 hours after the last one.
WITH cpap AS
(
  SELECT ie.icustay_id
       , MIN(DATETIME_SUB(ce.charttime, INTERVAL '1' HOUR)) AS starttime
       , MAX(DATETIME_ADD(ce.charttime, INTERVAL '4' HOUR)) AS endtime
       , MAX(CASE
               WHEN LOWER(ce.value) LIKE '%cpap%'
                 OR LOWER(ce.value) LIKE '%bipap mask%'
               THEN 1
               ELSE 0
             END) AS cpap
  FROM icustays ie
  INNER JOIN chartevents ce
    ON ce.icustay_id = ie.icustay_id
   AND ce.charttime >= ie.intime
   AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
  WHERE ce.itemid IN
        (
          -- TODO: when metavision data import fixed, check the values in 226732 match the value clause below
          467, 469, 226732
        )
    AND (
          LOWER(ce.value) LIKE '%cpap%'
          OR LOWER(ce.value) LIKE '%bipap mask%'
        )
    -- keep only rows not marked as error (error is NULL or 0)
    AND COALESCE(ce.error, 0) = 0
  GROUP BY ie.icustay_id
)
-- surgflag: every service of each hospital admission, flagged surgical when
-- the service name contains "surg" and numbered by transfer time, so that
-- serviceOrder = 1 is the first service of the admission.
-- Combined with the ELECTIVE admission type this separates scheduled from
-- unscheduled surgery.
, surgflag AS
(
  SELECT adm.hadm_id
       , CASE
           WHEN LOWER(se.curr_service) LIKE '%surg%' THEN 1
           ELSE 0
         END AS surgical
       , ROW_NUMBER() OVER by_admission AS serviceOrder
  FROM admissions adm
  LEFT JOIN services se
    ON se.hadm_id = adm.hadm_id
  WINDOW by_admission AS (
    PARTITION BY adm.hadm_id
    ORDER BY se.transfertime
  )
)
-- comorb: per hospital admission, three comorbidity flags derived from ICD-9
-- diagnosis codes. Each flag is 1 when at least one code matches and NULL
-- otherwise. The codes are assigned for the whole admission, so they are not
-- strictly causal with respect to the first ICU day; this is accepted for
-- the SAPS II comorbidities.
-- The code lists are based on, but differ slightly from, the Elixhauser
-- comorbidities and include some non-comorbid codes (e.g. 20302, relapse of
-- multiple myeloma).
, comorb AS
(
  SELECT dx.hadm_id
       -- HIV and AIDS
       , MAX(CASE
               WHEN SUBSTR(dx.icd9_code, 1, 3) BETWEEN '042' AND '044' THEN 1
             END) AS aids
       -- hematologic malignancy
       , MAX(CASE
               WHEN dx.icd9_code BETWEEN '20000' AND '20238'   -- lymphoma
                 OR dx.icd9_code BETWEEN '20240' AND '20248'   -- leukemia
                 OR dx.icd9_code BETWEEN '20250' AND '20302'   -- lymphoma
                 OR dx.icd9_code BETWEEN '20310' AND '20312'   -- leukemia
                 OR dx.icd9_code BETWEEN '20302' AND '20382'   -- lymphoma
                 OR dx.icd9_code BETWEEN '20400' AND '20522'   -- chronic leukemia
                 OR dx.icd9_code BETWEEN '20580' AND '20702'   -- other myeloid leukemia
                 OR dx.icd9_code BETWEEN '20720' AND '20892'   -- other myeloid leukemia
                 OR SUBSTR(dx.icd9_code, 1, 4) IN ('2386', '2733')   -- lymphoma
               THEN 1
             END) AS hem
       -- metastatic cancer
       , MAX(CASE
               WHEN SUBSTR(dx.icd9_code, 1, 4) BETWEEN '1960' AND '1991'
                 OR dx.icd9_code BETWEEN '20970' AND '20975'
                 OR dx.icd9_code IN ('20979', '78951')
               THEN 1
             END) AS mets
  FROM diagnoses_icd dx
  GROUP BY dx.hadm_id
)
-- pafi1: every first-day arterial blood gas, tagged with whether it was
-- drawn inside a mechanical-ventilation interval (vent) and/or inside the
-- CPAP span computed above (cpap). A blood gas that overlaps several
-- ventilation intervals appears once per interval.
, pafi1 AS
(
  SELECT bg.icustay_id
       , bg.charttime
       , bg.pao2fio2
       , CASE WHEN vd.icustay_id IS NULL THEN 0 ELSE 1 END AS vent
       , CASE WHEN cp.icustay_id IS NULL THEN 0 ELSE 1 END AS cpap
  FROM blood_gas_first_day_arterial bg
  LEFT JOIN ventilation_durations vd
    ON vd.icustay_id = bg.icustay_id
   AND bg.charttime BETWEEN vd.starttime AND vd.endtime
  LEFT JOIN cpap cp
    ON cp.icustay_id = bg.icustay_id
   AND bg.charttime BETWEEN cp.starttime AND cp.endtime
)
-- pafi2: lowest PaO2/FiO2 ratio per ICU stay, taken only from blood gases
-- drawn while the patient was ventilated or on CPAP. Stays with no such
-- blood gas have no row here.
, pafi2 AS
(
  SELECT p.icustay_id
       , MIN(p.pao2fio2) AS pao2fio2_vent_min
  FROM pafi1 p
  WHERE 1 IN (p.vent, p.cpap)
  GROUP BY p.icustay_id
)
-- cohort: the raw inputs of the score for each ICU stay: age, first-day
-- vitals / labs / urine output / GCS, the ventilated PaO2/FiO2 minimum,
-- comorbidity flags and the derived admission type.
-- NOTE(review): yields one row per ICU stay only if each first-day table
-- holds at most one row per icustay_id - confirm against those scripts.
, cohort AS
(
  SELECT ie.subject_id
       , ie.hadm_id
       , ie.icustay_id
       , ie.intime
       , ie.outtime
       -- age: 'YEAR' difference between ICU admission time and date of
       -- birth, as computed by the DATETIME_DIFF shim
       , DATETIME_DIFF(ie.intime, pat.dob, 'YEAR') AS age
       -- first-day vitals
       , vital.heartrate_max
       , vital.heartrate_min
       , vital.sysbp_max
       , vital.sysbp_min
       , vital.tempc_max
       , vital.tempc_min
       -- NULL unless a first-day arterial blood gas was drawn on vent/cpap
       , pf.pao2fio2_vent_min
       , uo.urineoutput
       -- first-day labs
       , labs.bun_min
       , labs.bun_max
       , labs.wbc_min
       , labs.wbc_max
       , labs.potassium_min
       , labs.potassium_max
       , labs.sodium_min
       , labs.sodium_max
       , labs.bicarbonate_min
       , labs.bicarbonate_max
       , labs.bilirubin_min
       , labs.bilirubin_max
       , gcs.mingcs
       -- comorbidity flags (1 or NULL)
       , comorb.aids
       , comorb.hem
       , comorb.mets
       -- surgical first service + elective admission = scheduled surgery;
       -- surgical first service otherwise = unscheduled surgery;
       -- everything else (including no service record) = medical
       , CASE
           WHEN sf.surgical = 1 AND adm.admission_type = 'ELECTIVE'
             THEN 'ScheduledSurgical'
           WHEN sf.surgical = 1 AND adm.admission_type <> 'ELECTIVE'
             THEN 'UnscheduledSurgical'
           ELSE 'Medical'
         END AS admissiontype
  FROM icustays ie
  INNER JOIN admissions adm
    ON adm.hadm_id = ie.hadm_id
  INNER JOIN patients pat
    ON pat.subject_id = ie.subject_id
  -- CTEs defined earlier in this statement
  LEFT JOIN pafi2 pf
    ON pf.icustay_id = ie.icustay_id
  LEFT JOIN surgflag sf
    ON sf.hadm_id = adm.hadm_id
   AND sf.serviceOrder = 1
  LEFT JOIN comorb
    ON comorb.hadm_id = ie.hadm_id
  -- pre-built first-day tables
  LEFT JOIN gcs_first_day gcs
    ON gcs.icustay_id = ie.icustay_id
  LEFT JOIN vitals_first_day vital
    ON vital.icustay_id = ie.icustay_id
  LEFT JOIN urine_output_first_day uo
    ON uo.icustay_id = ie.icustay_id
  LEFT JOIN labs_first_day labs
    ON labs.icustay_id = ie.icustay_id
)
-- scorecomp: one SAPS II component score per input variable, using the point
-- values of Le Gall et al. (1993). A component is NULL when its input is
-- missing; the following step counts NULL components as 0.
-- Within each CASE the branches are ordered from most to fewest points where
-- both the minimum and the maximum can score, so the worse abnormality wins.
, scorecomp as
(
select
  cohort.*
  -- age: 0 / 7 / 12 / 15 / 16 / 18 points
  , case
      when age is null then null
      when age <  40 then 0
      when age <  60 then 7
      when age <  70 then 12
      when age <  75 then 15
      when age <  80 then 16
      when age >= 80 then 18
    end as age_score
  -- heart rate: <40 -> 11, >=160 -> 7, 120-159 -> 4, 40-69 -> 2, 70-119 -> 0
  , case
      when heartrate_max is null then null
      when heartrate_min <   40 then 11
      when heartrate_max >= 160 then 7
      when heartrate_max >= 120 then 4
      when heartrate_min <   70 then 2
      when heartrate_max >= 70 and heartrate_max < 120
       and heartrate_min >= 70 and heartrate_min < 120
        then 0
    end as hr_score
  -- systolic blood pressure: <70 -> 13, 70-99 -> 5, >=200 -> 2, 100-199 -> 0
  , case
      when sysbp_min is null then null
      when sysbp_min <   70 then 13
      when sysbp_min <  100 then 5
      when sysbp_max >= 200 then 2
      when sysbp_max >= 100 and sysbp_max < 200
       and sysbp_min >= 100 and sysbp_min < 200
        then 0
    end as sysbp_score
  -- temperature is scored on the first-day MAXIMUM: >= 39.0 -> 3, else 0.
  -- Testing tempc_min < 39.0 first would give 0 to every stay whose minimum
  -- stayed below 39, i.e. it would hide practically every fever.
  , case
      when tempc_max is null then null
      when tempc_max <  39.0 then 0
      when tempc_max >= 39.0 then 3
    end as temp_score
  -- PaO2/FiO2 is only scored for ventilated / CPAP patients (NULL otherwise)
  , case
      when pao2fio2_vent_min is null then null
      when pao2fio2_vent_min <  100 then 11
      when pao2fio2_vent_min <  200 then 9
      when pao2fio2_vent_min >= 200 then 6
    end as pao2fio2_score
  -- urine output: <500 -> 11, 500-999 -> 4, >=1000 -> 0
  , case
      when urineoutput is null then null
      when urineoutput <   500.0 then 11
      when urineoutput <  1000.0 then 4
      when urineoutput >= 1000.0 then 0
    end as uo_score
  -- blood urea nitrogen: <28 -> 0, 28-83 -> 6, >=84 -> 10
  , case
      when bun_max is null then null
      when bun_max <  28.0 then 0
      when bun_max <  84.0 then 6
      when bun_max >= 84.0 then 10
    end as bun_score
  -- white blood cell count: <1.0 -> 12, >=20.0 -> 3, otherwise 0
  , case
      when wbc_max is null then null
      when wbc_min <   1.0 then 12
      when wbc_max >= 20.0 then 3
      when wbc_max >= 1.0 and wbc_max < 20.0
       and wbc_min >= 1.0 and wbc_min < 20.0
        then 0
    end as wbc_score
  -- potassium: <3.0 or >=5.0 -> 3, otherwise 0
  , case
      when potassium_max is null then null
      when potassium_min <  3.0 then 3
      when potassium_max >= 5.0 then 3
      when potassium_max >= 3.0 and potassium_max < 5.0
       and potassium_min >= 3.0 and potassium_min < 5.0
        then 0
    end as potassium_score
  -- sodium: <125 -> 5, >=145 -> 1, otherwise 0
  , case
      when sodium_max is null then null
      when sodium_min <  125 then 5
      when sodium_max >= 145 then 1
      when sodium_max >= 125 and sodium_max < 145
       and sodium_min >= 125 and sodium_min < 145
        then 0
    end as sodium_score
  -- bicarbonate: <15 -> 6, 15-19 -> 3, >=20 -> 0.
  -- The lowest band is worth 6 points in the published score (not 5); with 6
  -- the component maxima add up to the published maximum total of 163.
  , case
      when bicarbonate_max is null then null
      when bicarbonate_min <  15.0 then 6
      when bicarbonate_min <  20.0 then 3
      when bicarbonate_max >= 20.0
       and bicarbonate_min >= 20.0
        then 0
    end as bicarbonate_score
  -- bilirubin: <4.0 -> 0, 4.0-5.9 -> 4, >=6.0 -> 9
  , case
      when bilirubin_max is null then null
      when bilirubin_max <  4.0 then 0
      when bilirubin_max <  6.0 then 4
      when bilirubin_max >= 6.0 then 9
    end as bilirubin_score
  -- Glasgow Coma Scale: <6 -> 26, 6-8 -> 13, 9-10 -> 7, 11-13 -> 5, 14-15 -> 0
  , case
      when mingcs is null then null
      when mingcs <  3 then null -- erroneous value/on trach
      when mingcs <  6 then 26
      when mingcs <  9 then 13
      when mingcs < 11 then 7
      when mingcs < 14 then 5
      when mingcs >= 14
       and mingcs <= 15
        then 0
    end as gcs_score
  -- chronic disease: only the highest-scoring comorbidity counts
  , case
      when aids = 1 then 17
      when hem  = 1 then 10
      when mets = 1 then 9
      else 0
    end as comorbidity_score
  -- type of admission
  , case
      when admissiontype = 'ScheduledSurgical' then 0
      when admissiontype = 'Medical' then 6
      when admissiontype = 'UnscheduledSurgical' then 8
      else null
    end as admissiontype_score
from cohort
)
-- score: total SAPS II per ICU stay, computed in its own step so the final
-- SELECT can feed it into the mortality-probability formula.
-- A missing (NULL) component is counted as 0, i.e. treated as normal.
, score AS
(
  SELECT sc.*
       , COALESCE(sc.age_score, 0)
         + COALESCE(sc.hr_score, 0)
         + COALESCE(sc.sysbp_score, 0)
         + COALESCE(sc.temp_score, 0)
         + COALESCE(sc.pao2fio2_score, 0)
         + COALESCE(sc.uo_score, 0)
         + COALESCE(sc.bun_score, 0)
         + COALESCE(sc.wbc_score, 0)
         + COALESCE(sc.potassium_score, 0)
         + COALESCE(sc.sodium_score, 0)
         + COALESCE(sc.bicarbonate_score, 0)
         + COALESCE(sc.bilirubin_score, 0)
         + COALESCE(sc.gcs_score, 0)
         + COALESCE(sc.comorbidity_score, 0)
         + COALESCE(sc.admissiontype_score, 0)
         AS sapsii
  FROM scorecomp sc
)
-- Final output: every ICU stay in icustays with its total score, the
-- predicted mortality probability
--   1 / (1 + exp(-(-7.7631 + 0.0737 * sapsii + 0.9971 * ln(sapsii + 1))))
-- and the individual component scores (NULL where the input was missing).
SELECT ie.subject_id
     , ie.hadm_id
     , ie.icustay_id
     , s.sapsii
     , 1 / (1 + exp(- (-7.7631 + 0.0737*(s.sapsii) + 0.9971*(ln(s.sapsii + 1))) )) AS sapsii_prob
     , s.age_score
     , s.hr_score
     , s.sysbp_score
     , s.temp_score
     , s.pao2fio2_score
     , s.uo_score
     , s.bun_score
     , s.wbc_score
     , s.potassium_score
     , s.sodium_score
     , s.bicarbonate_score
     , s.bilirubin_score
     , s.gcs_score
     , s.comorbidity_score
     , s.admissiontype_score
FROM icustays ie
LEFT JOIN score s
  ON s.icustay_id = ie.icustay_id
ORDER BY ie.icustay_id;