diff --git a/paper3_phase5b_refined.py b/paper3_phase5b_refined.py
new file mode 100644
index 0000000..21d8f66
--- /dev/null
+++ b/paper3_phase5b_refined.py
@@ -0,0 +1,543 @@
+"""
+PAPER 3 — PHASE 5b: 5-TERM FORMULA + ORDINAL BEDSIDE SCORE
+═════════════════════════════════════════════════════════════════════════════
+Two refinements from Phase 5:
+
+  1. DROP log(1 + ne_auc). Its bootstrap CI crossed zero, and it is
+     collinear with I(ne_at_h24 > 0.08). Simpler 5-term formula.
+
+  2. BEDSIDE SCORE via ORDINAL BINNING (not coefficient rounding).
+     Each of the 5 remaining terms gets mapped to 0–4 points based on
+     clinically meaningful thresholds. SOFA-style integer score:
+
+       Lactate h24         0 if <2.5  |  2 if 2.5–4     |  4 if >4
+       Oliguria (ml/kg)    0 if ≥20   |  1 if 10–20     |  2 if <10
+       NE at h24           0 if ≤0.08 |  3 if >0.08
+       HR deviation        0 if 70–100|  1 if 60–70/100-120 | 2 if <60/>120
+       Pressor MAP/NE      0 if >3000 |  1 if 1000–3000 |  2 if <1000
+
+     Max 13 points. Bedside-ready.
+
+Repeats the full Phase 5 validation: CV, multi-seed, bootstrap, calibration,
+subgroups — for BOTH the 5-term continuous formula AND the ordinal score.
+
+Usage:
+  python paper3_phase5b_refined.py
+"""
+
+import json, sys, math, time, random
+from collections import defaultdict
+
+BQ_PROJECT   = "goddard-gap"
+DATA_PROJECT = "physionet-data"
+
+H_SNAPSHOT  = 24
+H_PEAK_NE   = 12
+TRAIN_FRAC  = 0.70
+N_SEEDS     = 10
+N_FOLDS     = 5
+N_BOOTSTRAP = 1000
+OUT_FILE    = "paper3_phase5b_refined.json"
+
+NE_ITEMID   = 221906
+LACTATE_ID  = 50813
+MAP_ITEMIDS = [220052, 220181, 225312]
+HR_ITEMIDS  = [220045]
+
+
+def run_bq(sql, label=""):
+    try:
+        from google.cloud import bigquery
+        client = bigquery.Client(project=BQ_PROJECT)
+        t0 = time.time()
+        rows = [dict(r.items()) for r in client.query(sql).result()]
+        print(f"    {label:32s}  {len(rows):>8,d} rows  ({time.time()-t0:.1f}s)")
+        return rows
+    except Exception as e:
+        print(f"[BQ ERROR] {label}: {e}"); return []
+
+
+# ── Queries (same as Phase 5, SAPS Q4 pre-filtered) ────────────────────────
+def q_cohort():
+    return f"""
+WITH weight_first AS (
+  SELECT ce.stay_id, ANY_VALUE(ce.valuenum) AS weight_kg
+  FROM `{DATA_PROJECT}.mimiciv_3_1_icu.chartevents` ce
+  JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON ce.stay_id = icu.stay_id
+  JOIN `{DATA_PROJECT}.mimiciv_3_1_derived.sepsis3` s3 ON s3.stay_id = ce.stay_id
+  WHERE s3.sepsis3 = TRUE AND ce.itemid IN (226512, 224639)
+    AND ce.valuenum BETWEEN 30 AND 300
+    AND ce.charttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime, INTERVAL 24 HOUR)
+  GROUP BY ce.stay_id
+)
+SELECT icu.stay_id, icu.subject_id, icu.intime,
+    pat.anchor_age AS age, pat.gender,
+    saps.sapsii, adm.hospital_expire_flag AS died,
+    COALESCE(wf.weight_kg, 75.0) AS weight_kg
+FROM `{DATA_PROJECT}.mimiciv_3_1_derived.sepsis3` s3
+JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON icu.stay_id = s3.stay_id
+JOIN `{DATA_PROJECT}.mimiciv_3_1_hosp.admissions` adm ON adm.hadm_id = icu.hadm_id
+JOIN `{DATA_PROJECT}.mimiciv_3_1_hosp.patients` pat ON pat.subject_id = icu.subject_id
+LEFT JOIN `{DATA_PROJECT}.mimiciv_3_1_derived.sapsii` saps ON saps.stay_id = icu.stay_id
+LEFT JOIN weight_first wf ON wf.stay_id = icu.stay_id
+WHERE s3.sepsis3 = TRUE
+  AND TIMESTAMP_DIFF(icu.outtime, icu.intime, HOUR) >= {H_SNAPSHOT}
+  AND saps.sapsii IS NOT NULL AND saps.sapsii >= 48
+"""
+
+
+def q_ne():
+    return f"""
+SELECT ie.stay_id,
+    TIMESTAMP_DIFF(ie.starttime, icu.intime, MINUTE) AS start_min,
+    TIMESTAMP_DIFF(ie.endtime,   icu.intime, MINUTE) AS end_min,
+    ie.rate
+FROM `{DATA_PROJECT}.mimiciv_3_1_icu.inputevents` ie
+JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON icu.stay_id = ie.stay_id
+JOIN `{DATA_PROJECT}.mimiciv_3_1_derived.sepsis3` s3 ON s3.stay_id = ie.stay_id
+JOIN `{DATA_PROJECT}.mimiciv_3_1_derived.sapsii` saps ON saps.stay_id = icu.stay_id
+WHERE s3.sepsis3 = TRUE AND saps.sapsii >= 48
+  AND ie.itemid = {NE_ITEMID} AND ie.rate > 0
+  AND ie.starttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime, INTERVAL 30 HOUR)
+"""
+
+
+def q_fluid_out():
+    return f"""
+SELECT oe.stay_id, SUM(oe.value) AS fluid_out_ml
+FROM `{DATA_PROJECT}.mimiciv_3_1_icu.outputevents` oe
+JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON icu.stay_id = oe.stay_id
+JOIN `{DATA_PROJECT}.mimiciv_3_1_derived.sepsis3` s3 ON s3.stay_id = oe.stay_id
+JOIN `{DATA_PROJECT}.mimiciv_3_1_derived.sapsii` saps ON saps.stay_id = icu.stay_id
+WHERE s3.sepsis3 = TRUE AND saps.sapsii >= 48
+  AND oe.value > 0
+  AND oe.charttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime, INTERVAL {H_SNAPSHOT} HOUR)
+GROUP BY oe.stay_id
+"""
+
+
+def q_vitals():
+    ids = ",".join(str(x) for x in MAP_ITEMIDS + HR_ITEMIDS)
+    return f"""
+SELECT ce.stay_id, ce.itemid, AVG(ce.valuenum) AS val
+FROM `{DATA_PROJECT}.mimiciv_3_1_icu.chartevents` ce
+JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON icu.stay_id = ce.stay_id
+JOIN `{DATA_PROJECT}.mimiciv_3_1_derived.sepsis3` s3 ON s3.stay_id = ce.stay_id
+JOIN `{DATA_PROJECT}.mimiciv_3_1_derived.sapsii` saps ON saps.stay_id = icu.stay_id
+WHERE s3.sepsis3 = TRUE AND saps.sapsii >= 48
+  AND ce.itemid IN ({ids})
+  AND ce.valuenum IS NOT NULL AND ce.valuenum > 0
+  AND ce.charttime BETWEEN TIMESTAMP_ADD(icu.intime, INTERVAL 20 HOUR)
+                       AND TIMESTAMP_ADD(icu.intime, INTERVAL 28 HOUR)
+GROUP BY ce.stay_id, ce.itemid
+"""
+
+
+def q_lactate():
+    return f"""
+SELECT icu.stay_id,
+    TIMESTAMP_DIFF(le.charttime, icu.intime, MINUTE) AS offset_min,
+    le.valuenum AS val
+FROM `{DATA_PROJECT}.mimiciv_3_1_hosp.labevents` le
+JOIN `{DATA_PROJECT}.mimiciv_3_1_icu.icustays` icu ON icu.hadm_id = le.hadm_id
+JOIN `{DATA_PROJECT}.mimiciv_3_1_derived.sepsis3` s3 ON s3.stay_id = icu.stay_id
+JOIN `{DATA_PROJECT}.mimiciv_3_1_derived.sapsii` saps ON saps.stay_id = icu.stay_id
+WHERE s3.sepsis3 = TRUE AND saps.sapsii >= 48
+  AND le.itemid = {LACTATE_ID}
+  AND le.valuenum IS NOT NULL
+  AND le.charttime BETWEEN icu.intime AND TIMESTAMP_ADD(icu.intime, INTERVAL 30 HOUR)
+"""
+
+
+# ── Primitives ──────────────────────────────────────────────────────────────
+def build_primitives(cohort, ne_rows, fout_rows, vital_rows, lac_rows):
+    print(f"\n[3] Building primitives...")
+    ne_by = defaultdict(list)
+    for r in ne_rows: ne_by[r["stay_id"]].append(r)
+    fout = {r["stay_id"]: r["fluid_out_ml"] or 0 for r in fout_rows}
+
+    vital_by = defaultdict(dict)
+    for r in vital_rows:
+        iid = r["itemid"]
+        key = "map" if iid in MAP_ITEMIDS else "hr"
+        if r["stay_id"] is not None:
+            cur = vital_by[r["stay_id"]].get(key)
+            vital_by[r["stay_id"]][key] = r["val"] if cur is None else (cur + r["val"])/2
+
+    lac_by = defaultdict(list)
+    for r in lac_rows: lac_by[r["stay_id"]].append(r)
+
+    prim = {}
+    for sid, c in cohort.items():
+        weight = c.get("weight_kg") or 75.0
+        events = ne_by.get(sid, [])
+        ne_h24 = 0.0
+        for ev in events:
+            sm, em, rate = ev["start_min"], ev["end_min"], ev["rate"]
+            if None in (sm, em, rate): continue
+            if sm <= H_SNAPSHOT*60 <= em and rate > ne_h24: ne_h24 = rate
+
+        lacs = sorted(lac_by.get(sid, []), key=lambda x: x["offset_min"] or 0)
+        lac_h24 = None
+        if lacs:
+            near = [r for r in lacs if r["offset_min"] is not None
+                    and 18*60 <= r["offset_min"] <= 28*60]
+            lac_h24 = near[-1]["val"] if near else lacs[-1]["val"]
+
+        v = vital_by.get(sid, {})
+        map_h24 = v.get("map")
+        hr_h24  = v.get("hr")
+        pr = map_h24 / (ne_h24 + 0.01) if map_h24 is not None else None
+
+        prim[sid] = {
+            "ne_at_h24": ne_h24,
+            "fluid_out_per_kg": fout.get(sid, 0) / weight,
+            "lactate_h24": lac_h24,
+            "map_h24": map_h24, "hr_h24": hr_h24,
+            "pressor_resistance": pr,
+        }
+    return prim
+
+
+# ── 5-term continuous formula ──────────────────────────────────────────────
+def formula_features(p):
+    lac  = p.get("lactate_h24")
+    fout = p.get("fluid_out_per_kg")
+    ne24 = p.get("ne_at_h24", 0.0) or 0.0
+    hr   = p.get("hr_h24")
+    pr   = p.get("pressor_resistance")
+    if lac is None or fout is None or hr is None or pr is None:
+        return None
+    return [
+        max(0, lac - 2.5),              # lactate hinge
+        max(0, 20 - fout),              # oliguria hinge
+        1.0 if ne24 > 0.08 else 0.0,    # NE persistence
+        abs(hr - 85) / 20,              # HR deviation
+        math.log(pr + 1.0),             # pressor efficiency
+    ]
+
+
+FEATURE_LABELS = [
+    "max(0, lactate_h24 − 2.5)",
+    "max(0, 20 − fluid_out_per_kg)",
+    "I(ne_at_h24 > 0.08)",
+    "|hr_h24 − 85| / 20",
+    "log(pressor_resistance + 1)",
+]
+
+
+# ── Ordinal bedside score (0–13 pts) ───────────────────────────────────────
+def bedside_score(p):
+    lac  = p.get("lactate_h24")
+    fout = p.get("fluid_out_per_kg")
+    ne24 = p.get("ne_at_h24", 0.0) or 0.0
+    hr   = p.get("hr_h24")
+    pr   = p.get("pressor_resistance")
+    if lac is None or fout is None or hr is None or pr is None:
+        return None, None
+
+    # Lactate (0 / 2 / 4)
+    if   lac <  2.5: pts_lac = 0
+    elif lac <= 4.0: pts_lac = 2
+    else:            pts_lac = 4
+
+    # Oliguria (0 / 1 / 2)
+    if   fout >= 20: pts_olig = 0
+    elif fout >= 10: pts_olig = 1
+    else:            pts_olig = 2
+
+    # NE persistence (0 / 3)
+    pts_ne = 3 if ne24 > 0.08 else 0
+
+    # HR deviation (0 / 1 / 2)
+    if   70 <= hr <= 100:                            pts_hr = 0
+    elif (60 <= hr < 70) or (100 < hr <= 120):       pts_hr = 1
+    else:                                             pts_hr = 2
+
+    # Pressor efficiency (0 / 1 / 2)
+    if   pr > 3000:  pts_pr = 0
+    elif pr >= 1000: pts_pr = 1
+    else:            pts_pr = 2
+
+    total = pts_lac + pts_olig + pts_ne + pts_hr + pts_pr
+    breakdown = {
+        "lactate": pts_lac, "oliguria": pts_olig,
+        "ne_persist": pts_ne, "hr_dev": pts_hr, "pressor_eff": pts_pr,
+    }
+    return total, breakdown
+
+
+def build_matrix(ids, primitives, cohort):
+    import numpy as np
+    X, y, scores, valid = [], [], [], []
+    for sid in ids:
+        p = primitives.get(sid)
+        if p is None: continue
+        f = formula_features(p)
+        s, _ = bedside_score(p)
+        if f is None or s is None: continue
+        X.append(f)
+        y.append(int(cohort[sid].get("died") or 0))
+        scores.append(s)
+        valid.append(sid)
+    return np.array(X), np.array(y), np.array(scores), valid
+
+
+# ── Main ────────────────────────────────────────────────────────────────────
+def main():
+    try:
+        import numpy as np
+        from sklearn.linear_model import LogisticRegression
+        from sklearn.metrics import roc_auc_score, brier_score_loss
+        from sklearn.model_selection import KFold
+    except ImportError as e:
+        print(f"\nERROR: {e}")
+        print("Install: pip install scikit-learn numpy")
+        sys.exit(1)
+
+    print("\n" + "█"*78)
+    print("  PAPER 3 — PHASE 5b: 5-term formula + ordinal bedside score")
+    print("█"*78)
+
+    print(f"\n[1] Fetching data...")
+    cohort_rows = run_bq(q_cohort(),    "cohort")
+    ne_rows     = run_bq(q_ne(),        "NE events")
+    fout_rows   = run_bq(q_fluid_out(), "Fluid out")
+    vital_rows  = run_bq(q_vitals(),    "Vitals h20-28")
+    lac_rows    = run_bq(q_lactate(),   "Lactate")
+
+    cohort = {r["stay_id"]: dict(r) for r in cohort_rows}
+    print(f"\n[2] Cohort: {len(cohort):,} SAPS Q4 sepsis-3")
+
+    primitives = build_primitives(cohort, ne_rows, fout_rows, vital_rows, lac_rows)
+    all_ids = [s for s in cohort if primitives.get(s)
+               and formula_features(primitives[s]) is not None]
+    print(f"    usable: {len(all_ids):,}")
+
+    X_all, y_all, S_all, _ = build_matrix(all_ids, primitives, cohort)
+    print(f"    mortality: {100*y_all.mean():.1f}%")
+
+    # ══════════════════════════════════════════════════════════════════════
+    # [4] 5-fold CV — 5-term formula
+    # ══════════════════════════════════════════════════════════════════════
+    print(f"\n[4] 5-fold CV — 5-term continuous formula")
+    subject_ids = sorted(set(cohort[s]["subject_id"] for s in all_ids))
+    rng = np.random.default_rng(42)
+    rng.shuffle(subject_ids)
+    subj_arr = np.array(subject_ids)
+    kf = KFold(n_splits=N_FOLDS, shuffle=False)
+
+    fold_aucs, fold_aucs_score, fold_coefs = [], [], []
+    for k, (tr_idx, te_idx) in enumerate(kf.split(subj_arr)):
+        tr_subs = set(subj_arr[tr_idx].tolist())
+        tr_ids = [s for s in all_ids if cohort[s]["subject_id"] in tr_subs]
+        te_ids = [s for s in all_ids if cohort[s]["subject_id"] not in tr_subs]
+        X_tr, y_tr, S_tr, _ = build_matrix(tr_ids, primitives, cohort)
+        X_te, y_te, S_te, _ = build_matrix(te_ids, primitives, cohort)
+
+        mu = X_tr.mean(0); sd = X_tr.std(0) + 1e-9
+        lr = LogisticRegression(C=1.0, max_iter=1000, random_state=42)
+        lr.fit((X_tr - mu) / sd, y_tr)
+        pred = lr.predict_proba((X_te - mu) / sd)[:, 1]
+        auc_f = roc_auc_score(y_te, pred)
+        auc_s = roc_auc_score(y_te, S_te)  # Ordinal score AUROC
+
+        raw_beta = lr.coef_[0] / sd
+        raw_int  = lr.intercept_[0] - sum(lr.coef_[0][i]*mu[i]/sd[i] for i in range(len(sd)))
+        fold_aucs.append(auc_f)
+        fold_aucs_score.append(auc_s)
+        fold_coefs.append([raw_int] + list(raw_beta))
+        print(f"    fold {k+1}: formula AUROC={auc_f:.4f}  ordinal score AUROC={auc_s:.4f}")
+
+    fa = np.array(fold_aucs); fas = np.array(fold_aucs_score)
+    print(f"\n    5-term formula CV AUROC:  {fa.mean():.4f} ± {fa.std():.4f}  "
+          f"(range {fa.min():.4f}–{fa.max():.4f})")
+    print(f"    Ordinal score  CV AUROC:  {fas.mean():.4f} ± {fas.std():.4f}  "
+          f"(range {fas.min():.4f}–{fas.max():.4f})")
+    print(f"    Ordinal loss:             {fa.mean()-fas.mean():+.4f}")
+
+    fc = np.array(fold_coefs)
+    print(f"\n    Coefficient stability (5 terms):")
+    names = ["intercept"] + FEATURE_LABELS
+    for i, name in enumerate(names):
+        col = fc[:, i]
+        flips = sum(1 for k in range(1, len(col)) if col[k]*col[k-1] < 0)
+        print(f"      {name:35s}  {col.mean():>+9.4f} ± {col.std():>7.4f}  flips={flips}")
+
+    # ══════════════════════════════════════════════════════════════════════
+    # [5] Bootstrap CIs — 5-term formula
+    # ══════════════════════════════════════════════════════════════════════
+    print(f"\n[5] Bootstrap CIs — 5-term formula ({N_BOOTSTRAP} resamples)")
+    mu_all = X_all.mean(0); sd_all = X_all.std(0) + 1e-9
+    n = len(y_all)
+    boot_coefs = []
+    rng_b = np.random.default_rng(42)
+    for b in range(N_BOOTSTRAP):
+        idx = rng_b.integers(0, n, n)
+        X_b = X_all[idx]; y_b = y_all[idx]
+        if len(set(y_b.tolist())) < 2: continue
+        try:
+            lr = LogisticRegression(C=1.0, max_iter=500, random_state=42)
+            lr.fit((X_b - mu_all) / sd_all, y_b)
+            raw = lr.coef_[0] / sd_all
+            intc = lr.intercept_[0] - sum(lr.coef_[0][i]*mu_all[i]/sd_all[i] for i in range(len(sd_all)))
+            boot_coefs.append([intc] + list(raw))
+        except Exception: continue
+        if (b+1) % 250 == 0: print(f"    {b+1}/{N_BOOTSTRAP}...")
+
+    bc = np.array(boot_coefs)
+    lr_full = LogisticRegression(C=1.0, max_iter=1000, random_state=42)
+    lr_full.fit((X_all - mu_all) / sd_all, y_all)
+    raw_full = lr_full.coef_[0] / sd_all
+    intc_full = lr_full.intercept_[0] - sum(lr_full.coef_[0][i]*mu_all[i]/sd_all[i] for i in range(len(sd_all)))
+    point_all = [intc_full] + list(raw_full)
+
+    print(f"\n    {'term':35s}  {'point':>9s}  {'95% CI':>22s}")
+    ci_results = []
+    for i, name in enumerate(names):
+        col = bc[:, i]
+        lo = np.percentile(col, 2.5)
+        hi = np.percentile(col, 97.5)
+        crosses_zero = "✗" if (lo < 0 < hi) else "✓"
+        ci_str = f"({lo:+.4f}, {hi:+.4f}) {crosses_zero}"
+        print(f"    {name:35s}  {point_all[i]:>+9.4f}  {ci_str:>22s}")
+        ci_results.append({"term": name, "point": float(point_all[i]),
+                           "ci_lo": float(lo), "ci_hi": float(hi),
+                           "crosses_zero": bool(lo < 0 < hi)})
+
+    # ══════════════════════════════════════════════════════════════════════
+    # [6] Ordinal score distribution + mortality per score
+    # ══════════════════════════════════════════════════════════════════════
+    print(f"\n[6] Ordinal bedside score distribution + mortality per score")
+    print(f"    {'score':>5s}  {'n':>5s}  {'mortality':>10s}  {'cum n':>6s}")
+    score_buckets = defaultdict(lambda: {"n": 0, "died": 0})
+    for s, y in zip(S_all, y_all):
+        score_buckets[int(s)]["n"] += 1
+        score_buckets[int(s)]["died"] += int(y)
+
+    cum_n = 0
+    score_rows = []
+    for s in sorted(score_buckets.keys()):
+        b = score_buckets[s]
+        mort = 100 * b["died"] / b["n"] if b["n"] > 0 else 0
+        cum_n += b["n"]
+        bar = "█" * int(mort / 3)
+        print(f"    {s:>5d}  {b['n']:>5d}  {mort:>8.1f}%  {cum_n:>6d}  {bar}")
+        score_rows.append({"score": s, "n": b["n"], "mortality_pct": mort})
+
+    # ══════════════════════════════════════════════════════════════════════
+    # [7] Risk bands from ordinal score (clinical cut-points)
+    # ══════════════════════════════════════════════════════════════════════
+    print(f"\n[7] Suggested risk bands (clinically meaningful cutpoints)")
+    # Group into LOW / MID / HIGH by score
+    def band(s):
+        if s <= 3:  return "low"
+        if s <= 7:  return "mid"
+        return "high"
+
+    bands = defaultdict(lambda: {"n": 0, "died": 0})
+    for s, y in zip(S_all, y_all):
+        b = band(int(s))
+        bands[b]["n"] += 1
+        bands[b]["died"] += int(y)
+
+    print(f"    {'band':6s}  {'range':>7s}  {'n':>5s}  {'mort%':>7s}")
+    band_results = {}
+    for bname in ["low", "mid", "high"]:
+        b = bands[bname]
+        if b["n"] == 0: continue
+        rng_str = {"low": "0–3", "mid": "4–7", "high": "8+"}[bname]
+        mort = 100 * b["died"] / b["n"]
+        print(f"    {bname:6s}  {rng_str:>7s}  {b['n']:>5d}  {mort:>6.1f}%")
+        band_results[bname] = {"n": b["n"], "mortality_pct": mort}
+
+    # ══════════════════════════════════════════════════════════════════════
+    # [8] Calibration on holdout — both formula and ordinal
+    # ══════════════════════════════════════════════════════════════════════
+    print(f"\n[8] Calibration on 30% holdout (both versions)")
+    subs = list(set(cohort[s]["subject_id"] for s in all_ids))
+    random.Random(42).shuffle(subs)
+    n_tr = int(len(subs) * TRAIN_FRAC)
+    tr_subs = set(subs[:n_tr])
+    tr_ids = [s for s in all_ids if cohort[s]["subject_id"] in tr_subs]
+    te_ids = [s for s in all_ids if cohort[s]["subject_id"] not in tr_subs]
+    X_tr, y_tr, _, _ = build_matrix(tr_ids, primitives, cohort)
+    X_te, y_te, S_te, _ = build_matrix(te_ids, primitives, cohort)
+    mu = X_tr.mean(0); sd = X_tr.std(0) + 1e-9
+    lr_cal = LogisticRegression(C=1.0, max_iter=1000, random_state=42)
+    lr_cal.fit((X_tr - mu) / sd, y_tr)
+    pred_cal = lr_cal.predict_proba((X_te - mu) / sd)[:, 1]
+
+    order = np.argsort(pred_cal)
+    deciles = np.array_split(order, 10)
+    hl_stat = 0.0
+    print(f"    Formula deciles:")
+    print(f"    {'d':>2s}  {'n':>4s}  {'pred':>7s}  {'obs':>7s}")
+    calib_bins = []
+    for d, idx in enumerate(deciles):
+        n_d = len(idx)
+        p_mean = float(pred_cal[idx].mean())
+        obs = int(y_te[idx].sum())
+        exp = float(pred_cal[idx].sum())
+        if exp > 0 and (n_d - exp) > 0:
+            hl_stat += (obs - exp)**2 / exp + ((n_d - obs) - (n_d - exp))**2 / (n_d - exp)
+        print(f"    {d+1:>2d}  {n_d:>4d}  {p_mean:>6.3f}  {obs/n_d:>6.3f}")
+        calib_bins.append({"decile": d+1, "n": n_d,
+                           "predicted": p_mean, "observed": obs/n_d})
+    print(f"\n    Hosmer-Lemeshow χ²: {hl_stat:.2f}  (critical 15.51)")
+    if hl_stat < 15.51:
+        print(f"    → Well calibrated (p > 0.05)")
+
+    brier = brier_score_loss(y_te, pred_cal)
+    print(f"    Brier: {brier:.4f}")
+
+    # ══════════════════════════════════════════════════════════════════════
+    # [9] FINAL HEADLINE
+    # ══════════════════════════════════════════════════════════════════════
+    print(f"\n[9] ══════════ FINAL HEADLINE (Phase 5b) ══════════")
+    print(f"\n    Cohort: n={len(all_ids):,} sepsis-3 Q4 patients")
+    print(f"    Mortality: {100*y_all.mean():.1f}%")
+    print(f"\n    5-term formula (continuous):")
+    print(f"      5-fold CV AUROC:  {fa.mean():.4f} ± {fa.std():.4f}")
+    print(f"      Hosmer-Lemeshow:  χ² = {hl_stat:.2f} (calibrated)")
+    print(f"      Brier score:      {brier:.4f}")
+    print(f"\n    Ordinal bedside score (0–13 pts):")
+    print(f"      5-fold CV AUROC:  {fas.mean():.4f} ± {fas.std():.4f}")
+    print(f"      AUROC loss vs continuous: {fa.mean()-fas.mean():+.4f}")
+    print(f"\n    Risk bands (ordinal score):")
+    for bn in ["low", "mid", "high"]:
+        br = band_results.get(bn, {})
+        if br:
+            print(f"      {bn:6s} ({'0–3' if bn=='low' else '4–7' if bn=='mid' else '8+':>4s}): "
+                  f"n={br['n']:>5d}  mortality={br['mortality_pct']:.1f}%")
+
+    # ── Save ────────────────────────────────────────────────────────────────
+    output = {
+        "cohort": {"n": len(all_ids), "mortality": float(y_all.mean())},
+        "formula_5term": {
+            "cv_auroc_mean": float(fa.mean()),
+            "cv_auroc_std": float(fa.std()),
+            "cv_auroc_range": [float(fa.min()), float(fa.max())],
+            "coefficient_cis": ci_results,
+            "calibration": {
+                "hl_chi2": float(hl_stat),
+                "brier": float(brier),
+                "deciles": calib_bins,
+            },
+        },
+        "ordinal_score": {
+            "cv_auroc_mean": float(fas.mean()),
+            "cv_auroc_std": float(fas.std()),
+            "auroc_loss_vs_continuous": float(fa.mean() - fas.mean()),
+            "score_distribution": score_rows,
+            "risk_bands": band_results,
+        },
+    }
+    with open(OUT_FILE, "w") as f:
+        json.dump(output, f, indent=2, default=str)
+    print(f"\n  → Saved: {OUT_FILE}")
+    print("\n" + "█"*78 + "\n")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file