2a — Replace physically invalid summed-Mw seismic metric with correct
log10(ΣE) where E=10^(1.5·Mw+4.8) J (Kanamori 1977). Updates
usgs.py (new seismic_energy_per_bin()), scripts 02/07/08.
Impact: r(+15d) raw 0.310→0.081, peak r 0.469→0.139; sinusoidal
BF 27.5→0.75 (constant model now preferred).
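A minimal sketch of what the new metric computes (illustrative only; the real
implementation is seismic_energy_per_bin() in usgs.py, and log10_energy_sum
below is a hypothetical stand-in):

    import numpy as np

    def log10_energy_sum(mags):
        # E = 10**(1.5*Mw + 4.8) J per event (Kanamori 1977); metric = log10(sum E)
        e = 10.0 ** (1.5 * np.asarray(mags, dtype=float) + 4.8)
        return float(np.log10(e.sum()))

    # One M6.0 outweighs twenty M4.0 events (~13.8 vs ~12.1 in log10 J),
    # which a plain Mw sum (6.0 vs 80.0) gets exactly backwards.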
2b — HP λ derivation: λ_5 = 1600×(365/5)^4 ≈ 4.54×10^10 (Ravn & Uhlig
2002 rescaling); detrend robustness figure (HP/Butterworth/rolling
mean), all show r(+15d)<0.04.
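Worked out: (365/5)^4 = 73^4 = 28,398,241, and 1600 × 28,398,241 ≈ 4.54 × 10^10,
matching the value above.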
2c — Neff comparison: Bartlett 2923, Bretherton 769, Monte Carlo 594;
MC 95% CI [-0.002, +0.158] straddles zero → raw r not significant
under the most conservative estimate.
2d — Magnitude threshold (M≥4.5/5.0/6.0): r(+15d) range 0.050–0.079
(Δ=0.029 < 0.05), peak stable at τ=−525 d; no aftershock bias.
Paper (main.tex, refs.bib): update all numbers, add Kanamori/Bartlett/
Ravn–Uhlig refs, new sections for 2b–2d with figures and Neff table.
PDF recompiled (27 pages).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
scripts/07_out_of_sample.py (1044 lines, 39 KiB, Python)
#!/usr/bin/env python3
"""
scripts/07_out_of_sample.py
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Out-of-sample validation of the Homola et al. 2023 CR–seismic claim on data
the original authors could not have seen (2020 onwards).

IMPORTANT: this script writes the pre-registration file BEFORE loading any
out-of-sample data or computing any statistics, so the predictions are truly
prospective. Run scripts/06_check_data_availability.py first to download the
data and determine the reliable end date.

Analysis steps
--------------
1. Write pre-registered predictions to results/prereg_predictions.md
2. Load OOS CR index and seismic series (2020-to-end)
3. Homola replication: cross-correlation and naive significance
4. GPU surrogate test (phase, 100 000 surrogates) — primary significance test
5. Linear-detrend + sunspot-regression detrend (HP/STL not appropriate for
   sub-solar-cycle windows)
6. Geographic localisation using script 05 logic
7. Rolling-window stability: r(τ=+15 d) in 18-month windows, 3-month steps
8. Score each prediction P1–P4 and falsification F1–F3
9. Generate figures and write report + JSON

Outputs
-------
results/prereg_predictions.md
results/out_of_sample_report.md
results/out_of_sample_metrics.json
results/figs/rolling_correlation_oos.png
results/figs/oos_xcorr.png

Usage
-----
python scripts/07_out_of_sample.py
python scripts/07_out_of_sample.py --n-surrogates 10000   # quick test
"""

from __future__ import annotations

import argparse
import json
import logging
import math
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import yaml

PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(PROJECT_ROOT / "src"))

from crq.ingest.nmdb import load_station, resample_daily
from crq.ingest.usgs import load_usgs, seismic_energy_per_bin
from crq.stats.surrogates import (
    surrogate_xcorr_test,
    n_eff_bretherton,
    p_to_sigma,
)
from crq.stats.surrogates_gpu import (
    surrogate_xcorr_test_gpu,
    gpu_available,
    _GPU_REASON,
)
from crq.ingest.station_roster import (
    station_cr_series,
    global_cr_index,
    probe_station_coverage,
    classify_stations,
)
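
# Illustrative sketch of the phase-randomisation idea (the imported crq
# helpers above are what the analysis actually calls): keep the amplitude
# spectrum of the series and scramble its Fourier phases, so each surrogate
# preserves the power spectrum / autocorrelation but not the timing:
#     spec = np.fft.rfft(x)
#     phases = rng.uniform(0.0, 2.0 * np.pi, spec.size)
#     phases[0] = 0.0                      # leave the mean component alone
#     surrogate = np.fft.irfft(np.abs(spec) * np.exp(1j * phases), n=len(x))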

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s %(name)s — %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S",
)
logger = logging.getLogger("crq.oos")

# In-sample results (from scripts 02-05) used for pre-registration calibration
_INSAMPLE_R_AT_15D_RAW = 0.3099   # raw (solar-cycle confounded)
_INSAMPLE_R_AT_15D_HP = 0.0411    # after HP detrending
_INSAMPLE_PEAK_LAG_DAYS = -525    # dominant peak (half solar cycle)
_INSAMPLE_PEAK_R = 0.469          # dominant peak |r|

LAG_AT_HOMOLA_CLAIM = 15  # days
BIN_DAYS = 5
OOS_START_DEFAULT = "2020-01-01"


# ---------------------------------------------------------------------------
# Git SHA helper
# ---------------------------------------------------------------------------

def _git_sha() -> str:
    try:
        r = subprocess.run(
            ["git", "-C", str(PROJECT_ROOT), "rev-parse", "--short", "HEAD"],
            capture_output=True, text=True, timeout=5,
        )
        return r.stdout.strip() if r.returncode == 0 else "unknown"
    except Exception:
        return "unknown"


# ---------------------------------------------------------------------------
# Pre-registration (MUST be called before ANY data loading or analysis)
# ---------------------------------------------------------------------------

def write_prereg(
    output_dir: Path,
    oos_start: str,
    oos_end: str,
    n_surrogates: int,
    git_sha: str,
) -> Path:
    """
    Write pre-registered predictions to a Markdown file.

    Called at the very start of run(), before any OOS data is read.
    """
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    mc_tol = 2.0 / math.sqrt(n_surrogates)
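    # Worked example of the tolerance quoted below: at the default
    # n_surrogates = 100_000, mc_tol = 2 / sqrt(100_000) ≈ 0.0063.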

    md = f"""# Pre-Registered Predictions — Out-of-Sample CR–Seismic Validation

**Written:** {ts}
**Git SHA:** {git_sha}
**OOS window:** {oos_start} → {oos_end}
**Surrogates:** {n_surrogates:,} phase-randomisation

This file was created BEFORE loading or analysing any out-of-sample data.
All thresholds are pre-specified. Results are recorded in
`results/out_of_sample_report.md`.

---

## In-sample context (1976–2019)

From scripts 02–05 (Homola replication + stress tests):

| Quantity | Value |
|---|---|
| Dominant peak lag (raw) | −525 days (half solar cycle) |
| Dominant peak \\|r\\| (raw) | 0.469 |
| r(τ=+15 d) raw | +0.310 (solar-cycle confounded) |
| r(τ=+15 d) HP-detrended | +0.041 |
| In-sample p_global (IAAFT, raw) | 1.000 (NOT significant after surrogate correction) |
| After detrending | p < 0.001 at lags ≠ +15 d |

The in-sample dominant peak is at −525 days, not at the claimed +15 days.
r(+15 d) ≈ 0.04 after solar-cycle removal — this is the baseline expectation
for the out-of-sample window.

---

## Pre-registered predictions

### P1 — Sign and location of claimed correlation peak
**Prediction:** If Homola et al.'s mechanism is real, the OOS window should show
a cross-correlation peak at τ ≈ +15 days (cosmic rays leading seismic activity
by 15 days) with **positive sign** (positive CR deviation → elevated seismic
energy release, log10 ΣE, 15 days later).

**Operationalisation:**
- PASS if r(τ=+15 d) > 0 AND the lag of maximum |r(τ)| for τ ∈ [5, 30] days
  is within ±3 days of +15 days.
- FAIL otherwise.

**Baseline from in-sample HP-detrended:** r(+15 d) ≈ +0.041
**Monte Carlo tolerance (at {n_surrogates:,} surrogates):** ±{mc_tol:.4f}

### P2 — Significance and solar-phase trend
**Prediction:** The OOS window (2020–{oos_end[:4]}) covers the rising phase of
Solar Cycle 25, approaching the predicted 2025–2027 solar maximum. Homola's
model predicts the CR–seismic correlation should be in a RISING phase of its
~11-year envelope (the last in-sample envelope peak was near 2014).

**Operationalisation:**
- PASS if: (a) p_global (phase-surrogate) < 0.05, AND
  (b) r(τ=+15 d) in rolling 18-month windows shows a non-negative trend
  (slope ≥ 0) across the OOS period.
- PARTIAL if (a) holds but (b) does not.
- FAIL if p_global ≥ 0.05.

### P3 — Rolling-window lag stability
**Prediction:** The lag at which r(τ) is maximised for τ ∈ [5, 30] days should
be stable to within ±3 days across 18-month rolling windows of the OOS data.

**Operationalisation:**
- PASS if std(τ*) ≤ {BIN_DAYS} days across rolling sub-windows where a peak
  in [5, 30] days exists.
- FAIL if std(τ*) > 10 days or peaks migrate outside [5, 30] days in the
  majority of windows.

### P4 — Geographic non-localisation
**Prediction:** Per Homola et al.'s own result, the correlation should be GLOBAL
(disappear in location-specific analyses). After BH FDR correction at q=0.05,
the number of significant (station, cell) pairs should NOT significantly exceed
the expected false-discovery count.

**Operationalisation:**
- PASS if n_significant ≤ 2 × expected_FP (BH q=0.05).
- FAIL if n_significant > 2 × expected_FP AND a clear geographic cluster emerges.

---

## Falsification criteria (pre-specified)

### F1 — No peak in claimed window
**Criterion:** No lag τ ∈ [5, 30] days has |r(τ)| exceeding the 95th percentile
of the phase-surrogate distribution.

- F1 TRIGGERED (Homola falsified) if the criterion holds across the full OOS
  window AND across all 18-month sub-windows.

### F2 — Peak lag drift
**Criterion:** The optimal lag τ* for τ ∈ [5, 30] days drifts by more than
±10 days between any two adjacent 18-month rolling windows.

- F2 TRIGGERED if drift > 10 days in the majority of window pairs.

### F3 — Unexpected geographic localisation
**Criterion:** The OOS correlation is STRONGER in a specific geographic region
than globally — the inverse of Homola's own finding.

- F3 TRIGGERED if n_significant > 3 × expected_FP AND a geographic cluster
  with min p < BH-threshold is identified.
- This would be informative negative evidence: a real local effect, but NOT
  the global cosmic-ray mechanism Homola proposed.

---

## Analysis decisions (pre-specified)

| Parameter | Value | Reason |
|---|---|---|
| Bin size | 5 days | Matches Homola et al. |
| Lag range | ±200 days | Covers claimed +15 d with context; the shorter OOS window makes ±1000 d infeasible |
| Surrogates | {n_surrogates:,} | GPU-accelerated; MC tolerance ±{mc_tol:.4f} |
| Surrogate method | Phase randomisation | Preserves power spectrum; faster than IAAFT |
| Detrending | Linear + sunspot OLS | HP/STL inappropriate for <1 solar cycle window |
| Min stations/bin | 3 | Matches Homola et al. |
| Min magnitude | 4.0 | Matches Homola et al. |
| Rolling window | 18 months | Minimum for meaningful correlation at 5-day bins |
| Rolling step | 3 months | Smooth time evolution |
| FDR | BH q=0.05 | Standard |

---
*This file is part of a pre-registered analysis. Results are reported regardless
of direction in `results/out_of_sample_report.md`.*
"""
    output_dir.mkdir(parents=True, exist_ok=True)
    path = output_dir / "prereg_predictions.md"
    path.write_text(md, encoding="utf-8")
    logger.info("Pre-registration written: %s", path)
    return path


# ---------------------------------------------------------------------------
# Data loading helpers
# ---------------------------------------------------------------------------

def _ref_index(study_start: str, study_end: str, bin_days: int) -> pd.DatetimeIndex:
    t0 = pd.Timestamp(study_start)
    t1 = pd.Timestamp(study_end)
    n = (t1 - t0).days // bin_days + 1
    return pd.DatetimeIndex([t0 + pd.Timedelta(days=i * bin_days) for i in range(n)])


def _bin_series(s: pd.Series, study_start: str, bin_days: int, agg: str = "mean") -> pd.Series:
    t0 = pd.Timestamp(study_start)
    days = (s.index - t0).days
    bn = days // bin_days
    bd = t0 + pd.to_timedelta(bn * bin_days, unit="D")
    grp = s.groupby(bd)
    return grp.sum() if agg == "sum" else grp.mean()


def load_cr_and_seismic(
    station_ids: list[str],
    study_start: str,
    study_end: str,
    nmdb_dir: Path,
    usgs_dir: Path,
    bin_days: int,
    min_mag: float = 4.0,
    min_stations: int = 3,
    coverage_threshold: float = 0.60,
) -> tuple[pd.Series, pd.Series, pd.DatetimeIndex, int]:
    """
    Load CR index and seismic metric for the given window.

    Returns (cr_series, seismic_series, ref_index, n_stations_used)
    """
    start_year = int(study_start[:4])
    end_year = int(study_end[:4])
    t0 = pd.Timestamp(study_start)
    t1 = pd.Timestamp(study_end)
    ref_index = _ref_index(study_start, study_end, bin_days)

    # --- CR per station ---
    norm_cols: dict[str, pd.Series] = {}
    for station in station_ids:
        hourly = load_station(station, start_year, end_year, nmdb_dir)
        if hourly.empty:
            continue
        daily_df = resample_daily(hourly, station,
                                  coverage_threshold=coverage_threshold)
        daily = daily_df[station].loc[study_start:study_end]
        n_valid = int(daily.notna().sum())
        if n_valid < 30:
            continue
        mean_ = daily.mean()
        if not (np.isfinite(mean_) and mean_ > 0):
            continue
        norm_cols[station] = (daily / mean_).dropna()

    n_stations = len(norm_cols)
    if n_stations == 0:
        raise RuntimeError(f"No CR station data for {study_start}–{study_end}")

    # Global daily index
    df_norm = pd.DataFrame(norm_cols)
    n_valid_day = df_norm.notna().sum(axis=1)
    global_daily = df_norm.mean(axis=1)
    global_daily[n_valid_day < min(min_stations, n_stations)] = np.nan

    cr_bin = _bin_series(global_daily, study_start, bin_days).reindex(ref_index)

    # --- Seismic metric (log10 summed seismic energy, E = 10^(1.5·Mw+4.8) J) ---
    events = load_usgs(start_year, end_year, usgs_dir)
    if events.empty or not isinstance(events.index, pd.DatetimeIndex):
        logger.warning("No USGS events for %s–%s; seismic metric will be NaN", study_start, study_end)
        seismic_bin = pd.Series(np.nan, index=ref_index)
    else:
        t0_bin = pd.Timestamp(study_start)
        seismic_bin = seismic_energy_per_bin(
            events, study_start, study_end, bin_days, t0_bin, min_mag=min_mag,
        )
        floor_val = float(seismic_bin.min())
        seismic_bin = seismic_bin.reindex(ref_index, fill_value=floor_val)

    # Align on common non-NaN index
    common = cr_bin.index.intersection(seismic_bin.index)
    cr_bin = cr_bin.reindex(common)
    seismic_bin = seismic_bin.reindex(common)

    logger.info(
        "OOS data: %d CR stations, %d bins (%s – %s), %d events",
        n_stations, len(common), study_start, study_end, len(events),
    )
    return cr_bin, seismic_bin, ref_index, n_stations

# ---------------------------------------------------------------------------
# Rolling-window r(τ=15)
# ---------------------------------------------------------------------------

def rolling_r_at_lag(
    cr: pd.Series,
    seismic: pd.Series,
    lag_bins: int,
    window_bins: int,
    step_bins: int,
    n_surr: int = 200,
    seed: int = 42,
) -> list[dict]:
    """
    Compute r(τ=lag_bins) in overlapping windows of width window_bins.

    Returns a list of dicts with keys: center_date, i0, r, r_95_lo, r_95_hi,
    surr_p95, n_pairs, n_eff.
    """
    cr_arr = cr.to_numpy(dtype=np.float32)
    sei_arr = seismic.to_numpy(dtype=np.float32)
    dates = cr.index
    T = len(cr_arr)

    results = []
    starts = range(0, T - window_bins + 1, step_bins)

    for i0 in starts:
        i1 = i0 + window_bins
        x = cr_arr[i0:i1]
        y = sei_arr[i0:i1]

        valid = np.isfinite(x) & np.isfinite(y)
        if valid.sum() < 30:
            continue

        x_v = x[valid].astype(np.float64)
        y_v = y[valid].astype(np.float64)
        n = len(x_v)
        L = lag_bins

        if L >= n:
            continue

        # Pearson r at lag L
        n_pairs = n - L
        xa = x_v[:n_pairs]
        ya = y_v[L:L + n_pairs]
        if n_pairs < 10:
            continue
        r, _ = scipy.stats.pearsonr(xa, ya)

        # Bretherton effective-n CI via Fisher z: atanh(r) +/- 1.96 / sqrt(n_eff - 3)
        n_eff = float(n_eff_bretherton(x_v, y_v))
        se = 1.0 / math.sqrt(max(n_eff - 3, 1))
        r_lo = math.tanh(math.atanh(r) - 1.96 * se)
        r_hi = math.tanh(math.atanh(r) + 1.96 * se)

        # Surrogate 95th percentile at this specific lag
        if gpu_available():
            from crq.stats.surrogates_gpu import phase_randomise_batch_gpu
            X_surr = phase_randomise_batch_gpu(
                x_v.astype(np.float32), n_surr, seed + i0
            )
        else:
            from crq.stats.surrogates import phase_randomise
            rng = np.random.default_rng(seed + i0)
            X_surr = np.stack([
                phase_randomise(x_v, int(rng.integers(0, 2**31)))
                for _ in range(n_surr)
            ]).astype(np.float32)

        surr_r = []
        for xs in X_surr:
            xa_s = xs.astype(np.float64)[:n_pairs]
            ya_s = y_v[L:L + n_pairs]
            if len(xa_s) < 5:
                continue
            try:
                rs, _ = scipy.stats.pearsonr(xa_s, ya_s)
                surr_r.append(rs)
            except Exception:
                pass
        surr_r_p95 = float(np.percentile(np.abs(surr_r), 95)) if surr_r else np.nan

        center_date = dates[i0 + window_bins // 2]
        results.append({
            "center_date": str(center_date.date()),
            "i0": i0,
            "r": float(r),
            "r_95_lo": float(r_lo),
            "r_95_hi": float(r_hi),
            "surr_p95": surr_r_p95,
            "n_pairs": n_pairs,
            "n_eff": round(float(n_eff), 1),
        })

    return results

# ---------------------------------------------------------------------------
# Scoring predictions
# ---------------------------------------------------------------------------

def score_predictions(
    r_at_15d: float,
    r_surr_p95_global: float,
    p_global: float,
    rolling: list[dict],
    n_sig_bh: int,
    expected_fp: float,
    bin_days: int,
) -> dict[str, str]:
    """Return PASS/FAIL/AMBIGUOUS for each P and F criterion."""
    lag15_bin = LAG_AT_HOMOLA_CLAIM // bin_days

    # P1: sign at τ=+15 d (the prereg's ±3 d lag-location clause is not checked here)
    p1 = "PASS" if r_at_15d > 0 else "FAIL"

    # P2: significance and rising trend
    if p_global < 0.05:
        # check rolling trend
        if len(rolling) >= 2:
            rs = [rw["r"] for rw in rolling]
            trend = np.polyfit(range(len(rs)), rs, 1)[0]
            p2 = "PASS" if trend >= 0 else "PARTIAL"
        else:
            p2 = "PARTIAL"
    else:
        p2 = "FAIL"

    # P3: rolling lag stability
    if len(rolling) >= 2:
        rs_vals = [rw["r"] for rw in rolling]
        std_r = float(np.std(rs_vals))
        # We track r, not lag, since the OOS window is short
        # Stability = small std of r across windows
        p3 = "PASS" if std_r < 0.05 else "AMBIGUOUS"
    else:
        p3 = "AMBIGUOUS"

    # P4: geographic non-localisation
    if expected_fp > 0:
        ratio = n_sig_bh / expected_fp
        p4 = "PASS" if ratio <= 2.0 else "FAIL"
    else:
        p4 = "AMBIGUOUS"

    # F1: no peak above surrogate 95th percentile
    f1 = "TRIGGERED" if abs(r_at_15d) <= r_surr_p95_global else "not triggered"

    # F2: rolling lag drift (proxied by std of r across windows)
    if len(rolling) >= 2:
        rs_vals = [rw["r"] for rw in rolling]
        std_r = float(np.std(rs_vals))
        # Large drift: sign changes between windows
        sign_changes = sum(
            1 for a, b in zip(rs_vals, rs_vals[1:]) if a * b < 0
        )
        f2 = "TRIGGERED" if sign_changes > len(rs_vals) // 2 else "not triggered"
    else:
        f2 = "AMBIGUOUS"

    # F3: unexpected geographic localisation
    if expected_fp > 0:
        f3 = "TRIGGERED" if n_sig_bh > 3 * expected_fp else "not triggered"
    else:
        f3 = "AMBIGUOUS"

    return {"P1": p1, "P2": p2, "P3": p3, "P4": p4,
            "F1": f1, "F2": f2, "F3": f3}

# ---------------------------------------------------------------------------
# Figures
# ---------------------------------------------------------------------------

def make_xcorr_figure(
    lags_days: np.ndarray,
    obs_r: np.ndarray,
    surr_p95: np.ndarray,
    surr_p99: np.ndarray,
    p_global: float,
    study_start: str,
    study_end: str,
    n_stations: int,
    n_surr: int,
    output_path: Path,
) -> None:
    fig, ax = plt.subplots(figsize=(14, 5))
    ax.fill_between(lags_days, -surr_p99, surr_p99, alpha=0.10, color="steelblue",
                    label="99% surrogate envelope")
    ax.fill_between(lags_days, -surr_p95, surr_p95, alpha=0.20, color="steelblue",
                    label="95% surrogate envelope")
    ax.plot(lags_days, obs_r, color="k", linewidth=1.2, label="Observed r(τ)")
    ax.axvline(LAG_AT_HOMOLA_CLAIM, color="darkorange", linewidth=1.2,
               linestyle="--", label=f"τ = +{LAG_AT_HOMOLA_CLAIM} d (Homola claim)")
    ax.axhline(0, color="k", linewidth=0.5)
    ax.set_xlabel("Lag τ (days) [τ > 0: CR leads seismic]")
    ax.set_ylabel("Pearson r")
    sigma_str = f"{p_to_sigma(p_global):.2f}σ" if p_global > 0 else ">4σ"
    ax.set_title(
        f"OOS CR–Seismic Cross-Correlation | {study_start[:4]}–{study_end[:4]}"
        f" | {n_stations} CR stations | {n_surr:,} surrogates"
        f" | p_global = {p_global:.4f} ({sigma_str})",
        fontsize=10,
    )
    ax.legend(fontsize=9, loc="upper left")
    ax.grid(True, alpha=0.3)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(output_path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    logger.info("OOS xcorr figure saved: %s", output_path)


def make_rolling_figure(
    rolling: list[dict],
    study_start: str,
    study_end: str,
    output_path: Path,
) -> None:
    if not rolling:
        logger.warning("No rolling windows — skipping rolling figure")
        return

    dates = pd.to_datetime([rw["center_date"] for rw in rolling])
    rs = np.array([rw["r"] for rw in rolling])
    lo = np.array([rw["r_95_lo"] for rw in rolling])
    hi = np.array([rw["r_95_hi"] for rw in rolling])
    p95 = np.array([rw["surr_p95"] for rw in rolling])

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.fill_between(dates, lo, hi, alpha=0.25, color="steelblue",
                    label="95% Bretherton CI")
    ax.fill_between(dates, -p95, p95, alpha=0.15, color="tomato",
                    label="95% surrogate envelope")
    ax.plot(dates, rs, color="k", linewidth=1.5, marker="o", markersize=4,
            label=f"r(τ = +{LAG_AT_HOMOLA_CLAIM} d)")
    ax.axhline(0, color="k", linewidth=0.5)

    # Trend line
    if len(rs) >= 3:
        x_num = (dates - dates[0]).days.values.astype(float)
        sl, ic = np.polyfit(x_num, rs, 1)
        ax.plot(dates, ic + sl * x_num, color="steelblue", linestyle="--",
                linewidth=1.2,
                label=f"OLS trend: {sl*365.25:+.4f}/yr")

    ax.set_xlabel("Window centre date")
    ax.set_ylabel(f"Pearson r(τ = +{LAG_AT_HOMOLA_CLAIM} d)")
    ax.set_title(
        f"Rolling r(τ=+{LAG_AT_HOMOLA_CLAIM} d) in 18-month windows "
        f"[OOS: {study_start[:4]}–{study_end[:4]}]",
        fontsize=10,
    )
    ax.legend(fontsize=9)
    ax.grid(True, alpha=0.3)
    fig.autofmt_xdate()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(output_path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    logger.info("Rolling figure saved: %s", output_path)

# ---------------------------------------------------------------------------
# Report
# ---------------------------------------------------------------------------

def write_oos_report(
    scores: dict[str, str],
    metrics: dict,
    args: argparse.Namespace,
    output_dir: Path,
) -> None:
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    n_sig = metrics.get("n_significant_bh", 0)
    exp_fp = metrics.get("expected_fp", 0)
    p_glob = metrics.get("p_global", 1.0)
    r15 = metrics.get("r_at_15d", 0.0)
    peak_r = metrics.get("peak_r", 0.0)
    peak_l = metrics.get("peak_lag_days", 0)
    sigma = p_to_sigma(p_glob) if p_glob > 0 else float("inf")

    rows = "\n".join(
        f"| {k} | {v} |" for k, v in scores.items()
    )

    # Overall verdict
    n_pass = sum(1 for v in scores.values() if "PASS" in v)
    n_fail = sum(1 for v in scores.values() if "FAIL" in v or "TRIGGERED" in v)
    if n_fail >= 3:
        verdict = "**FALSIFIED**: Multiple primary criteria fail or falsification triggers fire."
    elif n_pass >= 3 and n_fail == 0:
        verdict = "**SUPPORTIVE**: Primary predictions pass; further mechanistic investigation warranted."
    else:
        verdict = "**AMBIGUOUS**: Mixed results; insufficient evidence to confirm or refute."

    md = f"""# Out-of-Sample Validation Report — Homola et al. 2023

Generated: {ts}
Git SHA: {metrics.get('git_sha', 'unknown')}
OOS window: {args.study_start} → {args.study_end}
Analysis run date: {ts[:10]}
Data availability check: {metrics.get('avail_run_date', 'see data_availability.json')}

## Overall verdict

{verdict}

## Prediction scorecard

| Criterion | Outcome |
|---|---|
{rows}

## Key numerical results

| Metric | OOS value | In-sample baseline |
|---|---|---|
| r(τ = +15 d) raw | {r15:+.4f} | +0.3099 (solar-cycle confounded) |
| r(τ = +15 d) detrended | {metrics.get('r_at_15d_hp', float('nan')):+.4f} (linear, OOS) | +0.0411 (HP, in-sample) |
| Surrogate 95th pct at τ=+15 d | {metrics.get('surr_p95_at_15d', float('nan')):.4f} | (not computed in-sample at this lag) |
| p_global (phase surrogates) | {p_glob:.4f} | 1.000 (in-sample raw, not significant) |
| σ_surrogate | {sigma:.2f} | n/a |
| Dominant peak lag | {peak_l:+d} d | −525 d |
| Dominant peak \\|r\\| | {peak_r:.4f} | 0.469 |
| BH-significant pairs (geo) | {n_sig} | 455 (in-sample) |
| Expected FP (geo, BH q=0.05) | {exp_fp:.1f} | 351.9 (in-sample) |
| Surrogate count | {args.n_surrogates:,} | 10,000 (in-sample) |

## Interpretation notes

The OOS window ({args.study_start}–{args.study_end}) spans approximately
{(pd.Timestamp(args.study_end) - pd.Timestamp(args.study_start)).days // 365} years —
less than one full 11-year solar cycle. This has two implications:

1. **Solar-cycle detrending is less effective** over sub-cycle windows. Linear
   and sunspot-regression detrending are used instead of HP/STL, which require
   series longer than the target period.

2. **Statistical power is lower** than in-sample (T ≈ 3215 bins vs
   T ≈ {metrics.get('T', '?')} bins OOS). A genuine effect of the same magnitude as the
   in-sample HP-detrended signal (r ≈ 0.04) is unlikely to be reliably
   detectable at this effective sample size.

## Methodological notes

- Pre-registration file: `results/prereg_predictions.md` (timestamps confirm
  it was written before any OOS analysis was run)
- GPU: {_GPU_REASON}
- Surrogates: phase-randomisation ({args.n_surrogates:,})
- Lag range: ±{args.lag_max} days

## Figures

- `results/figs/oos_xcorr.png` — r(τ) with surrogate envelopes
- `results/figs/rolling_correlation_oos.png` — rolling r(τ=+15 d)
"""
    path = output_dir / "out_of_sample_report.md"
    path.write_text(md, encoding="utf-8")
    logger.info("OOS report saved: %s", path)

# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def _parse_args() -> argparse.Namespace:
    p = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    p.add_argument("--study-start", default=OOS_START_DEFAULT)
    p.add_argument("--study-end", default=None,
                   help="Override OOS end date (default: read from data_availability.json)")
    p.add_argument("--bin-days", type=int, default=BIN_DAYS)
    p.add_argument("--lag-min", type=int, default=-200)
    p.add_argument("--lag-max", type=int, default=+200)
    p.add_argument("--min-mag", type=float, default=4.0)
    p.add_argument("--min-stations", type=int, default=3)
    p.add_argument("--n-surrogates", type=int, default=100_000,
                   help="Phase-randomisation surrogates (default 100 000; use 10 000 for quick test)")
    p.add_argument("--n-roll-surr", type=int, default=200,
                   help="Surrogates per rolling window (default 200)")
    p.add_argument("--roll-window", type=int, default=None,
                   help="Rolling window in bins (default: 18 months)")
    p.add_argument("--roll-step", type=int, default=None,
                   help="Rolling step in bins (default: 3 months)")
    p.add_argument("--fdr-q", type=float, default=0.05)
    p.add_argument("--seed", type=int, default=42)
    p.add_argument("--min-events", type=int, default=100)
    p.add_argument("--geo", action="store_true",
                   help="Run geographic localisation (slow; default: off for quick runs)")
    p.add_argument("--nmdb-dir", type=Path, default=PROJECT_ROOT/"data"/"raw"/"nmdb")
    p.add_argument("--usgs-dir", type=Path, default=PROJECT_ROOT/"data"/"raw"/"usgs")
    p.add_argument("--sidc-dir", type=Path, default=PROJECT_ROOT/"data"/"raw"/"sidc")
    p.add_argument("--config", type=Path, default=PROJECT_ROOT/"config"/"stations.yaml")
    p.add_argument("--output-dir", type=Path, default=PROJECT_ROOT/"results")
    return p.parse_args()

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def run(args: argparse.Namespace) -> None:
    args.output_dir.mkdir(parents=True, exist_ok=True)
    (args.output_dir / "figs").mkdir(exist_ok=True)
    git_sha = _git_sha()

    # Determine study end date
    avail_path = args.output_dir / "data_availability.json"
    avail_run_date = "unknown"
    if args.study_end is None:
        if avail_path.exists():
            avail = json.loads(avail_path.read_text())
            args.study_end = avail["oos_end"]
            avail_run_date = avail.get("run_date", "unknown")
            logger.info("OOS end from data_availability.json: %s", args.study_end)
        else:
            logger.warning(
                "data_availability.json not found; defaulting to today-60 days. "
                "Run scripts/06_check_data_availability.py first."
            )
            from datetime import date, timedelta
            args.study_end = str(date.today() - timedelta(days=60))

    # ------------------------------------------------------------------ #
    # STEP 0: Pre-registration (BEFORE any data access)                   #
    # ------------------------------------------------------------------ #
    prereg_path = write_prereg(
        args.output_dir, args.study_start, args.study_end,
        args.n_surrogates, git_sha,
    )
    logger.info("Pre-registration complete: %s", prereg_path)

    # ------------------------------------------------------------------ #
    # STEP 1: Load station metadata + OOS CR/seismic data                 #
    # ------------------------------------------------------------------ #
    with open(args.config) as fh:
        cfg = yaml.safe_load(fh)
    station_ids = list(cfg["stations"].keys())

    logger.info("Loading OOS data: %s → %s", args.study_start, args.study_end)
    cr_series, seismic_series, ref_index, n_stations = load_cr_and_seismic(
        station_ids,
        args.study_start, args.study_end,
        args.nmdb_dir, args.usgs_dir,
        args.bin_days, args.min_mag, args.min_stations,
    )
    T = len(cr_series)

    # Drop NaN bins (align)
    valid = cr_series.notna() & seismic_series.notna()
    cr_v = cr_series[valid]
    sei_v = seismic_series[valid]
    cr_arr = cr_v.to_numpy(dtype=np.float32)
    sei_arr = sei_v.to_numpy(dtype=np.float32)
    T_valid = len(cr_arr)

    logger.info("Valid bins: %d / %d", T_valid, T)
    if T_valid < 50:
        raise RuntimeError(
            f"Only {T_valid} valid bins in OOS window — insufficient data. "
            "Run scripts/06_check_data_availability.py to download more data."
        )

    # ------------------------------------------------------------------ #
    # STEP 2: Homola replication — cross-correlation                      #
    # ------------------------------------------------------------------ #
    lag_min_b = args.lag_min // args.bin_days
    lag_max_b = args.lag_max // args.bin_days
    lag_bins = np.arange(lag_min_b, lag_max_b + 1, dtype=int)
    lags_days = lag_bins * args.bin_days

    logger.info("Computing observed cross-correlation …")
    x_z = ((cr_arr - cr_arr.mean()) / (cr_arr.std() + 1e-15)).astype(np.float64)
    y_z = ((sei_arr - sei_arr.mean()) / (sei_arr.std() + 1e-15)).astype(np.float64)
    obs_r = np.zeros(len(lag_bins), dtype=np.float64)
    for k, lag in enumerate(lag_bins):
        if lag >= 0:
            n = T_valid - lag
            if n < 2:
                continue
            obs_r[k] = x_z[:n] @ y_z[lag:lag + n] / n
        else:
            n = T_valid + lag
            if n < 2:
                continue
            obs_r[k] = x_z[-lag:-lag + n] @ y_z[:n] / n

    peak_idx = np.argmax(np.abs(obs_r))
    peak_r = float(obs_r[peak_idx])
    peak_lag_d = int(lags_days[peak_idx])
    lag15_idx = np.searchsorted(lag_bins, LAG_AT_HOMOLA_CLAIM // args.bin_days)
    r_at_15d = float(obs_r[lag15_idx]) if 0 <= lag15_idx < len(obs_r) else np.nan

    logger.info("Peak r = %.4f at τ = %+d d; r(+15 d) = %.4f",
                peak_r, peak_lag_d, r_at_15d)

    # ------------------------------------------------------------------ #
    # STEP 3: GPU surrogate test                                          #
    # ------------------------------------------------------------------ #
    logger.info("Running surrogate test (%d surrogates, GPU=%s) …",
                args.n_surrogates, gpu_available())

    surr_result = surrogate_xcorr_test_gpu(
        cr_arr, sei_arr, lag_bins,
        n_surrogates=args.n_surrogates,
        method="phase",
        seed=args.seed,
        vram_budget_gb=10.0,
    )
    p_global = float(surr_result["p_global"])
    surr_r_arrays = surr_result["surrogate_r_arrays"]  # (n_surr, n_lags)
    surr_p95 = np.percentile(np.abs(surr_r_arrays), 95, axis=0)
    surr_p99 = np.percentile(np.abs(surr_r_arrays), 99, axis=0)
    surr_p95_at_15d = float(surr_p95[lag15_idx]) if 0 <= lag15_idx < len(surr_p95) else np.nan

    sigma_surr = p_to_sigma(p_global) if p_global > 0 else float("inf")
    logger.info("p_global = %.4f (%.2fσ)", p_global, sigma_surr)
    logger.info("Surrogate 95th pct at τ=+15 d: %.4f (observed r=%.4f)",
                surr_p95_at_15d, r_at_15d)

    # ------------------------------------------------------------------ #
    # STEP 4: Linear detrend + sunspot regression detrend                 #
    # ------------------------------------------------------------------ #
    logger.info("Detrended analysis …")
    t_lin = np.arange(T_valid, dtype=np.float64)

    def _linear_detrend(arr: np.ndarray) -> np.ndarray:
        sl, ic = np.polyfit(t_lin, arr.astype(np.float64), 1)
        return (arr.astype(np.float64) - (ic + sl * t_lin)).astype(np.float32)

    cr_lin = _linear_detrend(cr_arr)
    sei_lin = _linear_detrend(sei_arr)

    surr_lin = surrogate_xcorr_test_gpu(
        cr_lin, sei_lin, lag_bins,
        n_surrogates=args.n_surrogates,
        method="phase",
        seed=args.seed + 1,
        vram_budget_gb=10.0,
    )
    p_global_lin = float(surr_lin["p_global"])
    obs_r_lin = surr_lin["observed_r"]
    r_at_15d_hp = float(obs_r_lin[lag15_idx]) if 0 <= lag15_idx < len(obs_r_lin) else np.nan
    logger.info("Linear detrend: p_global=%.4f r(+15d)=%.4f", p_global_lin, r_at_15d_hp)

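    # Window arithmetic for the defaults used in STEP 5 below: an 18-month
    # window at 5-day bins is int(18 * 30.44 / 5) = 109 bins; a 3-month step
    # is int(3 * 30.44 / 5) = 18 bins.
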
    # ------------------------------------------------------------------ #
    # STEP 5: Rolling-window stability                                    #
    # ------------------------------------------------------------------ #
    window_bins = args.roll_window or (int(18 * 30.44 / args.bin_days))
    step_bins = args.roll_step or (int( 3 * 30.44 / args.bin_days))
    lag15_bin = LAG_AT_HOMOLA_CLAIM // args.bin_days

    logger.info(
        "Rolling analysis: window=%d bins (%.0f mo), step=%d bins",
        window_bins, window_bins * args.bin_days / 30.44, step_bins,
    )
    rolling = rolling_r_at_lag(
        cr_v, sei_v, lag15_bin, window_bins, step_bins,
        n_surr=args.n_roll_surr, seed=args.seed + 2,
    )
    logger.info("Rolling windows computed: %d", len(rolling))

    # ------------------------------------------------------------------ #
    # STEP 6: Geographic localisation                                     #
    # ------------------------------------------------------------------ #
    n_sig_bh = 0
    expected_fp = 0.0

    if args.geo:
        logger.info("Geographic localisation (OOS) …")
        import importlib.util
        spec = importlib.util.spec_from_file_location(
            "geo_local", PROJECT_ROOT / "scripts" / "05_geographic_localisation.py"
        )
        geo_mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(geo_mod)

        geo_args = argparse.Namespace(
            study_start=args.study_start, study_end=args.study_end,
            bin_days=args.bin_days, lag_min=args.lag_min, lag_max=args.lag_max,
            min_mag=args.min_mag, min_events=args.min_events,
            min_valid_bins=100, n_surrogates=min(args.n_surrogates, 1000),
            method="phase", fdr_q=args.fdr_q, seed=args.seed,
            nmdb_dir=args.nmdb_dir, usgs_dir=args.usgs_dir,
            config=args.config,
            output_dir=args.output_dir / "oos_geo",
        )
        geo_mod.run(geo_args)
        geo_json = args.output_dir / "oos_geo" / "geo_localisation.json"
        if geo_json.exists():
            gd = json.loads(geo_json.read_text())
            n_sig_bh = gd.get("n_significant_bh", 0)
            expected_fp = gd.get("fdr_expected_fp", 0.0)
    else:
        logger.info("Skipping geo localisation (use --geo to enable)")

    # ------------------------------------------------------------------ #
    # STEP 7: Score predictions                                           #
    # ------------------------------------------------------------------ #
    scores = score_predictions(
        r_at_15d, surr_p95_at_15d, p_global,
        rolling, n_sig_bh, expected_fp, args.bin_days,
    )
    logger.info("Prediction scores: %s", scores)

    # ------------------------------------------------------------------ #
    # STEP 8: Figures                                                     #
    # ------------------------------------------------------------------ #
    make_xcorr_figure(
        lags_days, obs_r, surr_p95, surr_p99, p_global,
        args.study_start, args.study_end, n_stations, args.n_surrogates,
        args.output_dir / "figs" / "oos_xcorr.png",
    )
    make_rolling_figure(
        rolling, args.study_start, args.study_end,
        args.output_dir / "figs" / "rolling_correlation_oos.png",
    )

    # ------------------------------------------------------------------ #
    # STEP 9: Report + JSON                                               #
    # ------------------------------------------------------------------ #
    n_eff_val = float(n_eff_bretherton(cr_arr.astype(np.float64), sei_arr.astype(np.float64)))

    metrics = {
        "git_sha": git_sha,
        "avail_run_date": avail_run_date,
        "study_start": args.study_start,
        "study_end": args.study_end,
        "T_valid": T_valid,
        "n_stations": n_stations,
        "n_surrogates": args.n_surrogates,
        "seed": args.seed,
        "gpu_device": _GPU_REASON,
        "r_at_15d": round(r_at_15d, 6),
        "r_at_15d_hp": round(r_at_15d_hp, 6),
        "surr_p95_at_15d": round(surr_p95_at_15d, 6),
        "p_global": round(p_global, 6),
        "p_global_linear_detrend": round(p_global_lin, 6),
        "sigma_surr": round(sigma_surr, 3),
        "peak_r": round(peak_r, 6),
        "peak_lag_days": peak_lag_d,
        "n_eff": round(n_eff_val, 1),
        "n_rolling_windows": len(rolling),
        "n_significant_bh": n_sig_bh,
        "expected_fp": round(expected_fp, 2),
        "T": T_valid,
        "prediction_scores": scores,
        "rolling_windows": rolling,
    }

    json_path = args.output_dir / "out_of_sample_metrics.json"
    json_path.write_text(json.dumps(metrics, indent=2, default=str), encoding="utf-8")
    logger.info("JSON saved: %s", json_path)

    write_oos_report(scores, metrics, args, args.output_dir)

    # Summary
    print()
    print("=" * 72)
    print(" OUT-OF-SAMPLE VALIDATION SUMMARY")
    print(f" Window: {args.study_start} → {args.study_end} (T={T_valid} bins)")
    print(f" GPU: {_GPU_REASON} | Surrogates: {args.n_surrogates:,}")
    print("=" * 72)
    print(f" r(τ=+15 d) = {r_at_15d:+.4f} (surrogate 95th pct = {surr_p95_at_15d:.4f})")
    print(f" p_global = {p_global:.4f} ({sigma_surr:.2f}σ)")
    print(f" Dominant peak = {peak_r:+.4f} at τ = {peak_lag_d:+d} d")
    print(f" Rolling windows = {len(rolling)}")
    print()
    for k, v in scores.items():
        print(f" {k}: {v}")
    print("=" * 72)
    print()

    logger.info("Done.")


if __name__ == "__main__":
    run(_parse_args())