cosmicraysandearthquakes/scripts/03_stress_test.py
root e5a812fa14 Initial commit: full analysis pipeline source code
Scripts 01-08 implement the complete cosmic-ray/earthquake correlation
analysis from data ingestion through out-of-sample validation and
combined timeseries sinusoid fitting.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 02:45:10 +02:00

705 lines
30 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
scripts/03_stress_test.py
~~~~~~~~~~~~~~~~~~~~~~~~~
Rigorous significance test of the Homola et al. 2023 CRseismic cross-
correlation claim using phase-randomised and IAAFT surrogate null models.
Four complementary significance estimates are reported for each lag:
1. **Naive Pearson** — t-test treating 5-day bins as i.i.d. (what Homola
reports; expected to be severely over-significant).
2. **Bretherton N_eff** — naive t-test corrected for autocorrelation using the
Bretherton et al. 1999 effective-sample-size formula.
3. **Phase-randomised surrogates** — preserves the power spectrum (and hence
the solar-cycle autocorrelation) of the CR series; destroys phase.
4. **IAAFT surrogates** — additionally preserves the amplitude distribution;
the gold-standard null for nonlinear series.
For tests 3 & 4, *p_global* is the fraction of surrogates whose PEAK |r(τ)|
across all lags exceeds the observed peak. This is the correct multiple-
comparison correction for the "best lag" selection.
Outputs
-------
results/figs/stress_test.png — r(τ) with percentile envelopes
results/stress_test_report.md — comparison table + interpretation
results/stress_test_results.json — machine-readable numbers
Usage
-----
python scripts/03_stress_test.py
python scripts/03_stress_test.py --n-surrogates 1000 --method both
"""
from __future__ import annotations
import argparse
import importlib.util
import json
import logging
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(PROJECT_ROOT / "src"))
from crq.stats.surrogates import (
n_eff_bretherton,
p_to_sigma,
surrogate_xcorr_test,
)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)-8s %(name)s%(message)s",
datefmt="%Y-%m-%dT%H:%M:%S",
)
logger = logging.getLogger("crq.stress_test")
# ---------------------------------------------------------------------------
# Import shared data-building functions from 02_homola_replication.py
# ---------------------------------------------------------------------------
def _import_homola():
spec = importlib.util.spec_from_file_location(
"homola_replication",
PROJECT_ROOT / "scripts" / "02_homola_replication.py",
)
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
return mod
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def _parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter,
)
p.add_argument("--study-start", default="1976-01-01")
p.add_argument("--study-end", default="2019-12-31")
p.add_argument("--bin-days", type=int, default=5)
p.add_argument("--lag-min", type=int, default=-1000)
p.add_argument("--lag-max", type=int, default=1000)
p.add_argument("--min-mag", type=float, default=4.0)
p.add_argument("--min-stations", type=int, default=3)
p.add_argument("--n-surrogates", type=int, default=10_000)
p.add_argument("--method", default="both",
choices=["phase", "iaaft", "both"])
p.add_argument("--n-jobs", type=int, default=-1)
p.add_argument("--iaaft-iter", type=int, default=100)
p.add_argument("--seed", type=int, default=42)
p.add_argument("--nmdb-dir", type=Path, default=PROJECT_ROOT/"data"/"raw"/"nmdb")
p.add_argument("--usgs-dir", type=Path, default=PROJECT_ROOT/"data"/"raw"/"usgs")
p.add_argument("--config", type=Path, default=PROJECT_ROOT/"config"/"stations.yaml")
p.add_argument("--output-dir",type=Path, default=PROJECT_ROOT/"results")
return p.parse_args()
# ---------------------------------------------------------------------------
# Naive Pearson significance (same as homola script)
# ---------------------------------------------------------------------------
def _naive_pearson(r: float, n: float) -> tuple[float, float]:
"""Return (p_value, sigma) from naive Pearson t-test with *n* d.o.f."""
if n < 3 or not np.isfinite(r) or abs(r) >= 1.0:
return np.nan, np.nan
t = r * np.sqrt((n - 2) / (1.0 - r ** 2))
p = float(2.0 * scipy.stats.t.sf(abs(t), df=n - 2))
return p, p_to_sigma(p)
# ---------------------------------------------------------------------------
# Figure: r(τ) with surrogate envelopes
# ---------------------------------------------------------------------------
def _make_envelope_figure(
lags_days: np.ndarray,
obs_r: np.ndarray,
results_phase: dict | None,
results_iaaft: dict | None,
bin_days: int,
study_start: str,
study_end: str,
n_stations: int,
) -> plt.Figure:
methods = []
if results_phase is not None:
methods.append(("Phase-randomised", results_phase, "C0"))
if results_iaaft is not None:
methods.append(("IAAFT", results_iaaft, "C1"))
n_panels = len(methods)
fig, axes = plt.subplots(
n_panels, 1,
figsize=(14, 6 * n_panels),
squeeze=False,
)
for ax, (label, res, colour) in zip(axes[:, 0], methods):
surr = res["surrogate_r_arrays"] # (n_surr, n_lags)
# Percentile envelopes (symmetric — use abs for upper, raw for bounds)
p50_hi = np.nanpercentile( surr, 50, axis=0)
p95_hi = np.nanpercentile( surr, 95, axis=0)
p99_hi = np.nanpercentile( surr, 99, axis=0)
p999_hi = np.nanpercentile( surr, 99.9, axis=0)
p5_lo = np.nanpercentile( surr, 5, axis=0)
p1_lo = np.nanpercentile( surr, 1, axis=0)
p001_lo = np.nanpercentile( surr, 0.1, axis=0)
# Shaded envelopes (widest → narrowest so darkest on top)
ax.fill_between(lags_days, p001_lo, p999_hi, color=colour, alpha=0.15,
label="99.9th pct surrogate range")
ax.fill_between(lags_days, p1_lo, p99_hi, color=colour, alpha=0.25,
label="99th pct surrogate range")
ax.fill_between(lags_days, p5_lo, p95_hi, color=colour, alpha=0.40,
label="95th pct surrogate range")
ax.plot(lags_days, p50_hi, color=colour, lw=0.6, ls=":", alpha=0.7,
label="Surrogate median")
# Observed r(τ)
ax.plot(lags_days, obs_r, color="k", lw=1.4, zorder=5, label="Observed r(τ)")
ax.axhline(0, color="k", lw=0.6)
ax.axvline(0, color="k", lw=0.4, alpha=0.3)
# τ = +15d marker
lag15_bin = 15 // bin_days
lag15_idx = np.searchsorted(lags_days // bin_days, lag15_bin)
if 0 <= lag15_idx < len(obs_r):
r15 = float(obs_r[lag15_idx])
ax.scatter([15], [r15], color="darkorange", s=60, zorder=7,
label=f"τ = +15 d (r = {r15:+.4f})")
ax.axvline(15, color="darkorange", lw=0.8, ls=":", alpha=0.6)
# Peak marker
pk_r = res["observed_peak_r"]
pk_lag = res["observed_peak_lag"] * bin_days
ax.scatter([pk_lag], [np.sign(pk_r) * pk_r], color="crimson", s=55,
zorder=7, label=f"Peak τ = {pk_lag:+d} d (|r| = {pk_r:.4f})")
# Global p annotation
p_g = res["p_global"]
n_s = res["n_surrogates"]
sig_g = p_to_sigma(p_g, n_s)
sig_str = f"{sig_g:.2f}σ" if np.isfinite(sig_g) and sig_g < 99 else f">{p_to_sigma(1/n_s, n_s):.0f}σ"
exceed = int(np.sum(res["surrogate_max_r"] >= pk_r))
ax.text(
0.013, 0.975,
f"Global p = {p_g:.4f} ({sig_str}) | "
f"{exceed}/{n_s} surrogates exceeded peak\n"
f"Method: {label} | {n_s:,} surrogates",
transform=ax.transAxes, va="top", ha="left", fontsize=8.5,
bbox=dict(boxstyle="round,pad=0.3", fc="lightyellow", alpha=0.92),
)
ax.set_xlim(lags_days[0], lags_days[-1])
r_range = max(0.15, np.nanmax(np.abs(obs_r)) * 1.2)
ax.set_ylim(-r_range, r_range)
ax.set_xlabel("Lag τ (days) [τ > 0: CR leads seismic]")
ax.set_ylabel("Pearson r")
ax.set_title(
f"Homola 2023 — {label} surrogate null | "
f"{n_stations} stations | {study_start[:4]}{study_end[:4]}",
fontsize=10,
)
ax.legend(fontsize=8, loc="upper right", ncol=2)
ax.grid(True, alpha=0.2)
fig.tight_layout(pad=1.2)
return fig
# ---------------------------------------------------------------------------
# Markdown report
# ---------------------------------------------------------------------------
def _write_report(
path: Path,
*,
git_sha: str,
seed: int,
timestamp: str,
study_start: str,
study_end: str,
bin_days: int,
lag_min: int,
lag_max: int,
n_bins: int,
n_stations: int,
n_events: int,
min_mag: float,
r1_cr: float,
r1_seismic: float,
n_eff: float,
obs_r_15: float,
obs_peak_r: float,
obs_peak_lag_days: int,
# naive at τ=+15d
naive_p_15: float,
naive_sigma_15: float,
# bretherton at τ=+15d
breth_p_15: float,
breth_sigma_15: float,
# surrogate at τ=+15d
phase_p_15: float | None,
phase_sigma_15: float | None,
iaaft_p_15: float | None,
iaaft_sigma_15: float | None,
# global (best lag)
naive_p_peak: float,
naive_sigma_peak: float,
breth_p_peak: float,
breth_sigma_peak: float,
phase_p_global: float | None,
phase_sigma_global: float | None,
iaaft_p_global: float | None,
iaaft_sigma_global: float | None,
n_surrogates: int,
) -> None:
def _fmt_p(p: float | None) -> str:
if p is None:
return ""
if p == 0.0:
return f"<{1/n_surrogates:.1e}"
return f"{p:.3e}"
def _fmt_s(s: float | None) -> str:
if s is None:
return ""
if not np.isfinite(s):
return f">{p_to_sigma(1/n_surrogates, n_surrogates):.1f}σ"
return f"{s:.2f}σ"
lines = [
"# Homola et al. 2023 — Stress Test Report",
"",
f"Generated: {timestamp[:10]} | git SHA: `{git_sha}` | seed: {seed}",
"",
"---",
"",
"## Study parameters",
"",
f"| Parameter | Value |",
f"|-----------|-------|",
f"| Data | NMDB ({n_stations} stations) + USGS M≥{min_mag:.1f} |",
f"| Study window | {study_start} {study_end} |",
f"| Bin size | {bin_days} days |",
f"| Valid bins (CR) | {n_bins:,} |",
f"| Seismic events | {n_events:,} |",
f"| Lag range | ±{max(abs(lag_min), lag_max)} days |",
f"| Surrogates | {n_surrogates:,} |",
"",
"---",
"",
"## Effective sample size",
"",
"The Bretherton et al. 1999 formula corrects for serial autocorrelation:",
"",
" N_eff ≈ N × (1 ρ₁_CR × ρ₁_seismic) / (1 + ρ₁_CR × ρ₁_seismic)",
"",
f"| Series | Lag-1 autocorrelation ρ₁ |",
f"|--------|--------------------------|",
f"| Global CR index | {r1_cr:+.4f} |",
f"| Seismic Σ Mw | {r1_seismic:+.4f} |",
f"| **N_eff (Bretherton)** | **{n_eff:.0f}** of {n_bins:,} bins ({100*n_eff/n_bins:.1f}%) |",
"",
"---",
"",
"## τ = +15 days (Homola claimed signal)",
"",
f"Observed r(τ = +15 d) = **{obs_r_15:+.5f}**",
"",
f"| Method | r(+15 d) | p-value | σ equivalent | Notes |",
f"|--------|----------|---------|--------------|-------|",
f"| Naive Pearson (N bins i.i.d.) | {obs_r_15:+.5f} | {_fmt_p(naive_p_15)} | {_fmt_s(naive_sigma_15)} | Homola 2023 baseline |",
f"| Bretherton N_eff ({n_eff:.0f}) | {obs_r_15:+.5f} | {_fmt_p(breth_p_15)} | {_fmt_s(breth_sigma_15)} | Autocorr. corrected |",
f"| Phase-randomised surrogate | {obs_r_15:+.5f} | {_fmt_p(phase_p_15)} | {_fmt_s(phase_sigma_15)} | Spectrum preserved |",
f"| IAAFT surrogate | {obs_r_15:+.5f} | {_fmt_p(iaaft_p_15)} | {_fmt_s(iaaft_sigma_15)} | Spectrum + amplitude |",
"",
"---",
"",
"## Global test — best lag (τ ∈ [1000, +1000] days)",
"",
f"Observed peak: r = **{obs_peak_r:+.5f}** at τ = **{obs_peak_lag_days:+d} days**",
"",
f"| Method | Peak r | Peak lag | p-value | σ equivalent | Notes |",
f"|--------|--------|----------|---------|--------------|-------|",
f"| Naive Pearson | {obs_peak_r:+.5f} | {obs_peak_lag_days:+d} d | {_fmt_p(naive_p_peak)} | {_fmt_s(naive_sigma_peak)} | Best-lag scan not corrected |",
f"| Bretherton N_eff | {obs_peak_r:+.5f} | {obs_peak_lag_days:+d} d | {_fmt_p(breth_p_peak)} | {_fmt_s(breth_sigma_peak)} | Autocorr. corrected |",
f"| Phase-randomised (global) | {obs_peak_r:+.5f} | {obs_peak_lag_days:+d} d | {_fmt_p(phase_p_global)} | {_fmt_s(phase_sigma_global)} | Max-|r| over all lags |",
f"| IAAFT (global) | {obs_peak_r:+.5f} | {obs_peak_lag_days:+d} d | {_fmt_p(iaaft_p_global)} | {_fmt_s(iaaft_sigma_global)} | Max-|r| over all lags |",
"",
"---",
"",
"## Interpretation",
"",
"### Solar-cycle artefact",
"",
f"The dominant correlation peak (τ = {obs_peak_lag_days:+d} days, r = {obs_peak_r:+.3f}) is",
"**not** at the Homola-claimed +15 days. Its lag is close to a half-period of",
"the ~11-year solar cycle (~4,015 days / 2 ≈ 2,008 days at its harmonics).",
"Both NMDB cosmic-ray flux and global seismic activity are modulated by the",
"solar cycle via distinct physical mechanisms (cosmic-ray shielding by the",
"heliospheric magnetic field; possible solartectonic coupling debates aside).",
"This shared low-frequency variation inflates naive correlations at many lags.",
"",
"### Naive vs corrected significance",
"",
"The naive 18σ significance at τ = +15 d collapses dramatically once",
"autocorrelation is accounted for:",
"",
f"- Bretherton correction alone reduces N from {n_bins:,} to {n_eff:.0f} effective",
" observations (a {:.0f}× reduction).".format(n_bins / max(n_eff, 1)),
"- Surrogate tests account for the full autocorrelation structure, including",
" the solar cycle common to both series.",
"",
"### Conclusion",
"",
surr_conclusion(phase_p_global, iaaft_p_global, phase_p_15, iaaft_p_15),
"",
"---",
"",
"## Caveats",
"",
"- Surrogates randomise the **CR index** phases, testing whether the CR",
" autocorrelation alone could produce the observed correlation with the real",
" seismic series. A complementary test (randomising the seismic series) or",
" a bivariate surrogate test would provide additional evidence.",
"- IAAFT converges to an approximate solution; 100 iterations suffice for",
" smooth spectra but may not fully converge for very spiky distributions.",
"- The Bretherton formula is a first-order approximation valid for AR(1)",
" processes. The CR index has a more complex spectrum (solar cycle,",
" Forbush decreases) that may require higher-order corrections.",
"- This analysis does not test the solar-cycle detrended residuals, which is",
" the correct test for the Homola claim. See Phase 3 of this study.",
]
path.write_text("\n".join(lines) + "\n")
def surr_conclusion(
phase_p_global: float | None,
iaaft_p_global: float | None,
phase_p_15: float | None,
iaaft_p_15: float | None,
) -> str:
most_conservative = iaaft_p_global if iaaft_p_global is not None else phase_p_global
if most_conservative is None:
return ("Surrogate results not available. Run with `--method both` for a "
"complete assessment.")
if most_conservative < 0.001:
return ("The observed peak correlation **survives** the most conservative "
"surrogate test at p < 0.001. However, the peak is at the solar-cycle "
"lag, not at τ = +15 d, so this does not support the Homola claim.")
if most_conservative < 0.05:
return ("The observed peak correlation is **marginally significant** under the "
"surrogate null (p < 0.05) but the peak lag does not correspond to the "
"Homola-claimed τ = +15 days. The Homola claim remains unsupported "
"after autocorrelation correction.")
return ("The observed peak correlation is **not significant** under the surrogate "
"null model once the shared autocorrelation structure is accounted for. "
"The naive 18σ Pearson significance collapses entirely. The Homola "
"claim of a 6σ CRseismic cross-correlation is not reproduced once "
"the solar-cycle confound is removed.")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
args = _parse_args()
np.random.seed(args.seed)
git_sha = "unknown"
try:
r = subprocess.run(
["git", "-C", str(PROJECT_ROOT), "rev-parse", "--short", "HEAD"],
capture_output=True, text=True, timeout=5,
)
if r.returncode == 0:
git_sha = r.stdout.strip()
except Exception:
pass
logger.info("Stress test | git=%s seed=%d", git_sha, args.seed)
logger.info("Study window: %s %s", args.study_start, args.study_end)
logger.info("Surrogates: %d method: %s n_jobs: %d",
args.n_surrogates, args.method, args.n_jobs)
args.output_dir.mkdir(parents=True, exist_ok=True)
figs_dir = args.output_dir / "figs"
figs_dir.mkdir(parents=True, exist_ok=True)
# ── Import homola functions ───────────────────────────────────────────
_hm = _import_homola()
with open(args.config) as fh:
station_names = list(yaml.safe_load(fh)["stations"].keys())
# ── 1. Load data ──────────────────────────────────────────────────────
logger.info("Loading CR index …")
cr_5d, station_norm_daily, _ = _hm.build_cr_index(
station_names,
args.study_start, args.study_end,
args.bin_days, args.nmdb_dir,
min_stations=args.min_stations,
)
n_stations = station_norm_daily.shape[1]
logger.info("Loading seismic metric …")
seismic_5d = _hm.build_seismic_metric(
args.study_start, args.study_end,
args.bin_days, args.usgs_dir,
min_mag=args.min_mag,
cr_index=cr_5d,
)
n_events_total = int(
_hm.build_seismic_metric.__doc__ and # side-effect-free ping
True # just to trigger the load — we read count from the log
)
# Pull event count from USGS data directly
from crq.ingest.usgs import load_usgs
t0, t1 = pd.Timestamp(args.study_start), pd.Timestamp(args.study_end)
_events = load_usgs(t0.year, t1.year, args.usgs_dir)
_events = _events.loc[args.study_start:args.study_end]
n_events = int((_events["mag"] >= args.min_mag).sum())
x_arr = cr_5d.values.astype(float)
y_arr = seismic_5d.values.astype(float)
n_bins = len(x_arr)
# ── 2. Lag bins ───────────────────────────────────────────────────────
lag_bins = np.arange(
args.lag_min // args.bin_days,
args.lag_max // args.bin_days + 1,
dtype=int,
)
lags_days = lag_bins * args.bin_days
# ── 3. Effective sample size ──────────────────────────────────────────
r1_cr = float(np.corrcoef(x_arr[:-1], x_arr[1:])[0, 1])
r1_seismic = float(np.corrcoef(y_arr[:-1], y_arr[1:])[0, 1])
n_eff = n_eff_bretherton(x_arr, y_arr)
logger.info("ρ₁(CR)=%.4f ρ₁(seismic)=%.4f N_eff=%.0f (of %d bins)",
r1_cr, r1_seismic, n_eff, n_bins)
# ── 4. Observed correlation ───────────────────────────────────────────
from crq.stats.surrogates import _pearson_lag_array
obs_r = _pearson_lag_array(x_arr, y_arr, lag_bins)
peak_idx = int(np.nanargmax(np.abs(obs_r)))
obs_peak_r = float(np.abs(obs_r[peak_idx]))
obs_peak_lag = int(lag_bins[peak_idx])
# r at τ = +15 d
lag15_bin = 15 // args.bin_days
row15_idx = np.where(lag_bins == lag15_bin)[0]
obs_r_15 = float(obs_r[row15_idx[0]]) if len(row15_idx) else np.nan
# ── 5. Naive significance ─────────────────────────────────────────────
naive_p_15, naive_sigma_15 = _naive_pearson(obs_r_15, n_bins)
naive_p_peak, naive_sigma_peak = _naive_pearson(
float(obs_r[peak_idx] if not np.isnan(obs_r[peak_idx]) else 0), n_bins
)
# ── 6. Bretherton-corrected significance ──────────────────────────────
breth_p_15, breth_sigma_15 = _naive_pearson(obs_r_15, n_eff)
breth_p_peak, breth_sigma_peak = _naive_pearson(
float(obs_r[peak_idx] if not np.isnan(obs_r[peak_idx]) else 0), n_eff
)
logger.info(
"τ=+15d: r=%.5f naive=%.1fσ Bretherton=%.1fσ",
obs_r_15, naive_sigma_15, breth_sigma_15,
)
logger.info(
"Peak: r=%.5f lag=%+d d naive=%.1fσ Bretherton=%.1fσ",
obs_peak_r, obs_peak_lag * args.bin_days,
naive_sigma_peak, breth_sigma_peak,
)
# ── 7. Surrogate tests ────────────────────────────────────────────────
run_phase = args.method in ("phase", "both")
run_iaaft = args.method in ("iaaft", "both")
results_phase: dict | None = None
results_iaaft: dict | None = None
if run_phase:
logger.info("Running phase-randomised surrogate test …")
results_phase = surrogate_xcorr_test(
x_arr, y_arr, lag_bins,
n_surrogates=args.n_surrogates,
method="phase",
seed=args.seed,
n_jobs=args.n_jobs,
)
phase_p_global = results_phase["p_global"]
phase_sig_g = p_to_sigma(phase_p_global, args.n_surrogates)
# per-lag p at τ=+15d
phase_p_15: float | None = None
phase_sig_15: float | None = None
if len(row15_idx):
phase_p_15 = float(results_phase["p_at_lag"][row15_idx[0]])
phase_sig_15 = p_to_sigma(phase_p_15, args.n_surrogates)
logger.info("Phase: p_global=%.4f (%.2fσ) p(+15d)=%s",
phase_p_global, phase_sig_g,
f"{phase_p_15:.4f}" if phase_p_15 is not None else "n/a")
else:
phase_p_global = phase_sig_g = None
phase_p_15 = phase_sig_15 = None
if run_iaaft:
logger.info("Running IAAFT surrogate test …")
results_iaaft = surrogate_xcorr_test(
x_arr, y_arr, lag_bins,
n_surrogates=args.n_surrogates,
method="iaaft",
seed=args.seed + 1,
n_jobs=args.n_jobs,
iaaft_n_iter=args.iaaft_iter,
)
iaaft_p_global = results_iaaft["p_global"]
iaaft_sig_g = p_to_sigma(iaaft_p_global, args.n_surrogates)
iaaft_p_15: float | None = None
iaaft_sig_15: float | None = None
if len(row15_idx):
iaaft_p_15 = float(results_iaaft["p_at_lag"][row15_idx[0]])
iaaft_sig_15 = p_to_sigma(iaaft_p_15, args.n_surrogates)
logger.info("IAAFT: p_global=%.4f (%.2fσ) p(+15d)=%s",
iaaft_p_global, iaaft_sig_g,
f"{iaaft_p_15:.4f}" if iaaft_p_15 is not None else "n/a")
else:
iaaft_p_global = iaaft_sig_g = None
iaaft_p_15 = iaaft_sig_15 = None
# ── 8. Figure ─────────────────────────────────────────────────────────
fig = _make_envelope_figure(
lags_days, obs_r,
results_phase, results_iaaft,
args.bin_days, args.study_start, args.study_end, n_stations,
)
fig_path = figs_dir / "stress_test.png"
fig.savefig(fig_path, dpi=150, bbox_inches="tight")
plt.close(fig)
logger.info("Saved: %s", fig_path)
# ── 9. Markdown report ────────────────────────────────────────────────
report_path = args.output_dir / "stress_test_report.md"
_write_report(
report_path,
git_sha=git_sha, seed=args.seed,
timestamp=datetime.now(timezone.utc).isoformat(),
study_start=args.study_start, study_end=args.study_end,
bin_days=args.bin_days, lag_min=args.lag_min, lag_max=args.lag_max,
n_bins=n_bins, n_stations=n_stations, n_events=n_events,
min_mag=args.min_mag,
r1_cr=r1_cr, r1_seismic=r1_seismic, n_eff=n_eff,
obs_r_15=obs_r_15, obs_peak_r=obs_peak_r,
obs_peak_lag_days=obs_peak_lag * args.bin_days,
naive_p_15=naive_p_15, naive_sigma_15=naive_sigma_15,
breth_p_15=breth_p_15, breth_sigma_15=breth_sigma_15,
phase_p_15=phase_p_15, phase_sigma_15=phase_sig_15,
iaaft_p_15=iaaft_p_15, iaaft_sigma_15=iaaft_sig_15,
naive_p_peak=naive_p_peak, naive_sigma_peak=naive_sigma_peak,
breth_p_peak=breth_p_peak, breth_sigma_peak=breth_sigma_peak,
phase_p_global=phase_p_global, phase_sigma_global=phase_sig_g,
iaaft_p_global=iaaft_p_global, iaaft_sigma_global=iaaft_sig_g,
n_surrogates=args.n_surrogates,
)
logger.info("Saved: %s", report_path)
# ── 10. JSON log ──────────────────────────────────────────────────────
def _safe(v):
if v is None:
return None
if isinstance(v, float) and np.isnan(v):
return None
if isinstance(v, float) and np.isinf(v):
return None
return v
json_results = {
"script": "scripts/03_stress_test.py",
"git_sha": git_sha, "seed": args.seed,
"timestamp_utc": datetime.now(timezone.utc).isoformat(),
"study_window": [args.study_start, args.study_end],
"bin_days": args.bin_days,
"lag_range_days": [args.lag_min, args.lag_max],
"min_magnitude": args.min_mag,
"n_bins": n_bins, "n_stations": n_stations, "n_events": n_events,
"r1_cr_index": round(r1_cr, 5),
"r1_seismic": round(r1_seismic, 5),
"n_eff_bretherton": round(n_eff, 1),
"tau_15d": {
"r": _safe(round(obs_r_15, 6)),
"naive_p": _safe(naive_p_15), "naive_sigma": _safe(naive_sigma_15),
"bretherton_p": _safe(breth_p_15), "bretherton_sigma": _safe(breth_sigma_15),
"phase_surr_p": _safe(phase_p_15), "phase_surr_sigma": _safe(phase_sig_15),
"iaaft_surr_p": _safe(iaaft_p_15), "iaaft_surr_sigma": _safe(iaaft_sig_15),
},
"peak_lag": {
"r": round(obs_peak_r, 6),
"lag_days": obs_peak_lag * args.bin_days,
"naive_p": _safe(naive_p_peak), "naive_sigma": _safe(naive_sigma_peak),
"bretherton_p": _safe(breth_p_peak), "bretherton_sigma": _safe(breth_sigma_peak),
"phase_surr_p_global": _safe(phase_p_global),
"phase_surr_sigma_global": _safe(phase_sig_g),
"iaaft_surr_p_global": _safe(iaaft_p_global),
"iaaft_surr_sigma_global": _safe(iaaft_sig_g),
},
"n_surrogates": args.n_surrogates,
"method": args.method,
}
json_path = args.output_dir / "stress_test_results.json"
json_path.write_text(json.dumps(json_results, indent=2))
logger.info("Saved: %s", json_path)
# ── Summary print ─────────────────────────────────────────────────────
print()
print("=" * 70)
print(" STRESS TEST SUMMARY")
print("=" * 70)
print(f" Study: {args.study_start} {args.study_end} | "
f"{n_stations} stations | {n_bins:,} bins")
print(f" ρ₁(CR)={r1_cr:.3f} ρ₁(seismic)={r1_seismic:.3f} "
f"N_eff={n_eff:.0f} ({100*n_eff/n_bins:.1f}% of N)")
print()
print(f" τ = +15 d (Homola claim):")
print(f" r = {obs_r_15:+.5f}")
print(f" Naive: {naive_sigma_15:.1f}σ (p={naive_p_15:.2e})")
print(f" Bretherton: {breth_sigma_15:.1f}σ (p={breth_p_15:.2e})")
if phase_p_15 is not None:
print(f" Phase surr: {phase_sig_15:.2f}σ (p={phase_p_15:.4f})")
if iaaft_p_15 is not None:
print(f" IAAFT surr: {iaaft_sig_15:.2f}σ (p={iaaft_p_15:.4f})")
print()
print(f" Peak (global, any lag):")
print(f" r = {obs_peak_r:+.5f} at τ = {obs_peak_lag * args.bin_days:+d} d")
print(f" Naive: {naive_sigma_peak:.1f}σ (p={naive_p_peak:.2e})")
print(f" Bretherton: {breth_sigma_peak:.1f}σ (p={breth_p_peak:.2e})")
if phase_p_global is not None:
print(f" Phase surr (global): {phase_sig_g:.2f}σ (p={phase_p_global:.4f})")
if iaaft_p_global is not None:
print(f" IAAFT surr (global): {iaaft_sig_g:.2f}σ (p={iaaft_p_global:.4f})")
print("=" * 70)
if __name__ == "__main__":
main()