Add pre-registration and data availability report

Pre-registered predictions written BEFORE any OOS analysis data is loaded
(UTC 2026-04-22T00:44:30). OOS window: 2020-01-01 to 2025-04-29,
constrained by NMDB reliable end date. 30 NMDB stations pass coverage
threshold in this window.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
root 2026-04-22 02:45:19 +02:00
parent e5a812fa14
commit 1832f73f74
3 changed files with 523 additions and 0 deletions

View file

@ -0,0 +1,358 @@
{
"run_date": "2026-04-22",
"oos_start": "2020-01-01",
"oos_end": "2025-04-29",
"constrained_by": "NMDB",
"nmdb_reliable_end": "2025-04-29",
"usgs_reliable_end": "2026-03-08",
"sidc_reliable_end": "2026-03-23",
"good_stations_oos": [
"AATB",
"APTY",
"ATHN",
"BKSN",
"DOMB",
"DOMC",
"FSMT",
"INVK",
"IRK3",
"IRKT",
"JUNG",
"JUNG1",
"KERG",
"KIEL2",
"LMKS",
"MOSC",
"MRNY",
"MXCO",
"NAIN",
"NANM",
"NEWK",
"NRLK",
"OULU",
"PSNM",
"PWNK",
"ROME",
"SOPB",
"SOPO",
"TERA",
"THUL"
],
"flagged_stations": [
"AATA",
"DJON",
"DRBS",
"FSMT",
"HRMS",
"INVK",
"IRK2",
"IRK3",
"KERG",
"KIEL2",
"LMKS",
"MOSC",
"MRNY",
"MXCO",
"NAIN",
"NANM",
"NEWK",
"NRLK",
"PTFM",
"PWNK",
"SNAE",
"SOPB",
"SOPO",
"TERA",
"THUL",
"TSMB"
],
"nmdb_station_detail": {
"AATA": {
"status": "low_coverage",
"coverage_oos": 0.2691,
"last_date": "2022-09-14",
"gap_days": 1316
},
"AATB": {
"status": "ok",
"coverage_oos": 0.947,
"last_date": "2026-04-20",
"gap_days": 2
},
"APTY": {
"status": "ok",
"coverage_oos": 0.9978,
"last_date": "2026-04-21",
"gap_days": 1
},
"ARNM": {
"status": "no_data",
"coverage_oos": 0.0,
"last_date": null,
"gap_days": null
},
"ATHN": {
"status": "ok",
"coverage_oos": 0.8967,
"last_date": "2026-04-21",
"gap_days": 1
},
"BKSN": {
"status": "ok",
"coverage_oos": 0.9861,
"last_date": "2026-04-21",
"gap_days": 1
},
"CALM": {
"status": "no_data",
"coverage_oos": 0.0,
"last_date": null,
"gap_days": null
},
"DJON": {
"status": "low_coverage",
"coverage_oos": 0.0499,
"last_date": "2021-12-17",
"gap_days": 1587
},
"DOMB": {
"status": "ok",
"coverage_oos": 0.8481,
"last_date": "2026-04-20",
"gap_days": 2
},
"DOMC": {
"status": "ok",
"coverage_oos": 0.9688,
"last_date": "2026-04-20",
"gap_days": 2
},
"DRBS": {
"status": "low_coverage",
"coverage_oos": 0.0226,
"last_date": "2020-02-21",
"gap_days": 2252
},
"FSMT": {
"status": "ok",
"coverage_oos": 0.8746,
"last_date": "2025-07-26",
"gap_days": 270
},
"HRMS": {
"status": "low_coverage",
"coverage_oos": 0.0013,
"last_date": "2021-06-01",
"gap_days": 1786
},
"INVK": {
"status": "ok",
"coverage_oos": 0.8763,
"last_date": "2025-07-26",
"gap_days": 270
},
"IRK2": {
"status": "low_coverage",
"coverage_oos": 0.0751,
"last_date": "2020-07-27",
"gap_days": 2095
},
"IRK3": {
"status": "ok",
"coverage_oos": 0.5799,
"last_date": "2025-05-22",
"gap_days": 335
},
"IRKT": {
"status": "ok",
"coverage_oos": 0.9015,
"last_date": "2026-04-21",
"gap_days": 1
},
"JBGO": {
"status": "low_coverage",
"coverage_oos": 0.3481,
"last_date": "2026-04-20",
"gap_days": 2
},
"JUNG": {
"status": "ok",
"coverage_oos": 0.9978,
"last_date": "2026-04-21",
"gap_days": 1
},
"JUNG1": {
"status": "ok",
"coverage_oos": 0.9957,
"last_date": "2026-04-21",
"gap_days": 1
},
"KERG": {
"status": "ok",
"coverage_oos": 0.9709,
"last_date": "2026-02-28",
"gap_days": 53
},
"KIEL": {
"status": "no_data",
"coverage_oos": 0.0,
"last_date": null,
"gap_days": null
},
"KIEL2": {
"status": "ok",
"coverage_oos": 0.5929,
"last_date": "2023-10-03",
"gap_days": 932
},
"LMKS": {
"status": "ok",
"coverage_oos": 0.5586,
"last_date": "2023-07-10",
"gap_days": 1017
},
"MCRL": {
"status": "low_coverage",
"coverage_oos": 0.0,
"last_date": null,
"gap_days": null
},
"MGDN": {
"status": "no_data",
"coverage_oos": 0.0,
"last_date": null,
"gap_days": null
},
"MOSC": {
"status": "ok",
"coverage_oos": 0.8286,
"last_date": "2025-03-23",
"gap_days": 395
},
"MRNY": {
"status": "ok",
"coverage_oos": 0.776,
"last_date": "2025-02-28",
"gap_days": 418
},
"MXCO": {
"status": "ok",
"coverage_oos": 0.8819,
"last_date": "2025-10-13",
"gap_days": 191
},
"NAIN": {
"status": "ok",
"coverage_oos": 0.8516,
"last_date": "2025-05-29",
"gap_days": 328
},
"NANM": {
"status": "ok",
"coverage_oos": 0.5716,
"last_date": "2024-11-14",
"gap_days": 524
},
"NEWK": {
"status": "ok",
"coverage_oos": 0.8763,
"last_date": "2025-07-26",
"gap_days": 270
},
"NRLK": {
"status": "ok",
"coverage_oos": 0.6801,
"last_date": "2025-04-29",
"gap_days": 358
},
"OULU": {
"status": "ok",
"coverage_oos": 0.9983,
"last_date": "2026-04-21",
"gap_days": 1
},
"PSNM": {
"status": "ok",
"coverage_oos": 0.9514,
"last_date": "2026-04-19",
"gap_days": 3
},
"PTFM": {
"status": "low_coverage",
"coverage_oos": 0.0009,
"last_date": "2025-01-02",
"gap_days": 475
},
"PWNK": {
"status": "ok",
"coverage_oos": 0.8763,
"last_date": "2025-07-26",
"gap_days": 270
},
"ROME": {
"status": "ok",
"coverage_oos": 0.8485,
"last_date": "2026-04-21",
"gap_days": 1
},
"SNAE": {
"status": "low_coverage",
"coverage_oos": 0.0009,
"last_date": "2025-01-02",
"gap_days": 475
},
"SOPB": {
"status": "ok",
"coverage_oos": 0.8767,
"last_date": "2025-07-25",
"gap_days": 271
},
"SOPO": {
"status": "ok",
"coverage_oos": 0.8767,
"last_date": "2025-07-25",
"gap_days": 271
},
"TERA": {
"status": "ok",
"coverage_oos": 0.9466,
"last_date": "2026-02-28",
"gap_days": 53
},
"THUL": {
"status": "ok",
"coverage_oos": 0.8763,
"last_date": "2025-07-26",
"gap_days": 270
},
"TSMB": {
"status": "low_coverage",
"coverage_oos": 0.0004,
"last_date": "2020-05-01",
"gap_days": 2182
}
},
"usgs_detail": {
"status": "ok",
"years_present": [
2020,
2021,
2022,
2023,
2024,
2025,
2026
],
"total_events": 47860,
"reliable_end": "2026-03-08",
"note": "Catalogue stability: complete within ~30 days; using today-45 days"
},
"sidc_detail": {
"status": "ok",
"last_date": "2026-03-31",
"definitive_end": "2026-03-31",
"reliable_end": "2026-03-23",
"n_records": 76061,
"note": "Using provisional values (prov=1); definitive lag ~6 months"
}
}

View file

@ -0,0 +1,38 @@
OUT-OF-SAMPLE DATA AVAILABILITY REPORT
Run date: 2026-04-22
OOS window: 2020-01-01 → 2025-04-29 (constrained by NMDB)
NMDB stations with ≥50% OOS coverage: 30
AATB coverage= 94.7% last=2026-04-20
APTY coverage= 99.8% last=2026-04-21
ATHN coverage= 89.7% last=2026-04-21
BKSN coverage= 98.6% last=2026-04-21
DOMB coverage= 84.8% last=2026-04-20
DOMC coverage= 96.9% last=2026-04-20
FSMT coverage= 87.5% last=2025-07-26
INVK coverage= 87.6% last=2025-07-26
IRK3 coverage= 58.0% last=2025-05-22
IRKT coverage= 90.1% last=2026-04-21
JUNG coverage= 99.8% last=2026-04-21
JUNG1 coverage= 99.6% last=2026-04-21
KERG coverage= 97.1% last=2026-02-28
KIEL2 coverage= 59.3% last=2023-10-03
LMKS coverage= 55.9% last=2023-07-10
MOSC coverage= 82.9% last=2025-03-23
MRNY coverage= 77.6% last=2025-02-28
MXCO coverage= 88.2% last=2025-10-13
NAIN coverage= 85.2% last=2025-05-29
NANM coverage= 57.2% last=2024-11-14
NEWK coverage= 87.6% last=2025-07-26
NRLK coverage= 68.0% last=2025-04-29
OULU coverage= 99.8% last=2026-04-21
PSNM coverage= 95.1% last=2026-04-19
PWNK coverage= 87.6% last=2025-07-26
ROME coverage= 84.9% last=2026-04-21
SOPB coverage= 87.7% last=2025-07-25
SOPO coverage= 87.7% last=2025-07-25
TERA coverage= 94.7% last=2026-02-28
THUL coverage= 87.6% last=2025-07-26
USGS: years available = [2020, 2021, 2022, 2023, 2024, 2025, 2026] events = 47,860
SIDC: last_date = 2026-03-31 (provisional note: Using provisional values (prov=1); definitive lag ~6 months)

View file

@ -0,0 +1,127 @@
# Pre-Registered Predictions — Out-of-Sample CR–Seismic Validation
**Written:** 2026-04-22T00:44:30Z
**Git SHA:** unknown
**OOS window:** 2020-01-01 → 2025-04-29
**Surrogates:** 100,000 phase-randomisation
This file was created BEFORE loading or analysing any out-of-sample data.
All thresholds are pre-specified. Results are recorded in
`results/out_of_sample_report.md`.
---
## In-sample context (1976–2019)
From scripts 02–05 (Homola replication + stress tests):
| Quantity | Value |
|---|---|
| Dominant peak lag (raw) | 525 days (half solar cycle) |
| Dominant peak \|r\| (raw) | 0.469 |
| r(τ=+15 d) raw | +0.310 (solar-cycle confounded) |
| r(τ=+15 d) HP-detrended | +0.041 |
| In-sample p_global (IAAFT, raw) | 1.000 (NOT significant after surrogate correction) |
| After detrending | p < 0.001 at lags +15 d |
The in-sample dominant peak is at 525 days, not at the claimed +15 days.
r(+15 d) ≈ 0.04 after solar-cycle removal — this is the baseline expectation
for the out-of-sample window.
---
## Pre-registered predictions
### P1 — Sign and location of claimed correlation peak
**Prediction:** If Homola et al.'s mechanism is real, the OOS window should show
a cross-correlation peak at τ ≈ +15 days (cosmic rays leading seismic activity
by 15 days) with **positive sign** (positive CR deviation → elevated seismic
Mw-sum 15 days later).
**Operationalisation:**
- PASS if r(τ=+15 d) > 0 AND the lag of maximum |r(τ)| for τ ∈ [5, 30] days
is within ±3 days of +15 days.
- FAIL otherwise.
**Baseline from in-sample HP-detrended:** r(+15 d) ≈ +0.041
**Monte Carlo tolerance (at 100,000 surrogates):** ±0.0063
### P2 — Significance and solar-phase trend
**Prediction:** The OOS window (2020–2025) covers the Solar Cycle 25
rising phase, approaching the predicted 2025–2027 solar maximum. Homola's
model predicts the CR–seismic correlation should be in a RISING phase of its
~11-year envelope (the last in-sample envelope peak was near 2014).
**Operationalisation:**
- PASS if: (a) p_global (phase-surrogate) < 0.05, AND
(b) r(τ=+15 d) in rolling 18-month windows shows a non-negative trend
(slope ≥ 0) across the OOS period.
- PARTIAL if (a) holds but (b) does not.
- FAIL if p_global ≥ 0.05.
### P3 — Rolling-window lag stability
**Prediction:** The lag at which r(τ) is maximised for τ ∈ [5, 30] days should
be stable to within ±3 days across 18-month rolling windows of the OOS data.
**Operationalisation:**
- PASS if std(τ*) ≤ 5 days across rolling sub-windows where a peak
in [5, 30] days exists.
- FAIL if std(τ*) > 10 days or peaks migrate outside [5, 30] days in majority
of windows.
### P4 — Geographic non-localisation
**Prediction:** Per Homola et al.'s own result, the correlation should be GLOBAL
(disappear in location-specific analyses). After BH FDR correction at q=0.05,
the number of significant (station, cell) pairs should NOT significantly exceed
the expected false-discovery count.
**Operationalisation:**
- PASS if n_significant ≤ 2 × expected_FP (BH q=0.05).
- FAIL if n_significant > 2 × expected_FP AND a clear geographic cluster emerges.
---
## Falsification criteria (pre-specified)
### F1 — No peak in claimed window
**Criterion:** No lag τ ∈ [5, 30] days has |r(τ)| exceeding the 95th percentile
of the phase-surrogate distribution.
- F1 TRIGGERED (Homola falsified) if the criterion holds across the full OOS
window AND across all 18-month sub-windows.
### F2 — Peak lag drift
**Criterion:** The optimal lag τ* for τ ∈ [5, 30] days drifts by more than
±10 days between any two adjacent 18-month rolling windows.
- F2 TRIGGERED if drift > 10 days in majority of window pairs.
### F3 — Unexpected geographic localisation
**Criterion:** The OOS correlation is STRONGER in a specific geographic region
than globally — the inverse of Homola's own finding.
- F3 TRIGGERED if n_significant > 3 × expected_FP AND a geographic cluster
with min p < BH-threshold is identified.
- This would be informative negative evidence: a real local effect, but NOT
the global cosmic-ray mechanism Homola proposed.
---
## Analysis decisions (pre-specified)
| Parameter | Value | Reason |
|---|---|---|
| Bin size | 5 days | Matches Homola et al. |
| Lag range | ±200 days | Covers claimed +15 d with context; shorter window makes ±1000 d infeasible |
| Surrogates | 100,000 | GPU-accelerated; MC tolerance ±0.0063 |
| Surrogate method | Phase randomisation | Preserves power spectrum; faster than IAAFT |
| Detrending | Linear + sunspot OLS | HP/STL inappropriate for <1 solar cycle window |
| Min stations/bin | 3 | Matches Homola et al. |
| Min magnitude | 4.0 | Matches Homola et al. |
| Rolling window | 18 months | Minimum for meaningful correlation at 5-day bins |
| Rolling step | 3 months | Smooth time evolution |
| FDR | BH q=0.05 | Standard |
---
*This file is part of a pre-registered analysis. Results are reported regardless
of direction in `results/out_of_sample_report.md`.*