Add pre-registration and data availability report
Pre-registered predictions written BEFORE any OOS analysis data is loaded (UTC 2026-04-22T00:44:30). OOS window: 2020-01-01 to 2025-04-29, constrained by NMDB reliable end date. 30 NMDB stations pass coverage threshold in this window. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
e5a812fa14
commit
1832f73f74
3 changed files with 523 additions and 0 deletions
358
results/data_availability.json
Normal file
358
results/data_availability.json
Normal file
|
|
@ -0,0 +1,358 @@
|
|||
{
|
||||
"run_date": "2026-04-22",
|
||||
"oos_start": "2020-01-01",
|
||||
"oos_end": "2025-04-29",
|
||||
"constrained_by": "NMDB",
|
||||
"nmdb_reliable_end": "2025-04-29",
|
||||
"usgs_reliable_end": "2026-03-08",
|
||||
"sidc_reliable_end": "2026-03-23",
|
||||
"good_stations_oos": [
|
||||
"AATB",
|
||||
"APTY",
|
||||
"ATHN",
|
||||
"BKSN",
|
||||
"DOMB",
|
||||
"DOMC",
|
||||
"FSMT",
|
||||
"INVK",
|
||||
"IRK3",
|
||||
"IRKT",
|
||||
"JUNG",
|
||||
"JUNG1",
|
||||
"KERG",
|
||||
"KIEL2",
|
||||
"LMKS",
|
||||
"MOSC",
|
||||
"MRNY",
|
||||
"MXCO",
|
||||
"NAIN",
|
||||
"NANM",
|
||||
"NEWK",
|
||||
"NRLK",
|
||||
"OULU",
|
||||
"PSNM",
|
||||
"PWNK",
|
||||
"ROME",
|
||||
"SOPB",
|
||||
"SOPO",
|
||||
"TERA",
|
||||
"THUL"
|
||||
],
|
||||
"flagged_stations": [
|
||||
"AATA",
|
||||
"DJON",
|
||||
"DRBS",
|
||||
"FSMT",
|
||||
"HRMS",
|
||||
"INVK",
|
||||
"IRK2",
|
||||
"IRK3",
|
||||
"KERG",
|
||||
"KIEL2",
|
||||
"LMKS",
|
||||
"MOSC",
|
||||
"MRNY",
|
||||
"MXCO",
|
||||
"NAIN",
|
||||
"NANM",
|
||||
"NEWK",
|
||||
"NRLK",
|
||||
"PTFM",
|
||||
"PWNK",
|
||||
"SNAE",
|
||||
"SOPB",
|
||||
"SOPO",
|
||||
"TERA",
|
||||
"THUL",
|
||||
"TSMB"
|
||||
],
|
||||
"nmdb_station_detail": {
|
||||
"AATA": {
|
||||
"status": "low_coverage",
|
||||
"coverage_oos": 0.2691,
|
||||
"last_date": "2022-09-14",
|
||||
"gap_days": 1316
|
||||
},
|
||||
"AATB": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.947,
|
||||
"last_date": "2026-04-20",
|
||||
"gap_days": 2
|
||||
},
|
||||
"APTY": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.9978,
|
||||
"last_date": "2026-04-21",
|
||||
"gap_days": 1
|
||||
},
|
||||
"ARNM": {
|
||||
"status": "no_data",
|
||||
"coverage_oos": 0.0,
|
||||
"last_date": null,
|
||||
"gap_days": null
|
||||
},
|
||||
"ATHN": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8967,
|
||||
"last_date": "2026-04-21",
|
||||
"gap_days": 1
|
||||
},
|
||||
"BKSN": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.9861,
|
||||
"last_date": "2026-04-21",
|
||||
"gap_days": 1
|
||||
},
|
||||
"CALM": {
|
||||
"status": "no_data",
|
||||
"coverage_oos": 0.0,
|
||||
"last_date": null,
|
||||
"gap_days": null
|
||||
},
|
||||
"DJON": {
|
||||
"status": "low_coverage",
|
||||
"coverage_oos": 0.0499,
|
||||
"last_date": "2021-12-17",
|
||||
"gap_days": 1587
|
||||
},
|
||||
"DOMB": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8481,
|
||||
"last_date": "2026-04-20",
|
||||
"gap_days": 2
|
||||
},
|
||||
"DOMC": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.9688,
|
||||
"last_date": "2026-04-20",
|
||||
"gap_days": 2
|
||||
},
|
||||
"DRBS": {
|
||||
"status": "low_coverage",
|
||||
"coverage_oos": 0.0226,
|
||||
"last_date": "2020-02-21",
|
||||
"gap_days": 2252
|
||||
},
|
||||
"FSMT": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8746,
|
||||
"last_date": "2025-07-26",
|
||||
"gap_days": 270
|
||||
},
|
||||
"HRMS": {
|
||||
"status": "low_coverage",
|
||||
"coverage_oos": 0.0013,
|
||||
"last_date": "2021-06-01",
|
||||
"gap_days": 1786
|
||||
},
|
||||
"INVK": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8763,
|
||||
"last_date": "2025-07-26",
|
||||
"gap_days": 270
|
||||
},
|
||||
"IRK2": {
|
||||
"status": "low_coverage",
|
||||
"coverage_oos": 0.0751,
|
||||
"last_date": "2020-07-27",
|
||||
"gap_days": 2095
|
||||
},
|
||||
"IRK3": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.5799,
|
||||
"last_date": "2025-05-22",
|
||||
"gap_days": 335
|
||||
},
|
||||
"IRKT": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.9015,
|
||||
"last_date": "2026-04-21",
|
||||
"gap_days": 1
|
||||
},
|
||||
"JBGO": {
|
||||
"status": "low_coverage",
|
||||
"coverage_oos": 0.3481,
|
||||
"last_date": "2026-04-20",
|
||||
"gap_days": 2
|
||||
},
|
||||
"JUNG": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.9978,
|
||||
"last_date": "2026-04-21",
|
||||
"gap_days": 1
|
||||
},
|
||||
"JUNG1": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.9957,
|
||||
"last_date": "2026-04-21",
|
||||
"gap_days": 1
|
||||
},
|
||||
"KERG": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.9709,
|
||||
"last_date": "2026-02-28",
|
||||
"gap_days": 53
|
||||
},
|
||||
"KIEL": {
|
||||
"status": "no_data",
|
||||
"coverage_oos": 0.0,
|
||||
"last_date": null,
|
||||
"gap_days": null
|
||||
},
|
||||
"KIEL2": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.5929,
|
||||
"last_date": "2023-10-03",
|
||||
"gap_days": 932
|
||||
},
|
||||
"LMKS": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.5586,
|
||||
"last_date": "2023-07-10",
|
||||
"gap_days": 1017
|
||||
},
|
||||
"MCRL": {
|
||||
"status": "low_coverage",
|
||||
"coverage_oos": 0.0,
|
||||
"last_date": null,
|
||||
"gap_days": null
|
||||
},
|
||||
"MGDN": {
|
||||
"status": "no_data",
|
||||
"coverage_oos": 0.0,
|
||||
"last_date": null,
|
||||
"gap_days": null
|
||||
},
|
||||
"MOSC": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8286,
|
||||
"last_date": "2025-03-23",
|
||||
"gap_days": 395
|
||||
},
|
||||
"MRNY": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.776,
|
||||
"last_date": "2025-02-28",
|
||||
"gap_days": 418
|
||||
},
|
||||
"MXCO": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8819,
|
||||
"last_date": "2025-10-13",
|
||||
"gap_days": 191
|
||||
},
|
||||
"NAIN": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8516,
|
||||
"last_date": "2025-05-29",
|
||||
"gap_days": 328
|
||||
},
|
||||
"NANM": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.5716,
|
||||
"last_date": "2024-11-14",
|
||||
"gap_days": 524
|
||||
},
|
||||
"NEWK": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8763,
|
||||
"last_date": "2025-07-26",
|
||||
"gap_days": 270
|
||||
},
|
||||
"NRLK": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.6801,
|
||||
"last_date": "2025-04-29",
|
||||
"gap_days": 358
|
||||
},
|
||||
"OULU": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.9983,
|
||||
"last_date": "2026-04-21",
|
||||
"gap_days": 1
|
||||
},
|
||||
"PSNM": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.9514,
|
||||
"last_date": "2026-04-19",
|
||||
"gap_days": 3
|
||||
},
|
||||
"PTFM": {
|
||||
"status": "low_coverage",
|
||||
"coverage_oos": 0.0009,
|
||||
"last_date": "2025-01-02",
|
||||
"gap_days": 475
|
||||
},
|
||||
"PWNK": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8763,
|
||||
"last_date": "2025-07-26",
|
||||
"gap_days": 270
|
||||
},
|
||||
"ROME": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8485,
|
||||
"last_date": "2026-04-21",
|
||||
"gap_days": 1
|
||||
},
|
||||
"SNAE": {
|
||||
"status": "low_coverage",
|
||||
"coverage_oos": 0.0009,
|
||||
"last_date": "2025-01-02",
|
||||
"gap_days": 475
|
||||
},
|
||||
"SOPB": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8767,
|
||||
"last_date": "2025-07-25",
|
||||
"gap_days": 271
|
||||
},
|
||||
"SOPO": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8767,
|
||||
"last_date": "2025-07-25",
|
||||
"gap_days": 271
|
||||
},
|
||||
"TERA": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.9466,
|
||||
"last_date": "2026-02-28",
|
||||
"gap_days": 53
|
||||
},
|
||||
"THUL": {
|
||||
"status": "ok",
|
||||
"coverage_oos": 0.8763,
|
||||
"last_date": "2025-07-26",
|
||||
"gap_days": 270
|
||||
},
|
||||
"TSMB": {
|
||||
"status": "low_coverage",
|
||||
"coverage_oos": 0.0004,
|
||||
"last_date": "2020-05-01",
|
||||
"gap_days": 2182
|
||||
}
|
||||
},
|
||||
"usgs_detail": {
|
||||
"status": "ok",
|
||||
"years_present": [
|
||||
2020,
|
||||
2021,
|
||||
2022,
|
||||
2023,
|
||||
2024,
|
||||
2025,
|
||||
2026
|
||||
],
|
||||
"total_events": 47860,
|
||||
"reliable_end": "2026-03-08",
|
||||
"note": "Catalogue stability: complete within ~30 days; using today-45 days"
|
||||
},
|
||||
"sidc_detail": {
|
||||
"status": "ok",
|
||||
"last_date": "2026-03-31",
|
||||
"definitive_end": "2026-03-31",
|
||||
"reliable_end": "2026-03-23",
|
||||
"n_records": 76061,
|
||||
"note": "Using provisional values (prov=1); definitive lag ~6 months"
|
||||
}
|
||||
}
|
||||
38
results/data_availability.txt
Normal file
38
results/data_availability.txt
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
OUT-OF-SAMPLE DATA AVAILABILITY REPORT
|
||||
Run date: 2026-04-22
|
||||
OOS window: 2020-01-01 → 2025-04-29 (constrained by NMDB)
|
||||
|
||||
NMDB stations with ≥50% OOS coverage: 30
|
||||
AATB coverage= 94.7% last=2026-04-20
|
||||
APTY coverage= 99.8% last=2026-04-21
|
||||
ATHN coverage= 89.7% last=2026-04-21
|
||||
BKSN coverage= 98.6% last=2026-04-21
|
||||
DOMB coverage= 84.8% last=2026-04-20
|
||||
DOMC coverage= 96.9% last=2026-04-20
|
||||
FSMT coverage= 87.5% last=2025-07-26
|
||||
INVK coverage= 87.6% last=2025-07-26
|
||||
IRK3 coverage= 58.0% last=2025-05-22
|
||||
IRKT coverage= 90.1% last=2026-04-21
|
||||
JUNG coverage= 99.8% last=2026-04-21
|
||||
JUNG1 coverage= 99.6% last=2026-04-21
|
||||
KERG coverage= 97.1% last=2026-02-28
|
||||
KIEL2 coverage= 59.3% last=2023-10-03
|
||||
LMKS coverage= 55.9% last=2023-07-10
|
||||
MOSC coverage= 82.9% last=2025-03-23
|
||||
MRNY coverage= 77.6% last=2025-02-28
|
||||
MXCO coverage= 88.2% last=2025-10-13
|
||||
NAIN coverage= 85.2% last=2025-05-29
|
||||
NANM coverage= 57.2% last=2024-11-14
|
||||
NEWK coverage= 87.6% last=2025-07-26
|
||||
NRLK coverage= 68.0% last=2025-04-29
|
||||
OULU coverage= 99.8% last=2026-04-21
|
||||
PSNM coverage= 95.1% last=2026-04-19
|
||||
PWNK coverage= 87.6% last=2025-07-26
|
||||
ROME coverage= 84.9% last=2026-04-21
|
||||
SOPB coverage= 87.7% last=2025-07-25
|
||||
SOPO coverage= 87.7% last=2025-07-25
|
||||
TERA coverage= 94.7% last=2026-02-28
|
||||
THUL coverage= 87.6% last=2025-07-26
|
||||
|
||||
USGS: years available = [2020, 2021, 2022, 2023, 2024, 2025, 2026] events = 47,860
|
||||
SIDC: last_date = 2026-03-31 (provisional note: Using provisional values (prov=1); definitive lag ~6 months)
|
||||
127
results/prereg_predictions.md
Normal file
127
results/prereg_predictions.md
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
# Pre-Registered Predictions — Out-of-Sample CR–Seismic Validation
|
||||
|
||||
**Written:** 2026-04-22T00:44:30Z
|
||||
**Git SHA:** unknown (not recorded at write time; this file was committed on top of parent commit e5a812fa14)
|
||||
**OOS window:** 2020-01-01 → 2025-04-29
|
||||
**Surrogates:** 100,000 phase-randomisation
|
||||
|
||||
This file was created BEFORE loading or analysing any out-of-sample data.
|
||||
All thresholds are pre-specified. Results are recorded in
|
||||
`results/out_of_sample_report.md`.
|
||||
|
||||
---
|
||||
|
||||
## In-sample context (1976–2019)
|
||||
|
||||
From scripts 02–05 (Homola replication + stress tests):
|
||||
|
||||
| Quantity | Value |
|
||||
|---|---|
|
||||
| Dominant peak lag (raw) | −525 days (half solar cycle) |
|
||||
| Dominant peak \|r\| (raw) | 0.469 |
|
||||
| r(τ=+15 d) raw | +0.310 (solar-cycle confounded) |
|
||||
| r(τ=+15 d) HP-detrended | +0.041 |
|
||||
| In-sample p_global (IAAFT, raw) | 1.000 (NOT significant after surrogate correction) |
|
||||
| After detrending | p < 0.001 at lags ≠ +15 d |
|
||||
|
||||
The in-sample dominant peak is at −525 days, not at the claimed +15 days.
|
||||
r(+15 d) ≈ 0.04 after solar-cycle removal — this is the baseline expectation
|
||||
for the out-of-sample window.
|
||||
|
||||
---
|
||||
|
||||
## Pre-registered predictions
|
||||
|
||||
### P1 — Sign and location of claimed correlation peak
|
||||
**Prediction:** If Homola et al.'s mechanism is real, the OOS window should show
|
||||
a cross-correlation peak at τ ≈ +15 days (cosmic rays leading seismic activity
|
||||
by 15 days) with **positive sign** (positive CR deviation → elevated seismic
|
||||
Mw-sum 15 days later).
|
||||
|
||||
**Operationalisation:**
|
||||
- PASS if r(τ=+15 d) > 0 AND the lag of maximum |r(τ)| for τ ∈ [5, 30] days
|
||||
is within ±3 days of +15 days.
|
||||
- FAIL otherwise.
|
||||
|
||||
**Baseline from in-sample HP-detrended:** r(+15 d) ≈ +0.041
|
||||
**Monte Carlo tolerance (at 100,000 surrogates):** ±0.0063 (≈ 2/√N for N = 100,000)
|
||||
|
||||
### P2 — Significance and solar-phase trend
|
||||
**Prediction:** The OOS window (2020–2025) covers the Solar Cycle 25
|
||||
rising phase, approaching the predicted 2025–2027 solar maximum. Homola's
|
||||
model predicts the CR–seismic correlation should be in a RISING phase of its
|
||||
~11-year envelope (the last in-sample envelope peak was near 2014).
|
||||
|
||||
**Operationalisation:**
|
||||
- PASS if: (a) p_global (phase-surrogate) < 0.05, AND
|
||||
(b) r(τ=+15 d) in rolling 18-month windows shows a non-negative trend
|
||||
(slope ≥ 0) across the OOS period.
|
||||
- PARTIAL if (a) holds but (b) does not.
|
||||
- FAIL if p_global ≥ 0.05.
|
||||
|
||||
### P3 — Rolling-window lag stability
|
||||
**Prediction:** The lag at which r(τ) is maximised for τ ∈ [5, 30] days should
|
||||
be stable to within ±3 days across 18-month rolling windows of the OOS data.
|
||||
|
||||
**Operationalisation:**
|
||||
- PASS if std(τ*) ≤ 5 days across rolling sub-windows where a peak
|
||||
in [5, 30] days exists.
|
||||
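- FAIL if std(τ*) > 10 days or peaks migrate outside [5, 30] days in the majority
|
||||
of windows.
|
||||
- INCONCLUSIVE if 5 < std(τ*) ≤ 10 days and peaks remain inside [5, 30] days in most windows (neither the PASS nor the FAIL threshold is met).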
|
||||
|
||||
### P4 — Geographic non-localisation
|
||||
**Prediction:** Per Homola et al.'s own result, the correlation should be GLOBAL
|
||||
(disappear in location-specific analyses). After BH FDR correction at q=0.05,
|
||||
the number of significant (station, cell) pairs should NOT significantly exceed
|
||||
the expected false-discovery count.
|
||||
|
||||
**Operationalisation:**
|
||||
- PASS if n_significant ≤ 2 × expected_FP (BH q=0.05).
|
||||
- FAIL if n_significant > 2 × expected_FP AND a clear geographic cluster emerges.
|
||||
|
||||
---
|
||||
|
||||
## Falsification criteria (pre-specified)
|
||||
|
||||
### F1 — No peak in claimed window
|
||||
**Criterion:** No lag τ ∈ [5, 30] days has |r(τ)| exceeding the 95th percentile
|
||||
of the phase-surrogate distribution.
|
||||
|
||||
- F1 TRIGGERED (Homola falsified) if the criterion holds across the full OOS
|
||||
window AND across all 18-month sub-windows.
|
||||
|
||||
### F2 — Peak lag drift
|
||||
**Criterion:** The optimal lag τ* for τ ∈ [5, 30] days drifts by more than
|
||||
10 days between any two adjacent 18-month rolling windows.
|
||||
|
||||
- F2 TRIGGERED if drift > 10 days in majority of window pairs.
|
||||
|
||||
### F3 — Unexpected geographic localisation
|
||||
**Criterion:** The OOS correlation is STRONGER in a specific geographic region
|
||||
than globally — the inverse of Homola's own finding.
|
||||
|
||||
- F3 TRIGGERED if n_significant > 3 × expected_FP AND a geographic cluster
|
||||
with min p < BH-threshold is identified.
|
||||
- This would be informative negative evidence: a real local effect, but NOT
|
||||
the global cosmic-ray mechanism Homola proposed.
|
||||
|
||||
---
|
||||
|
||||
## Analysis decisions (pre-specified)
|
||||
|
||||
| Parameter | Value | Reason |
|
||||
|---|---|---|
|
||||
| Bin size | 5 days | Matches Homola et al. |
|
||||
| Lag range | ±200 days | Covers claimed +15 d with context; the shorter OOS window (~5.3 years) makes ±1000 d lags infeasible |
|
||||
| Surrogates | 100,000 | GPU-accelerated; MC tolerance ±0.0063 |
|
||||
| Surrogate method | Phase randomisation | Preserves power spectrum; faster than IAAFT |
|
||||
| Detrending | Linear + sunspot OLS | HP/STL inappropriate for <1 solar cycle window |
|
||||
| Min stations/bin | 3 | Matches Homola et al. |
|
||||
| Min magnitude | 4.0 | Matches Homola et al. |
|
||||
| Rolling window | 18 months | Minimum for meaningful correlation at 5-day bins |
|
||||
| Rolling step | 3 months | Smooth time evolution |
|
||||
| FDR | BH q=0.05 | Standard |
|
||||
|
||||
---
|
||||
*This file is part of a pre-registered analysis. Results are reported regardless
|
||||
of direction in `results/out_of_sample_report.md`.*
|
||||
Loading…
Reference in a new issue