From 1832f73f74439d0303d568818fea3a112fd58a03 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 22 Apr 2026 02:45:19 +0200 Subject: [PATCH] Add pre-registration and data availability report Pre-registered predictions written BEFORE any OOS analysis data is loaded (UTC 2026-04-22T00:44:30). OOS window: 2020-01-01 to 2025-04-29, constrained by NMDB reliable end date. 30 NMDB stations pass coverage threshold in this window. Co-Authored-By: Claude Sonnet 4.6 --- results/data_availability.json | 358 +++++++++++++++++++++++++++++++++ results/data_availability.txt | 38 ++++ results/prereg_predictions.md | 127 ++++++++++++ 3 files changed, 523 insertions(+) create mode 100644 results/data_availability.json create mode 100644 results/data_availability.txt create mode 100644 results/prereg_predictions.md diff --git a/results/data_availability.json b/results/data_availability.json new file mode 100644 index 0000000..5c8dbe9 --- /dev/null +++ b/results/data_availability.json @@ -0,0 +1,358 @@ +{ + "run_date": "2026-04-22", + "oos_start": "2020-01-01", + "oos_end": "2025-04-29", + "constrained_by": "NMDB", + "nmdb_reliable_end": "2025-04-29", + "usgs_reliable_end": "2026-03-08", + "sidc_reliable_end": "2026-03-23", + "good_stations_oos": [ + "AATB", + "APTY", + "ATHN", + "BKSN", + "DOMB", + "DOMC", + "FSMT", + "INVK", + "IRK3", + "IRKT", + "JUNG", + "JUNG1", + "KERG", + "KIEL2", + "LMKS", + "MOSC", + "MRNY", + "MXCO", + "NAIN", + "NANM", + "NEWK", + "NRLK", + "OULU", + "PSNM", + "PWNK", + "ROME", + "SOPB", + "SOPO", + "TERA", + "THUL" + ], + "flagged_stations": [ + "AATA", + "DJON", + "DRBS", + "FSMT", + "HRMS", + "INVK", + "IRK2", + "IRK3", + "KERG", + "KIEL2", + "LMKS", + "MOSC", + "MRNY", + "MXCO", + "NAIN", + "NANM", + "NEWK", + "NRLK", + "PTFM", + "PWNK", + "SNAE", + "SOPB", + "SOPO", + "TERA", + "THUL", + "TSMB" + ], + "nmdb_station_detail": { + "AATA": { + "status": "low_coverage", + "coverage_oos": 0.2691, + "last_date": "2022-09-14", + "gap_days": 1316 + }, + "AATB": { + 
"status": "ok", + "coverage_oos": 0.947, + "last_date": "2026-04-20", + "gap_days": 2 + }, + "APTY": { + "status": "ok", + "coverage_oos": 0.9978, + "last_date": "2026-04-21", + "gap_days": 1 + }, + "ARNM": { + "status": "no_data", + "coverage_oos": 0.0, + "last_date": null, + "gap_days": null + }, + "ATHN": { + "status": "ok", + "coverage_oos": 0.8967, + "last_date": "2026-04-21", + "gap_days": 1 + }, + "BKSN": { + "status": "ok", + "coverage_oos": 0.9861, + "last_date": "2026-04-21", + "gap_days": 1 + }, + "CALM": { + "status": "no_data", + "coverage_oos": 0.0, + "last_date": null, + "gap_days": null + }, + "DJON": { + "status": "low_coverage", + "coverage_oos": 0.0499, + "last_date": "2021-12-17", + "gap_days": 1587 + }, + "DOMB": { + "status": "ok", + "coverage_oos": 0.8481, + "last_date": "2026-04-20", + "gap_days": 2 + }, + "DOMC": { + "status": "ok", + "coverage_oos": 0.9688, + "last_date": "2026-04-20", + "gap_days": 2 + }, + "DRBS": { + "status": "low_coverage", + "coverage_oos": 0.0226, + "last_date": "2020-02-21", + "gap_days": 2252 + }, + "FSMT": { + "status": "ok", + "coverage_oos": 0.8746, + "last_date": "2025-07-26", + "gap_days": 270 + }, + "HRMS": { + "status": "low_coverage", + "coverage_oos": 0.0013, + "last_date": "2021-06-01", + "gap_days": 1786 + }, + "INVK": { + "status": "ok", + "coverage_oos": 0.8763, + "last_date": "2025-07-26", + "gap_days": 270 + }, + "IRK2": { + "status": "low_coverage", + "coverage_oos": 0.0751, + "last_date": "2020-07-27", + "gap_days": 2095 + }, + "IRK3": { + "status": "ok", + "coverage_oos": 0.5799, + "last_date": "2025-05-22", + "gap_days": 335 + }, + "IRKT": { + "status": "ok", + "coverage_oos": 0.9015, + "last_date": "2026-04-21", + "gap_days": 1 + }, + "JBGO": { + "status": "low_coverage", + "coverage_oos": 0.3481, + "last_date": "2026-04-20", + "gap_days": 2 + }, + "JUNG": { + "status": "ok", + "coverage_oos": 0.9978, + "last_date": "2026-04-21", + "gap_days": 1 + }, + "JUNG1": { + "status": "ok", + 
"coverage_oos": 0.9957, + "last_date": "2026-04-21", + "gap_days": 1 + }, + "KERG": { + "status": "ok", + "coverage_oos": 0.9709, + "last_date": "2026-02-28", + "gap_days": 53 + }, + "KIEL": { + "status": "no_data", + "coverage_oos": 0.0, + "last_date": null, + "gap_days": null + }, + "KIEL2": { + "status": "ok", + "coverage_oos": 0.5929, + "last_date": "2023-10-03", + "gap_days": 932 + }, + "LMKS": { + "status": "ok", + "coverage_oos": 0.5586, + "last_date": "2023-07-10", + "gap_days": 1017 + }, + "MCRL": { + "status": "low_coverage", + "coverage_oos": 0.0, + "last_date": null, + "gap_days": null + }, + "MGDN": { + "status": "no_data", + "coverage_oos": 0.0, + "last_date": null, + "gap_days": null + }, + "MOSC": { + "status": "ok", + "coverage_oos": 0.8286, + "last_date": "2025-03-23", + "gap_days": 395 + }, + "MRNY": { + "status": "ok", + "coverage_oos": 0.776, + "last_date": "2025-02-28", + "gap_days": 418 + }, + "MXCO": { + "status": "ok", + "coverage_oos": 0.8819, + "last_date": "2025-10-13", + "gap_days": 191 + }, + "NAIN": { + "status": "ok", + "coverage_oos": 0.8516, + "last_date": "2025-05-29", + "gap_days": 328 + }, + "NANM": { + "status": "ok", + "coverage_oos": 0.5716, + "last_date": "2024-11-14", + "gap_days": 524 + }, + "NEWK": { + "status": "ok", + "coverage_oos": 0.8763, + "last_date": "2025-07-26", + "gap_days": 270 + }, + "NRLK": { + "status": "ok", + "coverage_oos": 0.6801, + "last_date": "2025-04-29", + "gap_days": 358 + }, + "OULU": { + "status": "ok", + "coverage_oos": 0.9983, + "last_date": "2026-04-21", + "gap_days": 1 + }, + "PSNM": { + "status": "ok", + "coverage_oos": 0.9514, + "last_date": "2026-04-19", + "gap_days": 3 + }, + "PTFM": { + "status": "low_coverage", + "coverage_oos": 0.0009, + "last_date": "2025-01-02", + "gap_days": 475 + }, + "PWNK": { + "status": "ok", + "coverage_oos": 0.8763, + "last_date": "2025-07-26", + "gap_days": 270 + }, + "ROME": { + "status": "ok", + "coverage_oos": 0.8485, + "last_date": "2026-04-21", + 
"gap_days": 1 + }, + "SNAE": { + "status": "low_coverage", + "coverage_oos": 0.0009, + "last_date": "2025-01-02", + "gap_days": 475 + }, + "SOPB": { + "status": "ok", + "coverage_oos": 0.8767, + "last_date": "2025-07-25", + "gap_days": 271 + }, + "SOPO": { + "status": "ok", + "coverage_oos": 0.8767, + "last_date": "2025-07-25", + "gap_days": 271 + }, + "TERA": { + "status": "ok", + "coverage_oos": 0.9466, + "last_date": "2026-02-28", + "gap_days": 53 + }, + "THUL": { + "status": "ok", + "coverage_oos": 0.8763, + "last_date": "2025-07-26", + "gap_days": 270 + }, + "TSMB": { + "status": "low_coverage", + "coverage_oos": 0.0004, + "last_date": "2020-05-01", + "gap_days": 2182 + } + }, + "usgs_detail": { + "status": "ok", + "years_present": [ + 2020, + 2021, + 2022, + 2023, + 2024, + 2025, + 2026 + ], + "total_events": 47860, + "reliable_end": "2026-03-08", + "note": "Catalogue stability: complete within ~30 days; using today-45 days" + }, + "sidc_detail": { + "status": "ok", + "last_date": "2026-03-31", + "definitive_end": "2026-03-31", + "reliable_end": "2026-03-23", + "n_records": 76061, + "note": "Using provisional values (prov=1); definitive lag ~6 months" + } +} \ No newline at end of file diff --git a/results/data_availability.txt b/results/data_availability.txt new file mode 100644 index 0000000..0d49aca --- /dev/null +++ b/results/data_availability.txt @@ -0,0 +1,38 @@ +OUT-OF-SAMPLE DATA AVAILABILITY REPORT +Run date: 2026-04-22 +OOS window: 2020-01-01 → 2025-04-29 (constrained by NMDB) + +NMDB stations with ≥50% OOS coverage: 30 + AATB coverage= 94.7% last=2026-04-20 + APTY coverage= 99.8% last=2026-04-21 + ATHN coverage= 89.7% last=2026-04-21 + BKSN coverage= 98.6% last=2026-04-21 + DOMB coverage= 84.8% last=2026-04-20 + DOMC coverage= 96.9% last=2026-04-20 + FSMT coverage= 87.5% last=2025-07-26 + INVK coverage= 87.6% last=2025-07-26 + IRK3 coverage= 58.0% last=2025-05-22 + IRKT coverage= 90.1% last=2026-04-21 + JUNG coverage= 99.8% last=2026-04-21 + JUNG1 
coverage= 99.6% last=2026-04-21 + KERG coverage= 97.1% last=2026-02-28 + KIEL2 coverage= 59.3% last=2023-10-03 + LMKS coverage= 55.9% last=2023-07-10 + MOSC coverage= 82.9% last=2025-03-23 + MRNY coverage= 77.6% last=2025-02-28 + MXCO coverage= 88.2% last=2025-10-13 + NAIN coverage= 85.2% last=2025-05-29 + NANM coverage= 57.2% last=2024-11-14 + NEWK coverage= 87.6% last=2025-07-26 + NRLK coverage= 68.0% last=2025-04-29 + OULU coverage= 99.8% last=2026-04-21 + PSNM coverage= 95.1% last=2026-04-19 + PWNK coverage= 87.6% last=2025-07-26 + ROME coverage= 84.9% last=2026-04-21 + SOPB coverage= 87.7% last=2025-07-25 + SOPO coverage= 87.7% last=2025-07-25 + TERA coverage= 94.7% last=2026-02-28 + THUL coverage= 87.6% last=2025-07-26 + +USGS: years available = [2020, 2021, 2022, 2023, 2024, 2025, 2026] events = 47,860 +SIDC: last_date = 2026-03-31 (provisional note: Using provisional values (prov=1); definitive lag ~6 months) \ No newline at end of file diff --git a/results/prereg_predictions.md b/results/prereg_predictions.md new file mode 100644 index 0000000..24dd226 --- /dev/null +++ b/results/prereg_predictions.md @@ -0,0 +1,127 @@ +# Pre-Registered Predictions — Out-of-Sample CR–Seismic Validation + +**Written:** 2026-04-22T00:44:30Z +**Git SHA:** unknown +**OOS window:** 2020-01-01 → 2025-04-29 +**Surrogates:** 100,000 phase-randomisation + +This file was created BEFORE loading or analysing any out-of-sample data. +All thresholds are pre-specified. Results are recorded in +`results/out_of_sample_report.md`. 
+ +--- + +## In-sample context (1976–2019) + +From scripts 02–05 (Homola replication + stress tests): + +| Quantity | Value | +|---|---| +| Dominant peak lag (raw) | −525 days (half solar cycle) | +| Dominant peak \|r\| (raw) | 0.469 | +| r(τ=+15 d) raw | +0.310 (solar-cycle confounded) | +| r(τ=+15 d) HP-detrended | +0.041 | +| In-sample p_global (IAAFT, raw) | 1.000 (NOT significant after surrogate correction) | +| After detrending | p < 0.001 at lags ≠ +15 d | + +The in-sample dominant peak is at −525 days, not at the claimed +15 days. +r(+15 d) ≈ 0.04 after solar-cycle removal — this is the baseline expectation +for the out-of-sample window. + +--- + +## Pre-registered predictions + +### P1 — Sign and location of claimed correlation peak +**Prediction:** If Homola et al.'s mechanism is real, the OOS window should show +a cross-correlation peak at τ ≈ +15 days (cosmic rays leading seismic activity +by 15 days) with **positive sign** (positive CR deviation → elevated seismic +Mw-sum 15 days later). + +**Operationalisation:** +- PASS if r(τ=+15 d) > 0 AND the lag of maximum |r(τ)| for τ ∈ [5, 30] days + is within ±3 days of +15 days. +- FAIL otherwise. + +**Baseline from in-sample HP-detrended:** r(+15 d) ≈ +0.041 +**Monte Carlo tolerance (at 100,000 surrogates):** ±0.0063 + +### P2 — Significance and solar-phase trend +**Prediction:** The OOS window (2020–2025) covers Solar Cycle 25 +rising phase, approaching the predicted 2025–2027 solar maximum. Homola's +model predicts the CR–seismic correlation should be in a RISING phase of its +~11-year envelope (the last in-sample envelope peak was near 2014). + +**Operationalisation:** +- PASS if: (a) p_global (phase-surrogate) < 0.05, AND + (b) r(τ=+15 d) in rolling 18-month windows shows a non-negative trend + (slope ≥ 0) across the OOS period. +- PARTIAL if (a) holds but (b) does not. +- FAIL if p_global ≥ 0.05. 
+ +### P3 — Rolling-window lag stability +**Prediction:** The lag at which r(τ) is maximised for τ ∈ [5, 30] days should +be stable to within ±3 days across 18-month rolling windows of the OOS data. + +**Operationalisation:** +- PASS if std(τ*) ≤ 5 days across rolling sub-windows where a peak + in [5, 30] days exists. +- FAIL if std(τ*) > 10 days or peaks migrate outside [5, 30] days in the majority + of windows. INCONCLUSIVE otherwise (5 < std(τ*) ≤ 10 days). + +### P4 — Geographic non-localisation +**Prediction:** Per Homola et al.'s own result, the correlation should be GLOBAL +(disappear in location-specific analyses). After BH FDR correction at q=0.05, +the number of significant (station, cell) pairs should NOT significantly exceed +the expected false-discovery count. + +**Operationalisation:** +- PASS if n_significant ≤ 2 × expected_FP (BH q=0.05). +- FAIL if n_significant > 2 × expected_FP AND a clear geographic cluster emerges; INCONCLUSIVE if the count is exceeded without a cluster. + +--- + +## Falsification criteria (pre-specified) + +### F1 — No peak in claimed window +**Criterion:** No lag τ ∈ [5, 30] days has |r(τ)| exceeding the 95th percentile +of the phase-surrogate distribution. + +- F1 TRIGGERED (Homola falsified) if the criterion holds across the full OOS + window AND across all 18-month sub-windows. + +### F2 — Peak lag drift +**Criterion:** The optimal lag τ* for τ ∈ [5, 30] days drifts by more +than 10 days (in either direction) between adjacent 18-month rolling windows. + +- F2 TRIGGERED if drift > 10 days in the majority of adjacent-window pairs. + +### F3 — Unexpected geographic localisation +**Criterion:** The OOS correlation is STRONGER in a specific geographic region +than globally — the inverse of Homola's own finding. + +- F3 TRIGGERED if n_significant > 3 × expected_FP AND a geographic cluster + with min p < BH-threshold is identified. +- This would be informative negative evidence: a real local effect, but NOT + the global cosmic-ray mechanism Homola proposed.
+ +--- + +## Analysis decisions (pre-specified) + +| Parameter | Value | Reason | +|---|---|---| +| Bin size | 5 days | Matches Homola et al. | +| Lag range | ±200 days | Covers the claimed +15 d lag with context; the shorter OOS window makes a ±1000 d range infeasible | +| Surrogates | 100,000 | GPU-accelerated; MC tolerance ±0.0063 | +| Surrogate method | Phase randomisation | Preserves power spectrum; faster than IAAFT | +| Detrending | Linear + sunspot OLS | HP/STL inappropriate for a window shorter than one solar cycle | +| Min stations/bin | 3 | Matches Homola et al. | +| Min magnitude | 4.0 | Matches Homola et al. | +| Rolling window | 18 months | Minimum for meaningful correlation at 5-day bins | +| Rolling step | 3 months | Smooth time evolution | +| FDR | BH q=0.05 | Standard | + +--- +*This file is part of a pre-registered analysis. Results are reported regardless +of direction in `results/out_of_sample_report.md`.*