# Commit note: Scripts 01-08 implement the complete cosmic-ray/earthquake
# correlation analysis, from data ingestion through out-of-sample validation
# and combined time-series sinusoid fitting.
# Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
# File-viewer metadata: 66 lines, 2.1 KiB, Python.
"""Tests for src/crq/ingest/usgs.py"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import textwrap
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
from crq.ingest.usgs import compute_daily_seismic, parse_usgs_csv
|
|
|
|
|
|
# Minimal USGS-style catalogue used by the tests below: a header row plus
# three events -- two on 2019-01-01 and one on 2019-01-02.  The trailing
# backslash after the opening quotes suppresses a leading blank line, and
# ``dedent`` strips the source indentation so the header starts at column 0.
USGS_CSV = textwrap.dedent("""\
    time,latitude,longitude,depth,mag
    2019-01-01T01:00:00.000Z,-55.877,-1.89,15,6.3
    2019-01-01T05:30:00.000Z,35.556,-121.351,6,4.04
    2019-01-02T12:00:00.000Z,43.7,84.542,15,5.7
""")
|
|
|
|
|
|
@pytest.fixture
def usgs_csv_file(tmp_path: Path) -> Path:
    """Write the sample catalogue to a temporary file and return its path.

    The file is created inside pytest's per-test ``tmp_path`` directory as
    ``usgs-2019.csv`` and holds the contents of ``USGS_CSV``.
    """
    csv_path = tmp_path / "usgs-2019.csv"
    # Pin the encoding so the fixture does not depend on the platform locale.
    csv_path.write_text(USGS_CSV, encoding="utf-8")
    return csv_path
|
|
|
|
|
|
class TestParseUsgsCsv:
    """Checks on the frame ``parse_usgs_csv`` returns for the sample file."""

    def test_row_count(self, usgs_csv_file: Path) -> None:
        """All three data rows of the fixture are present in the result."""
        df = parse_usgs_csv(usgs_csv_file)
        assert len(df) == 3

    def test_index_is_tz_naive(self, usgs_csv_file: Path) -> None:
        """The index carries no timezone although the input times end in ``Z``."""
        df = parse_usgs_csv(usgs_csv_file)
        # ``.tz`` only exists on a DatetimeIndex; check the type first so a
        # wrong index type fails with a clear assertion, not AttributeError.
        assert isinstance(df.index, pd.DatetimeIndex)
        assert df.index.tz is None

    def test_numeric_columns(self, usgs_csv_file: Path) -> None:
        """``mag`` is float and ``depth`` has a numeric dtype."""
        df = parse_usgs_csv(usgs_csv_file)
        assert df["mag"].dtype == float
        assert pd.api.types.is_numeric_dtype(df["depth"])
|
|
|
|
|
|
class TestDailySeismic:
    """Checks on the daily aggregate produced by ``compute_daily_seismic``."""

    def test_returns_series(self, usgs_csv_file: Path) -> None:
        """The daily result exposes a ``mag`` column."""
        events = parse_usgs_csv(usgs_csv_file)
        daily = compute_daily_seismic(events)
        assert "mag" in daily.columns

    def test_log_avg_is_not_arithmetic_mean(self, usgs_csv_file: Path) -> None:
        """The log-power average is always ≥ arithmetic mean for positive values."""
        events = parse_usgs_csv(usgs_csv_file)
        daily = compute_daily_seismic(events)
        # The first daily row aggregates the two 2019-01-01 fixture events.
        day1_mag = daily["mag"].iloc[0]
        arith = (6.3 + 4.04) / 2
        assert day1_mag >= arith

    def test_no_events_day_is_nan(self) -> None:
        """Days with no events should be NaN, not 0."""
        # Events on either side of 2019-01-02 leave that day as a real gap
        # inside the covered range; a single event could never produce it.
        idx = pd.to_datetime(["2019-01-01T00:00:00", "2019-01-03T00:00:00"])
        events = pd.DataFrame({"mag": [5.0, 5.0]}, index=idx)
        daily = compute_daily_seismic(events)
        # Reindex so the assertion always runs: an omitted gap day becomes
        # NaN, while a zero-filled gap day keeps its 0 and fails the check.
        gap = daily["mag"].reindex([pd.Timestamp("2019-01-02")])
        assert np.isnan(gap.iloc[0])
|