cosmicraysandearthquakes/tests/test_usgs.py
root e5a812fa14 Initial commit: full analysis pipeline source code
Scripts 01-08 implement the complete cosmic-ray/earthquake correlation
analysis from data ingestion through out-of-sample validation and
combined timeseries sinusoid fitting.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 02:45:10 +02:00

66 lines
2.1 KiB
Python

"""Tests for src/crq/ingest/usgs.py"""
from __future__ import annotations
import textwrap
from pathlib import Path
import numpy as np
import pandas as pd
import pytest
from crq.ingest.usgs import compute_daily_seismic, parse_usgs_csv
# Sample catalogue used by the tests below: a header row plus three events,
# two dated 2019-01-01 and one dated 2019-01-02. The literal is indented for
# readability; dedent() strips that common indent again.
USGS_CSV = textwrap.dedent(
    """\
    time,latitude,longitude,depth,mag
    2019-01-01T01:00:00.000Z,-55.877,-1.89,15,6.3
    2019-01-01T05:30:00.000Z,35.556,-121.351,6,4.04
    2019-01-02T12:00:00.000Z,43.7,84.542,15,5.7
    """
)
@pytest.fixture
def usgs_csv_file(tmp_path: Path) -> Path:
    """Write the sample catalogue to a temporary file and return its path.

    The file is created as ``usgs-2019.csv`` inside the ``tmp_path``
    directory handed to this fixture, with ``USGS_CSV`` as its content.
    """
    csv_path = tmp_path / "usgs-2019.csv"
    # Pin the encoding so the bytes on disk do not depend on the platform's
    # locale default.
    csv_path.write_text(USGS_CSV, encoding="utf-8")
    return csv_path
class TestParseUsgsCsv:
    """Checks on the frame ``parse_usgs_csv`` builds from the sample file."""

    def test_row_count(self, usgs_csv_file: Path) -> None:
        """The three data rows of the sample file give a result of length 3."""
        parsed = parse_usgs_csv(usgs_csv_file)
        assert len(parsed) == 3

    def test_index_is_tz_naive(self, usgs_csv_file: Path) -> None:
        """The index carries no timezone, although the input stamps end in ``Z``."""
        parsed_index = parse_usgs_csv(usgs_csv_file).index
        assert parsed_index.tz is None

    def test_numeric_columns(self, usgs_csv_file: Path) -> None:
        """``mag`` has float dtype and ``depth`` has some numeric dtype."""
        parsed = parse_usgs_csv(usgs_csv_file)
        mag_dtype = parsed["mag"].dtype
        assert mag_dtype == float
        depth_column = parsed["depth"]
        assert pd.api.types.is_numeric_dtype(depth_column)
class TestDailySeismic:
    """Tests for the per-day aggregation returned by ``compute_daily_seismic``."""

    def test_returns_series(self, usgs_csv_file: Path) -> None:
        """The daily result exposes a ``mag`` column."""
        events = parse_usgs_csv(usgs_csv_file)
        daily = compute_daily_seismic(events)
        assert "mag" in daily.columns

    def test_log_avg_is_not_arithmetic_mean(self, usgs_csv_file: Path) -> None:
        """The first day's value must be strictly above the arithmetic mean.

        The sample file has two events on its first day (magnitudes 6.3 and
        4.04). The comparison is strict so that an implementation returning
        the plain arithmetic mean fails this test.

        NOTE(review): assumes the log-power average is strictly greater than
        the arithmetic mean whenever the inputs differ -- confirm against the
        formula in ``compute_daily_seismic``.
        """
        events = parse_usgs_csv(usgs_csv_file)
        daily = compute_daily_seismic(events)
        day1_mag = daily["mag"].iloc[0]
        arith = (6.3 + 4.04) / 2
        assert day1_mag > arith

    def test_no_events_day_is_nan(self) -> None:
        """Days with no events should be NaN, not 0."""
        gap_day = pd.Timestamp("2019-01-02")
        # Put events on both sides of the gap day. With a single event the
        # result has no reason to include 2019-01-02 at all, and the guarded
        # assertion below would never execute.
        idx = pd.to_datetime(["2019-01-01T00:00:00", "2019-01-03T00:00:00"])
        events = pd.DataFrame({"mag": [5.0, 5.0]}, index=idx)
        daily = compute_daily_seismic(events)
        # A result that omits the empty day entirely is accepted; one that
        # lists it must hold NaN there.
        if gap_day in daily.index:
            assert np.isnan(daily.loc["2019-01-02", "mag"])