cara/cara/data/weather.py

149 lines
5.2 KiB
Python

import datetime
import functools
import json
from pathlib import Path
import typing
import dateutil.tz
import numpy as np
from scipy.spatial import cKDTree
from timezonefinder import TimezoneFinder
WX_DATA_LOCATION = Path(__file__).absolute().parent
WxStationIdType = str
MonthType = str
# HourlyTempType - 24 temperatures, one for each hour of the day (the average for the given month).
HourlyTempType = typing.List[float]
WxStationRecordType = typing.Tuple[WxStationIdType, str, float, float]
@functools.lru_cache()
def wx_data() -> typing.Dict[WxStationIdType, typing.Dict[MonthType, HourlyTempType]]:
"""
Load the weather data (temperature in kelvin).
The data is structured by station location, and for each station location, by month.
"""
with (WX_DATA_LOCATION / 'global_weather_set.json').open("r") as json_file:
data = json.load(json_file)
for station in list(data.keys()):
for month in list(data[station].keys()):
if not np.any(np.isnan(data[station][month])):
data[station][month] = tuple(
273.15 + np.array(data[station][month]))
return data
@functools.lru_cache()
def wx_station_data() -> typing.Dict[WxStationIdType, WxStationRecordType]:
"""
Return a dictionary of ``station-id: station records``, where station records
are of the form ``(station-id, station-name, station-latitude, station-longitude)``.
The stations returned are guaranteed to have valid weather data.
"""
weather_data = wx_data()
station_data = {}
fixed_delimits = [0, 12, 13, 44, 51, 60, 69, 90, 91]
station_file = WX_DATA_LOCATION / 'hadisd_station_fullinfo_v311_202001p.txt'
for line in station_file.open('rt'):
start_end_positions = zip(fixed_delimits[:-1], fixed_delimits[1:])
split_vals = [line[start:end] for start, end in start_end_positions]
station_location = (
split_vals[0], split_vals[2], float(split_vals[3]), float(split_vals[4]),
)
# We only consider stations with weather data, don't include the rest.
if split_vals[0] in weather_data:
station_data[split_vals[0]] = station_location
return station_data
@functools.lru_cache()
def _wx_station_kdtree() -> cKDTree:
"""Build a kd-tree of wx station longitude & latitudes (note the coordinate order)"""
station_data = wx_station_data().values()
coords = np.array([(stn_record[3], stn_record[2])
for stn_record in station_data])
return cKDTree(coords)
def mean_hourly_temperatures(wx_station: str, month: int) -> HourlyTempType:
"""
Return the mean monthly temperature for the given weather station and month.
Returns
-------
temperatures: List[24 floats]
A list containing 24 temperature values, one for each hour, in kelvin.
Index 0 of the result corresponds to hour 00:00 (UTC), and index 23 (the last) to 23:00 (UTC).
"""
# Note that the current dataset encodes month number as a string.
return wx_data()[wx_station][str(month)]
def timezone_at(*, latitude: float, longitude: float) -> datetime.tzinfo:
"""Find a timezone for the given location, or raise."""
tf = TimezoneFinder()
tz_name = tf.timezone_at(lat=latitude, lng=longitude)
tz = dateutil.tz.gettz(tz_name)
if tz_name is None or tz is None:
raise ValueError(
f"Unable to determine the timezone of given location "
f"(lat={latitude}, lng={longitude})"
)
return tz
def refine_hourly_data(source_times, hourly_data, npts):
"""
Given times (in hours), where each data point is on the hour,
interpolate the data to mid-point of the returned boundaries.
For example:
>>> time_bounds, data = refine_hourly_data(list(range(24)), list(range(24)), 24)
>>> len(time_bounds), len(data)
(25, 24)
>>> time_bounds
array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.])
>>> data
array([ 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5,
11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5,
22.5, 11.5])
The source times need not be monotonic, which allows for data to be
time-offset shifted. For example:
>>> time_bounds, data = refine_hourly_data(
... list(range(6, 28)) + [4, 5], list(range(24)), 24)
>>> data
array([18.5, 19.5, 20.5, 21.5, 22.5, 11.5, 0.5, 1.5, 2.5, 3.5, 4.5,
5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5,
16.5, 17.5])
"""
target_time_boundaries, step = np.linspace(
0, 24, npts + 1, retstep=True, endpoint=True,
)
target_times = target_time_boundaries[:-1] + step / 2
data = np.interp(target_times, np.array(source_times), hourly_data, period=24)
return target_time_boundaries, data
def nearest_wx_station(*, longitude: float, latitude: float) -> WxStationRecordType:
"""
Given a latitude & longitude, return the nearest station with valid weather data.
"""
ktree = _wx_station_kdtree()
station_data = list(wx_station_data().values())
dd, ii = ktree.query((longitude, latitude), k=[1])
return station_data[ii[0]]