149 lines
5.2 KiB
Python
149 lines
5.2 KiB
Python
import datetime
|
|
import functools
|
|
import json
|
|
from pathlib import Path
|
|
import typing
|
|
|
|
import dateutil.tz
|
|
import numpy as np
|
|
from scipy.spatial import cKDTree
|
|
from timezonefinder import TimezoneFinder
|
|
|
|
|
|
WX_DATA_LOCATION = Path(__file__).absolute().parent
|
|
WxStationIdType = str
|
|
MonthType = str
|
|
# HourlyTempType - 24 temperatures, one for each hour of the day (the average for the given month).
|
|
HourlyTempType = typing.List[float]
|
|
WxStationRecordType = typing.Tuple[WxStationIdType, str, float, float]
|
|
|
|
|
|
@functools.lru_cache()
|
|
def wx_data() -> typing.Dict[WxStationIdType, typing.Dict[MonthType, HourlyTempType]]:
|
|
"""
|
|
Load the weather data (temperature in kelvin).
|
|
|
|
The data is structured by station location, and for each station location, by month.
|
|
|
|
"""
|
|
with (WX_DATA_LOCATION / 'global_weather_set.json').open("r") as json_file:
|
|
data = json.load(json_file)
|
|
|
|
for station in list(data.keys()):
|
|
for month in list(data[station].keys()):
|
|
if not np.any(np.isnan(data[station][month])):
|
|
data[station][month] = tuple(
|
|
273.15 + np.array(data[station][month]))
|
|
return data
|
|
|
|
|
|
@functools.lru_cache()
|
|
def wx_station_data() -> typing.Dict[WxStationIdType, WxStationRecordType]:
|
|
"""
|
|
Return a dictionary of ``station-id: station records``, where station records
|
|
are of the form ``(station-id, station-name, station-latitude, station-longitude)``.
|
|
|
|
The stations returned are guaranteed to have valid weather data.
|
|
|
|
"""
|
|
weather_data = wx_data()
|
|
station_data = {}
|
|
fixed_delimits = [0, 12, 13, 44, 51, 60, 69, 90, 91]
|
|
station_file = WX_DATA_LOCATION / 'hadisd_station_fullinfo_v311_202001p.txt'
|
|
|
|
for line in station_file.open('rt'):
|
|
start_end_positions = zip(fixed_delimits[:-1], fixed_delimits[1:])
|
|
split_vals = [line[start:end] for start, end in start_end_positions]
|
|
station_location = (
|
|
split_vals[0], split_vals[2], float(split_vals[3]), float(split_vals[4]),
|
|
)
|
|
# We only consider stations with weather data, don't include the rest.
|
|
if split_vals[0] in weather_data:
|
|
station_data[split_vals[0]] = station_location
|
|
return station_data
|
|
|
|
|
|
@functools.lru_cache()
|
|
def _wx_station_kdtree() -> cKDTree:
|
|
"""Build a kd-tree of wx station longitude & latitudes (note the coordinate order)"""
|
|
station_data = wx_station_data().values()
|
|
coords = np.array([(stn_record[3], stn_record[2])
|
|
for stn_record in station_data])
|
|
return cKDTree(coords)
|
|
|
|
|
|
def mean_hourly_temperatures(wx_station: str, month: int) -> HourlyTempType:
|
|
"""
|
|
Return the mean monthly temperature for the given weather station and month.
|
|
|
|
Returns
|
|
-------
|
|
|
|
temperatures: List[24 floats]
|
|
A list containing 24 temperature values, one for each hour, in kelvin.
|
|
Index 0 of the result corresponds to hour 00:00 (UTC), and index 23 (the last) to 23:00 (UTC).
|
|
|
|
"""
|
|
# Note that the current dataset encodes month number as a string.
|
|
return wx_data()[wx_station][str(month)]
|
|
|
|
|
|
def timezone_at(*, latitude: float, longitude: float) -> datetime.tzinfo:
|
|
"""Find a timezone for the given location, or raise."""
|
|
tf = TimezoneFinder()
|
|
tz_name = tf.timezone_at(lat=latitude, lng=longitude)
|
|
tz = dateutil.tz.gettz(tz_name)
|
|
if tz_name is None or tz is None:
|
|
raise ValueError(
|
|
f"Unable to determine the timezone of given location "
|
|
f"(lat={latitude}, lng={longitude})"
|
|
)
|
|
return tz
|
|
|
|
|
|
def refine_hourly_data(source_times, hourly_data, npts):
|
|
"""
|
|
Given times (in hours), where each data point is on the hour,
|
|
interpolate the data to mid-point of the returned boundaries.
|
|
|
|
For example:
|
|
|
|
>>> time_bounds, data = refine_hourly_data(list(range(24)), list(range(24)), 24)
|
|
>>> len(time_bounds), len(data)
|
|
(25, 24)
|
|
>>> time_bounds
|
|
array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
|
|
13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.])
|
|
>>> data
|
|
array([ 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5,
|
|
11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5,
|
|
22.5, 11.5])
|
|
|
|
The source times need not be monotonic, which allows for data to be
|
|
time-offset shifted. For example:
|
|
|
|
>>> time_bounds, data = refine_hourly_data(
|
|
... list(range(6, 28)) + [4, 5], list(range(24)), 24)
|
|
>>> data
|
|
array([18.5, 19.5, 20.5, 21.5, 22.5, 11.5, 0.5, 1.5, 2.5, 3.5, 4.5,
|
|
5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5,
|
|
16.5, 17.5])
|
|
|
|
"""
|
|
target_time_boundaries, step = np.linspace(
|
|
0, 24, npts + 1, retstep=True, endpoint=True,
|
|
)
|
|
target_times = target_time_boundaries[:-1] + step / 2
|
|
data = np.interp(target_times, np.array(source_times), hourly_data, period=24)
|
|
return target_time_boundaries, data
|
|
|
|
|
|
def nearest_wx_station(*, longitude: float, latitude: float) -> WxStationRecordType:
|
|
"""
|
|
Given a latitude & longitude, return the nearest station with valid weather data.
|
|
|
|
"""
|
|
ktree = _wx_station_kdtree()
|
|
station_data = list(wx_station_data().values())
|
|
dd, ii = ktree.query((longitude, latitude), k=[1])
|
|
return station_data[ii[0]]
|