Merge branch 'feature/model_vectorisation' into 'master'

Begin to introduce vectorisation in the model See merge request cara/cara!158
2021-04-06 18:21:14 +00:00 · 2021-04-06 18:21:14 +00:00 · 7152be47e6
commit 7152be47e6
parent 3d3127845c 7ee8276fba
8 changed files with 193 additions and 37 deletions
--- a/cara/models.py
+++ b/cara/models.py
@ -1,15 +1,60 @@
+"""
+This module implements the core CARA models.
+
+The CARA model is a flexible, object-oriented numerical model. It is designed
+to allow the user to swap-out and extend its various components. One of the
+major abstractions of the model is the distinction between virus concentration
+(:class:`ConcentrationModel`) and virus exposure (:class:`ExposureModel`).
+
+The concentration component is a recursive (on model time) model and therefore in order
+to optimise its execution certain layers of caching are implemented. This caching
+mandates that the models in this module, once instantiated, are immutable and
+deterministic (i.e. running the same model twice will result in the same answer).
+
+In order to apply stochastic / non-deterministic analyses therefore you must
+introduce the randomness before constructing the models themselves; the
+:mod:`cara.monte_carlo` module is a good example of doing this - that module uses
+the models defined here to allow you to construct a ConcentrationModel containing
+parameters which are expressed as probability distributions. Under the hood the
+``cara.monte_carlo.ConcentrationModel`` implementation simply samples all of those
+probability distributions to produce many instances of the deterministic model.
+
+The models in this module have been designed for flexibility above performance,
+particularly in the single-model case. By using the natural expressiveness of
+Python we benefit from a powerful, readable and extendable implementation. A
+useful feature of the implementation is that we are able to benefit from numpy
+vectorisation in the case of wanting to run multiple-parameterisations of the model
+at the same time. In order to benefit from this feature you must construct the models
+with an array of parameter values. The values must be either scalar, length 1 arrays,
+or length N arrays, where N is the number of parameterisations to run; N must be
+the same for all parameters of a single model.
+
+"""
 from dataclasses import dataclass
-import functools
 import numpy as np
 import typing

+if not typing.TYPE_CHECKING:
+    from memoization import cached
+else:
+    # Workaround issue https://github.com/lonelyenvoy/python-memoization/issues/18
+    # by providing a no-op cache decorator when type-checking.
+    cached = lambda *cached_args, **cached_kwargs: lambda function: function  # noqa
+
 from .dataclass_utils import nested_replace


+# Define types for items supporting vectorisation. In the future this may be replaced
+# by ``np.ndarray[<type>]`` once/if that syntax is supported. Note that vectorization
+# implies 1d arrays: multi-dimensional arrays are not supported.
+_VectorisedFloat = typing.Union[float, np.ndarray]
+_VectorisedInt = typing.Union[int, np.ndarray]
+
+
@dataclass(frozen=True)
 class Room:
-    # The total volume of the room
-    volume: float
+    #: The total volume of the room
+    volume: _VectorisedFloat


 Time_t = typing.TypeVar('Time_t', float, int)
@ -147,7 +192,7 @@ class _VentilationBase:
    def transition_times(self) -> typing.Set[float]:
        raise NotImplementedError("Subclass must implement")

-    def air_exchange(self, room: Room, time: float) -> float:
+    def air_exchange(self, room: Room, time: float) -> _VectorisedFloat:
        """
        Returns the rate at which air is being exchanged in the given room
        at a given time (in hours).
@ -186,13 +231,15 @@ class MultipleVentilation(_VentilationBase):
            transitions.update(ventilation.transition_times())
        return transitions

-    def air_exchange(self, room: Room, time: float) -> float:
+    def air_exchange(self, room: Room, time: float) -> _VectorisedFloat:
        """
        Returns the rate at which air is being exchanged in the given room
        at a given time (in hours).
        """
-        return sum([ventilation.air_exchange(room, time)
-                    for ventilation in self.ventilations])
+        return np.array([
+            ventilation.air_exchange(room, time)
+            for ventilation in self.ventilations
+        ]).sum(axis=0)


@dataclass(frozen=True)
@ -233,7 +280,7 @@ class WindowOpening(Ventilation):
        transitions.update(self.outside_temp.transition_times)
        return transitions

-    def air_exchange(self, room: Room, time: float) -> float:
+    def air_exchange(self, room: Room, time: float) -> _VectorisedFloat:
        # If the window is shut, no air is being exchanged.
        if not self.active.triggered(time):
            return 0.
@ -318,7 +365,7 @@ class HEPAFilter(Ventilation):
    # in m^3/h
    q_air_mech: float

-    def air_exchange(self, room: Room, time: float) -> float:
+    def air_exchange(self, room: Room, time: float) -> _VectorisedFloat:
        # If the HEPA is off, no air is being exchanged.
        if not self.active.triggered(time):
            return 0.
@ -335,7 +382,7 @@ class HVACMechanical(Ventilation):
    # in m^3/h
    q_air_mech: float

-    def air_exchange(self, room: Room, time: float) -> float:
+    def air_exchange(self, room: Room, time: float) -> _VectorisedFloat:
        # If the HVAC is off, no air is being exchanged.
        if not self.active.triggered(time):
            return 0.
@ -350,9 +397,9 @@ class AirChange(Ventilation):

    #: The rate (in h^-1) at which the ventilation exchanges all the air
    # of the room (when switched on)
-    air_exch: float
+    air_exch: _VectorisedFloat

-    def air_exchange(self, room: Room, time: float) -> float:
+    def air_exchange(self, room: Room, time: float) -> _VectorisedFloat:
        # No dependence on the room volume.
        # If off, no air is being exchanged.
        if not self.active.triggered(time):
@ -364,19 +411,19 @@ class AirChange(Ventilation):
@dataclass(frozen=True)
 class Virus:
    #: Biological decay (inactivation of the virus in air)
-    halflife: float
+    halflife: _VectorisedFloat

    #: RNA copies  / mL
-    viral_load_in_sputum: float
+    viral_load_in_sputum: _VectorisedFloat

    #: Ratio between infectious aerosols and dose to cause infection.
-    coefficient_of_infectivity: float
+    coefficient_of_infectivity: _VectorisedFloat

    #: Pre-populated examples of Viruses.
    types: typing.ClassVar[typing.Dict[str, "Virus"]]

    @property
-    def decay_constant(self):
+    def decay_constant(self) -> _VectorisedFloat:
        # Viral inactivation per hour (h^-1)
        return np.log(2) / self.halflife

@ -407,10 +454,10 @@ Virus.types = {
@dataclass(frozen=True)
 class Mask:
    #: Filtration efficiency. (In %/100)
-    η_exhale: float
+    η_exhale: _VectorisedFloat

    #: Leakage through side of masks.
-    η_leaks: float
+    η_leaks: _VectorisedFloat

    #: Filtration efficiency of masks when inhaling.
    η_inhale: float
@ -424,7 +471,7 @@ class Mask:
    types: typing.ClassVar[typing.Dict[str, "Mask"]]

    @property
-    def exhale_efficiency(self):
+    def exhale_efficiency(self) -> _VectorisedFloat:
        # Overall efficiency with the effect of the leaks for aerosol emission
        #  Gammaitoni et al (1997)
        return self.η_exhale * (1 - self.η_leaks)
@ -523,7 +570,7 @@ class InfectedPopulation(Population):
    #: The type of expiration that is being emitted whilst doing the activity.
    expiration: Expiration

-    def emission_rate_when_present(self) -> float:
+    def emission_rate_when_present(self) -> _VectorisedFloat:
        """
        The emission rate if the infected population is present.

@ -532,18 +579,23 @@ class InfectedPopulation(Population):
        """
        # Emission Rate (infectious quantum / h)
        aerosols = self.expiration.aerosols(self.mask)
-        if np.isinf(aerosols):
-            # A superspreading event. Miller et al. (2020)
+
+        ER = (self.virus.viral_load_in_sputum *
+              self.virus.coefficient_of_infectivity *
+              self.activity.exhalation_rate *
+              10 ** 6 *
+              aerosols)
+
+        # For superspreading event, where ejection_factor is infinite we fix the ER
+        # based on Miller et al. (2020).
+        if isinstance(aerosols, np.ndarray):
+            ER[np.isinf(aerosols)] = 970
+        elif np.isinf(aerosols):
            ER = 970
-        else:
-            ER = (self.virus.viral_load_in_sputum *
-                  self.virus.coefficient_of_infectivity *
-                  self.activity.exhalation_rate *
-                  10**6 *
-                  aerosols)
+
        return ER

-    def individual_emission_rate(self, time) -> float:
+    def individual_emission_rate(self, time) -> _VectorisedFloat:
        """
        The emission rate of a single individual in the population.

@ -561,8 +613,8 @@ class InfectedPopulation(Population):

        return self.emission_rate_when_present()

-    @functools.lru_cache()
-    def emission_rate(self, time) -> float:
+    @cached()
+    def emission_rate(self, time) -> _VectorisedFloat:
        """
        The emission rate of the entire population.

@ -580,7 +632,7 @@ class ConcentrationModel:
    def virus(self):
        return self.infected.virus

-    def infectious_virus_removal_rate(self, time: float) -> float:
+    def infectious_virus_removal_rate(self, time: float) -> _VectorisedFloat:
        # Particle deposition on the floor
        vg = 1 * 10 ** -4
        # Height of the emission source to the floor - i.e. mouth/nose (m)
@ -590,7 +642,7 @@ class ConcentrationModel:

        return k + self.virus.decay_constant + self.ventilation.air_exchange(self.room, time)

-    @functools.lru_cache()
+    @cached()
    def state_change_times(self):
        """
        All time dependent entities on this model must provide information about
@ -613,8 +665,11 @@ class ConcentrationModel:
                return change_time
        return 0

-    @functools.lru_cache()
-    def concentration(self, time: float) -> float:
+    @cached()
+    def concentration(self, time: float) -> _VectorisedFloat:
+        # Note that time is not vectorised. You can only pass a single float
+        # to this method.
+
        if time == 0:
            return 0.0
        IVRR = self.infectious_virus_removal_rate(time)
--- a/cara/tests/apps/calculator/test_report_generator.py
+++ b/cara/tests/apps/calculator/test_report_generator.py
@ -1,13 +1,23 @@
+import time
+
 import pytest

 from cara.apps.calculator import report_generator


 def test_generate_report(baseline_form):
-    model = baseline_form.build_model()
+    # This is a simple test that confirms that given a model, we can actually
+    # generate a report for it. Because this is what happens in the cara
+    # calculator, we confirm that the generation happens within a reasonable
+    # time threshold.
+    time_limit: float = 5.0  # seconds

+    start = time.perf_counter()
+    model = baseline_form.build_model()
    report = report_generator.build_report("", model, baseline_form)
+    end = time.perf_counter()
    assert report != ""
+    assert end - start < time_limit


@pytest.mark.parametrize(
--- a/cara/tests/models/init.py
+++ b/cara/tests/models/init.py
--- a/cara/tests/models/test_concentration_model.py
+++ b/cara/tests/models/test_concentration_model.py
@ -0,0 +1,59 @@
+import numpy as np
+import pytest
+
+import cara.models
+
+
+@pytest.mark.parametrize(
+    "override_params", [
+        {'volume': np.array([100, 120])},
+        {'air_change': np.array([100, 120])},
+        {'virus_halflife': np.array([1.1, 1.5])},
+        {'viral_load_in_sputum': np.array([5e8, 1e9])},
+        {'coefficient_of_infectivity': np.array([0.02, 0.05])},
+        {'η_exhale': np.array([0.92, 0.95])},
+        {'η_leaks': np.array([0.15, 0.20])},
+
+    ]
+)
+def test_concentration_model_vectorisation(override_params):
+    defaults = {
+        'volume': 75,
+        'air_change': 100,
+        'virus_halflife': 1.1,
+        'viral_load_in_sputum': 1e9,
+        'coefficient_of_infectivity': 0.02,
+        'η_exhale': 0.95,
+        'η_leaks': 0.15,
+    }
+    defaults.update(override_params)
+
+    always = cara.models.PeriodicInterval(240, 240)  # TODO: This should be a thing on an interval.
+    c_model = cara.models.ConcentrationModel(
+        cara.models.Room(defaults['volume']),
+        cara.models.AirChange(always, defaults['air_change']),
+        cara.models.InfectedPopulation(
+            number=1,
+            presence=always,
+            mask=cara.models.Mask(
+                η_exhale=defaults['η_exhale'],
+                η_leaks=defaults['η_leaks'],
+                η_inhale=0.3,
+            ),
+            activity=cara.models.Activity(
+                0.51,
+                0.75,
+            ),
+            virus=cara.models.Virus(
+                halflife=defaults['virus_halflife'],
+                viral_load_in_sputum=defaults['viral_load_in_sputum'],
+                coefficient_of_infectivity=defaults['coefficient_of_infectivity'],
+            ),
+            expiration=cara.models.Expiration(
+                ejection_factor=(0.084, 0.009, 0.003, 0.002),
+            ),
+        )
+    )
+    concentrations = c_model.concentration(10)
+    assert isinstance(concentrations, np.ndarray)
+    assert concentrations.shape == (2, )
--- a/cara/tests/models/test_exposure_model.py
+++ b/cara/tests/models/test_exposure_model.py
--- a/cara/tests/test_ventilation.py
+++ b/cara/tests/test_ventilation.py
@ -1,7 +1,8 @@
 import dataclasses

-import pytest
+import numpy as np
 import numpy.testing as npt
+import pytest

 from cara import models

@ -59,3 +60,32 @@ def test_sliding_window(baseline_slidingwindow):
    room = models.Room(75)

    assert baseline_slidingwindow.discharge_coefficient == 0.6
+
+
+def test_multiple(baseline_slidingwindow, baseline_hingedwindow):
+    v = models.MultipleVentilation([baseline_hingedwindow, baseline_slidingwindow])
+    room = models.Room(75)
+    t = 1
+    assert v.air_exchange(room, t) == (
+            baseline_slidingwindow.air_exchange(room, t) +
+            baseline_hingedwindow.air_exchange(room, t)
+    )
+
+
+def test_multiple_vectorisation():
+    interval = models.SpecificInterval(((0, 4), (5, 9)))
+    v1 = models.AirChange(interval, np.arange(10))
+    v2 = models.AirChange(interval, np.arange(5))
+    v3 = models.AirChange(interval, 10)
+
+    room = models.Room(75)
+    t_active = 2
+    t_inactive = 4.5
+
+    assert models.MultipleVentilation([v1, v2]).air_exchange(room, t_inactive) == 0
+    with pytest.raises(ValueError, match='operands could not be broadcast together'):
+        models.MultipleVentilation([v1, v2]).air_exchange(room, t_active)
+
+    r = models.MultipleVentilation([v2, v3]).air_exchange(room, t_active)
+    assert isinstance(r, np.ndarray)
+    np.testing.assert_array_equal(r, [10, 11, 12, 13, 14])
--- a/requirements.txt
+++ b/requirements.txt
@ -37,6 +37,7 @@ jupyterlab-widgets==1.0.0
 kiwisolver==1.3.1
 MarkupSafe==1.1.1
 matplotlib==3.3.4
+memoization==0.3.2
 mistune==0.8.4
 nbclient==0.5.2
 nbconvert==6.0.7
--- a/setup.py
+++ b/setup.py
@ -22,6 +22,7 @@ REQUIREMENTS: dict = {
        'ipywidgets',
        'Jinja2',
        'matplotlib',
+        'memoization',
        'mistune',
        'numpy',
        'qrcode[pil]',