Merge branch 'feature/diameter_dependence' into 'feature/scientific_model_update'

Monte-Carlo sampling of the aerosol diameters with BLO model See merge request cara/cara!259
2021-09-17 12:11:18 +02:00 · 2021-09-17 12:11:18 +02:00 · 86ab306dd9
commit 86ab306dd9
parent 936c72f76c 396339f7bb
6 changed files with 166 additions and 37 deletions
--- a/cara/apps/calculator/model_generator.py
+++ b/cara/apps/calculator/model_generator.py
@ -12,6 +12,7 @@ import cara.data.weather
 import cara.monte_carlo as mc
 from .. import calculator
 from cara.monte_carlo.data import activity_distributions, virus_distributions, mask_distributions
+from cara.monte_carlo.data import expiration_distribution, expiration_BLO_factors, expiration_distributions


 LOG = logging.getLogger(__name__)
@ -23,7 +24,7 @@ minutes_since_midnight = typing.NewType('minutes_since_midnight', int)
 # Used to declare when an attribute of a class must have a value provided, and
 # there should be no default value used.
 _NO_DEFAULT = object()
-_DEFAULT_MC_SAMPLE_SIZE = 50000
+_DEFAULT_MC_SAMPLE_SIZE = 250000


@dataclasses.dataclass
@ -628,12 +629,14 @@ class FormData:

 def build_expiration(expiration_definition) -> models._ExpirationBase:
    if isinstance(expiration_definition, str):
-        return models._ExpirationBase.types[expiration_definition]
+        return expiration_distributions[expiration_definition]
    elif isinstance(expiration_definition, dict):
-        return models.MultipleExpiration(
-            tuple([build_expiration(exp) for exp in expiration_definition.keys()]),
-            tuple(expiration_definition.values())
-        )
+        total_weight = sum(expiration_definition.values())
+        BLO_factors = np.sum([
+            np.array(expiration_BLO_factors[exp_type]) * weight/total_weight
+            for exp_type, weight in expiration_definition.items()
+            ], axis=0)
+        return expiration_distribution(tuple(BLO_factors))


 def baseline_raw_form_data():
--- a/cara/models.py
+++ b/cara/models.py
@ -580,7 +580,8 @@ class MultipleExpiration(_ExpirationBase):
    Group together different modes of expiration, that represent
    each the main expiration mode for a certain fraction of time (given by
    the weights).
-
+    This class can only be used with single diameters (it cannot
+    be used with diameter distributions).
    """
    expirations: typing.Tuple[_ExpirationBase, ...]
    weights: typing.Tuple[float, ...]
@ -589,6 +590,8 @@ class MultipleExpiration(_ExpirationBase):
        if len(self.expirations) != len(self.weights):
            raise ValueError("expirations and weigths should contain the"
                             "same number of elements")
+        if not all(np.isscalar(e.diameter) for e in self.expirations):
+            raise ValueError("diameters should all be scalars")

    def aerosols(self, mask: Mask):
        return np.array([
@ -897,6 +900,13 @@ class ConcentrationModel:

@dataclass(frozen=True)
 class ExposureModel:
+    """
+    Represents the exposure to a concentration of virions in the air.
+    NOTE: the infection probability formula assumes that if the diameter
+    is an array, then none of the ventilation parameters, room volume or virus
+    decay constant, are arrays as well.
+    TODO: implement a check this is the case, in __post_init__
+    """
    #: The virus concentration model which this exposure model should consider.
    concentration_model: ConcentrationModel

@ -922,9 +932,29 @@ class ExposureModel:
        return normed_exposure * self.repeats

    def exposure(self) -> _VectorisedFloat:
-        """The number of virus per meter^3."""
-        return (self._normed_exposure() *
-                self.concentration_model.infected.emission_rate_when_present())
+        """
+        The number of virus per meter^3. With sampled diameters, the
+        aerosol volume has to be put back in the exposure before taking
+        the mean, to obtain the proper result for the exposure (which
+        corresponds to an integration on diameters).
+        """
+        emission_rate = self.concentration_model.infected.emission_rate_when_present()
+        if (not isinstance(self.concentration_model.infected,InfectedPopulation)
+            or not isinstance(self.concentration_model.infected.expiration,Expiration)
+            or np.isscalar(self.concentration_model.infected.expiration.diameter)
+            ):
+            # in all these cases, there is no distribution of
+            # diameters that need to be integrated over
+            return self._normed_exposure() * emission_rate
+        else:
+            # the mean of the diameter-dependent exposure (including
+            # aerosols volume, but NOT the other factors) has to be
+            # taken first (this is equivalent to integrating over the
+            # diameters)
+            mask = self.concentration_model.infected.mask
+            aerosols = self.concentration_model.infected.expiration.aerosols(mask)
+            return (np.array(self._normed_exposure()*aerosols).mean() *
+                    emission_rate/aerosols)

    def infection_probability(self) -> _VectorisedFloat:
        exposure = self.exposure()
--- a/cara/monte_carlo/data.py
+++ b/cara/monte_carlo/data.py
@ -1,7 +1,68 @@
+from dataclasses import dataclass
+import typing
+
 import numpy as np
+from scipy import special as sp

 import cara.monte_carlo as mc
-from cara.monte_carlo.sampleable import Normal,LogNormal,LogCustomKernel, Uniform
+from cara.monte_carlo.sampleable import Normal,LogNormal,LogCustomKernel,CustomKernel,Uniform
+
+sqrt2pi = np.sqrt(2.*np.pi)
+sqrt2 = np.sqrt(2.)
+
+@dataclass(frozen=True)
+class BLOmodel:
+    """
+    Represents the probability distribution from the BLO model.
+    It is a sum of three lognormal distributions, each of the form
+    A * cn * (1 / d) * (1 / (np.sqrt(2 * np.pi) * sigma)) *
+            np.exp(-(np.log(d)-mu) ** 2 / (2 * sigma ** 2))
+    with A the factor in front of the B, L or O mode.
+    From G. Johnson et al., Modality of human
+    expired aerosol size distributions, Journal of Aerosol Science,
+    vol. 42, no. 12, pp. 839 – 851, 2011,
+    https://doi.org/10.1016/j.jaerosci.2011.07.009).
+    """
+    #: factors assigned to resp. the B, L and O modes. They are
+    # charateristics of the kind of expiratory activity (e.g. breathing,
+    # speaking, singing, or shouting). These are applied on top of the
+    # cn concentrations (see below), and depend on the kind of activity
+    # (breathing, talking, singing/shouting)
+    BLO_factors: typing.Tuple[float, float, float]
+
+    #: cn (cm^-3) for resp. the B, L and O modes. Corresponds to the
+    # total concentration of aerosols for each mode.
+    cn: typing.Tuple[float, float, float] = (0.1, 1., 0.0010008)
+
+    # mean of the underlying normal distributions (represents the log of a
+    # diameter in microns), for resp. the B, L and O modes.
+    mu: typing.Tuple[float, float, float] = (0.989541, 1.38629, 4.97673)
+
+    # std deviation of the underlying normal distribution, for resp.
+    # the B, L and O modes.
+    sigma: typing.Tuple[float, float, float] = (0.262364, 0.506818, 0.585005)
+
+    def distribution(self, d):
+        """
+        Returns the raw value of the probability distribution for a
+        given diameter d (microns).
+        """
+        return sum( (1 / d) * (A * cn / (sqrt2pi * sigma)) *
+                    np.exp(-(np.log(d) - mu) ** 2 / (2 * sigma ** 2))
+                    for A,cn,mu,sigma in zip(self.BLO_factors, self.cn,
+                                             self.mu, self.sigma) )
+
+    def integrate(self, dmin, dmax):
+        """ 
+        Returns the integral between dmin and dmax (in microns) of the 
+        probability distribution.
+        """
+        result = 0.
+        for A,cn,mu,sigma in zip(self.BLO_factors, self.cn, self.mu, self.sigma):
+            ymin = (np.log(dmin)-mu)/(sqrt2*sigma)
+            ymax = (np.log(dmax)-mu)/(sqrt2*sigma)
+            result += A * cn * (sp.erf(ymax)-sp.erf(ymin)) / 2.
+        return result


 # From CERN-OPEN-2021-04 and refererences therein
@ -67,4 +128,33 @@ virus_distributions = {
 mask_distributions = {
    'Type I': mc.Mask(Uniform(0.25, 0.80)),
    'FFP2': mc.Mask(Uniform(0.83, 0.91)),
-}
+}
+
+
+def expiration_distribution(BLO_factors):
+    """
+    Returns an Expiration with an aerosol diameter distribution, defined
+    by the BLO factors (a length-3 tuple).
+    The total concentration of aerosols is computed by integrating
+    the distribution between 0.1 and 30 microns - these boundaries are
+    an historical choice based on previous implementations of the model
+    (it limits the influence of the O-mode).
+    """
+    dscan = np.linspace(0.1, 30. ,3000)
+    return mc.Expiration(CustomKernel(dscan,
+                BLOmodel(BLO_factors).distribution(dscan),kernel_bandwidth=0.1),
+                BLOmodel(BLO_factors).integrate(0.1, 30.))
+
+
+expiration_BLO_factors = {
+    'Breathing': (1., 0., 0.),
+    'Talking':   (1., 1., 1.),
+    'Singing':   (1., 5., 5.),
+    'Shouting':  (1., 5., 5.),
+}
+
+
+expiration_distributions = {
+    exp_type: expiration_distribution(BLO_factors)
+    for exp_type,BLO_factors in expiration_BLO_factors.items()
+}
--- a/cara/tests/apps/calculator/test_model_generator.py
+++ b/cara/tests/apps/calculator/test_model_generator.py
@ -9,7 +9,12 @@ from cara.apps.calculator import model_generator
 from cara.apps.calculator.model_generator import _hours2timestring
 from cara.apps.calculator.model_generator import minutes_since_midnight
 from cara import models
+from cara.monte_carlo.data import expiration_distributions

+# TODO: seed better the random number generators
+np.random.seed(2000)
+SAMPLE_SIZE = 250000
+TOLERANCE = 0.02

 def test_model_from_dict(baseline_form_data):
    form = model_generator.FormData.from_dict(baseline_form_data)
@ -31,11 +36,11 @@ def test_model_from_dict_invalid(baseline_form_data):
 )
 def test_blend_expiration(mask_type):
    blend = {'Breathing': 2, 'Talking': 1}
-    r = model_generator.build_expiration(blend)
+    r = model_generator.build_expiration(blend).build_model(SAMPLE_SIZE)
    mask = models.Mask.types[mask_type]
-    expected = (models.Expiration.types['Breathing'].aerosols(mask)*2/3. +
-                models.Expiration.types['Talking'].aerosols(mask)/3.)
-    npt.assert_allclose(r.aerosols(mask), expected)
+    expected = (expiration_distributions['Breathing'].build_model(SAMPLE_SIZE).aerosols(mask).mean()*2/3. +
+                expiration_distributions['Talking'].build_model(SAMPLE_SIZE).aerosols(mask).mean()/3.)
+    npt.assert_allclose(r.aerosols(mask).mean(), expected, rtol=TOLERANCE)


 def test_ventilation_slidingwindow(baseline_form: model_generator.FormData):
--- a/cara/tests/test_expiration.py
+++ b/cara/tests/test_expiration.py
@ -18,6 +18,18 @@ def test_multiple_wrong_weight_size():
        e = models.MultipleExpiration([e_base, e_base], weights)


+def test_multiple_wrong_diameters():
+    weights = (1., 2., 3.)
+    e1 = models.Expiration(np.array([1., 1.]))
+    e2 = models.Expiration(1.)
+    e3 = models.Expiration(2.)
+    with pytest.raises(
+            ValueError,
+            match=re.escape("diameters should all be scalars")
+    ):
+        e = models.MultipleExpiration([e1, e2, e3], weights)
+
+
 def test_multiple():
    weights = (1., 1.)
    mask = models.Mask.types['Type I']
--- a/cara/tests/test_monte_carlo_full_models.py
+++ b/cara/tests/test_monte_carlo_full_models.py
@ -5,10 +5,12 @@ import pytest
 import cara.monte_carlo as mc
 from cara import models,data
 from cara.monte_carlo.data import activity_distributions, virus_distributions
+from cara.monte_carlo.data import expiration_distribution, expiration_distributions
+from cara.apps.calculator.model_generator import build_expiration

 # TODO: seed better the random number generators
 np.random.seed(2000)
-SAMPLE_SIZE = 50000
+SAMPLE_SIZE = 250000
 TOLERANCE = 0.05

 # references values for infection_probability and expected new cases
@ -42,10 +44,7 @@ def shared_office_mc():
            presence=mc.SpecificInterval(((0., 2.), (2.1, 4.), (5., 7.), (7.1, 9.))),
            mask=models.Mask(η_inhale=0.3),
            activity=activity_distributions['Seated'],
-            expiration=models.MultipleExpiration(
-                    expirations=(models.Expiration.types['Talking'],
-                                 models.Expiration.types['Breathing']),
-                    weights=(0.3, 0.7)),
+            expiration=build_expiration({'Talking': 0.3, 'Breathing': 0.7}),
        ),
    )
    return mc.ExposureModel(
@ -86,7 +85,7 @@ def classroom_mc():
            presence=mc.SpecificInterval(((0., 2.), (2.5, 4.), (5., 7.), (7.5, 9.))),
            mask=models.Mask.types['No mask'],
            activity=activity_distributions['Light activity'],
-            expiration=models.Expiration.types['Talking'],
+            expiration=expiration_distributions['Talking'],
        ),
    )
    return mc.ExposureModel(
@ -117,7 +116,7 @@ def ski_cabin_mc():
            presence=mc.SpecificInterval(((0., 1/3),)),
            mask=models.Mask(η_inhale=0.3),
            activity=activity_distributions['Moderate activity'],
-            expiration=models.Expiration.types['Talking'],
+            expiration=expiration_distributions['Talking'],
        ),
    )
    return mc.ExposureModel(
@ -150,7 +149,7 @@ def gym_mc():
            presence=mc.SpecificInterval(((0., 1.),)),
            mask=models.Mask.types["No mask"],
            activity=activity_distributions['Heavy exercise'],
-            expiration=models.Expiration.types['Breathing'],
+            expiration=expiration_distributions['Breathing'],
        ),
    )
    return mc.ExposureModel(
@ -182,10 +181,7 @@ def waiting_room_mc():
            presence=mc.SpecificInterval(((0., 2.),)),
            mask=models.Mask.types["No mask"],
            activity=activity_distributions['Seated'],
-            expiration=models.MultipleExpiration(
-                    expirations=(models.Expiration.types['Talking'],
-                                 models.Expiration.types['Breathing']),
-                    weights=(0.3, 0.7)),
+            expiration=build_expiration({'Talking': 0.3, 'Breathing': 0.7})
        ),
    )
    return mc.ExposureModel(
@ -218,11 +214,7 @@ def skagit_chorale_mc():
            presence=mc.SpecificInterval(((0., 2.5),)),
            mask=models.Mask.types["No mask"],
            activity=activity_distributions['Light activity'],
-            expiration=models.Expiration(10.0761),
-            # The aerosol diameter given (10.0761 microns) is an equivalent
-            # diameter, chosen in such a way that the aerosol volume is
-            # the same as the total aerosol volume given by the full BLO model
-            # with (5, 5, 5) for the B/L/O weights.
+            expiration=expiration_distribution((5., 5., 5.)),
        ),
    )
    return mc.ExposureModel(
@ -295,10 +287,7 @@ def test_small_shared_office_Geneva(mask_type, month, expected_pi,
            presence=mc.SpecificInterval(((9., 10+2/3), (10+5/6, 12.5), (13.5, 15+2/3), (15+5/6, 18.))),
            mask=models.Mask.types[mask_type],
            activity=activity_distributions['Seated'],
-            expiration=models.MultipleExpiration(
-                    expirations=(models.Expiration.types['Talking'],
-                                 models.Expiration.types['Breathing']),
-                    weights=(0.33, 0.67)),
+            expiration=build_expiration({'Talking': 0.33, 'Breathing': 0.67}),
        ),
    )
    exposure_mc = mc.ExposureModel(