diff --git a/cara/apps/calculator/model_generator.py b/cara/apps/calculator/model_generator.py index 06c29229..0d1ea2b2 100644 --- a/cara/apps/calculator/model_generator.py +++ b/cara/apps/calculator/model_generator.py @@ -12,6 +12,7 @@ import cara.data.weather import cara.monte_carlo as mc from .. import calculator from cara.monte_carlo.data import activity_distributions, virus_distributions, mask_distributions +from cara.monte_carlo.data import expiration_distribution, expiration_BLO_factors, expiration_distributions LOG = logging.getLogger(__name__) @@ -23,7 +24,7 @@ minutes_since_midnight = typing.NewType('minutes_since_midnight', int) # Used to declare when an attribute of a class must have a value provided, and # there should be no default value used. _NO_DEFAULT = object() -_DEFAULT_MC_SAMPLE_SIZE = 50000 +_DEFAULT_MC_SAMPLE_SIZE = 250000 @dataclasses.dataclass @@ -628,12 +629,14 @@ class FormData: def build_expiration(expiration_definition) -> models._ExpirationBase: if isinstance(expiration_definition, str): - return models._ExpirationBase.types[expiration_definition] + return expiration_distributions[expiration_definition] elif isinstance(expiration_definition, dict): - return models.MultipleExpiration( - tuple([build_expiration(exp) for exp in expiration_definition.keys()]), - tuple(expiration_definition.values()) - ) + total_weight = sum(expiration_definition.values()) + BLO_factors = np.sum([ + np.array(expiration_BLO_factors[exp_type]) * weight/total_weight + for exp_type, weight in expiration_definition.items() + ], axis=0) + return expiration_distribution(tuple(BLO_factors)) def baseline_raw_form_data(): diff --git a/cara/models.py b/cara/models.py index 8dbd0e90..7b5d034d 100644 --- a/cara/models.py +++ b/cara/models.py @@ -580,7 +580,8 @@ class MultipleExpiration(_ExpirationBase): Group together different modes of expiration, that represent each the main expiration mode for a certain fraction of time (given by the weights). - + This class can only be used with single diameters (it cannot + be used with diameter distributions). """ expirations: typing.Tuple[_ExpirationBase, ...] weights: typing.Tuple[float, ...] @@ -589,6 +590,8 @@ class MultipleExpiration(_ExpirationBase): if len(self.expirations) != len(self.weights): raise ValueError("expirations and weigths should contain the" "same number of elements") + if not all(np.isscalar(e.diameter) for e in self.expirations): + raise ValueError("diameters should all be scalars") def aerosols(self, mask: Mask): return np.array([ @@ -897,6 +900,13 @@ class ConcentrationModel: @dataclass(frozen=True) class ExposureModel: + """ + Represents the exposure to a concentration of virions in the air. + NOTE: the infection probability formula assumes that if the diameter + is an array, then none of the ventilation parameters, room volume or virus + decay constant, are arrays as well. + TODO: implement a check this is the case, in __post_init__ + """ #: The virus concentration model which this exposure model should consider. concentration_model: ConcentrationModel @@ -922,9 +932,29 @@ class ExposureModel: return normed_exposure * self.repeats def exposure(self) -> _VectorisedFloat: - """The number of virus per meter^3.""" - return (self._normed_exposure() * - self.concentration_model.infected.emission_rate_when_present()) + """ + The number of virus per meter^3. With sampled diameters, the + aerosol volume has to be put back in the exposure before taking + the mean, to obtain the proper result for the exposure (which + corresponds to an integration on diameters). + """ + emission_rate = self.concentration_model.infected.emission_rate_when_present() + if (not isinstance(self.concentration_model.infected,InfectedPopulation) + or not isinstance(self.concentration_model.infected.expiration,Expiration) + or np.isscalar(self.concentration_model.infected.expiration.diameter) + ): + # in all these cases, there is no distribution of + # diameters that need to be integrated over + return self._normed_exposure() * emission_rate + else: + # the mean of the diameter-dependent exposure (including + # aerosols volume, but NOT the other factors) has to be + # taken first (this is equivalent to integrating over the + # diameters) + mask = self.concentration_model.infected.mask + aerosols = self.concentration_model.infected.expiration.aerosols(mask) + return (np.array(self._normed_exposure()*aerosols).mean() * + emission_rate/aerosols) def infection_probability(self) -> _VectorisedFloat: exposure = self.exposure() diff --git a/cara/monte_carlo/data.py b/cara/monte_carlo/data.py index 4415efbb..71498ee0 100644 --- a/cara/monte_carlo/data.py +++ b/cara/monte_carlo/data.py @@ -1,7 +1,68 @@ +from dataclasses import dataclass +import typing + import numpy as np +from scipy import special as sp import cara.monte_carlo as mc -from cara.monte_carlo.sampleable import Normal,LogNormal,LogCustomKernel, Uniform +from cara.monte_carlo.sampleable import Normal,LogNormal,LogCustomKernel,CustomKernel,Uniform + +sqrt2pi = np.sqrt(2.*np.pi) +sqrt2 = np.sqrt(2.) + +@dataclass(frozen=True) +class BLOmodel: + """ + Represents the probability distribution from the BLO model. + It is a sum of three lognormal distributions, each of the form + A * cn * (1 / d) * (1 / (np.sqrt(2 * np.pi) * sigma)) * + np.exp(-(np.log(d)-mu) ** 2 / (2 * sigma ** 2)) + with A the factor in front of the B, L or O mode. + From G. Johnson et al., Modality of human + expired aerosol size distributions, Journal of Aerosol Science, + vol. 42, no. 12, pp. 839 – 851, 2011, + https://doi.org/10.1016/j.jaerosci.2011.07.009). + """ + #: factors assigned to resp. the B, L and O modes. They are + # charateristics of the kind of expiratory activity (e.g. breathing, + # speaking, singing, or shouting). These are applied on top of the + # cn concentrations (see below), and depend on the kind of activity + # (breathing, talking, singing/shouting) + BLO_factors: typing.Tuple[float, float, float] + + #: cn (cm^-3) for resp. the B, L and O modes. Corresponds to the + # total concentration of aerosols for each mode. + cn: typing.Tuple[float, float, float] = (0.1, 1., 0.0010008) + + # mean of the underlying normal distributions (represents the log of a + # diameter in microns), for resp. the B, L and O modes. + mu: typing.Tuple[float, float, float] = (0.989541, 1.38629, 4.97673) + + # std deviation of the underlying normal distribution, for resp. + # the B, L and O modes. + sigma: typing.Tuple[float, float, float] = (0.262364, 0.506818, 0.585005) + + def distribution(self, d): + """ + Returns the raw value of the probability distribution for a + given diameter d (microns). + """ + return sum( (1 / d) * (A * cn / (sqrt2pi * sigma)) * + np.exp(-(np.log(d) - mu) ** 2 / (2 * sigma ** 2)) + for A,cn,mu,sigma in zip(self.BLO_factors, self.cn, + self.mu, self.sigma) ) + + def integrate(self, dmin, dmax): + """ + Returns the integral between dmin and dmax (in microns) of the + probability distribution. + """ + result = 0. + for A,cn,mu,sigma in zip(self.BLO_factors, self.cn, self.mu, self.sigma): + ymin = (np.log(dmin)-mu)/(sqrt2*sigma) + ymax = (np.log(dmax)-mu)/(sqrt2*sigma) + result += A * cn * (sp.erf(ymax)-sp.erf(ymin)) / 2. + return result # From CERN-OPEN-2021-04 and refererences therein @@ -67,4 +128,33 @@ virus_distributions = { mask_distributions = { 'Type I': mc.Mask(Uniform(0.25, 0.80)), 'FFP2': mc.Mask(Uniform(0.83, 0.91)), -} \ No newline at end of file +} + + +def expiration_distribution(BLO_factors): + """ + Returns an Expiration with an aerosol diameter distribution, defined + by the BLO factors (a length-3 tuple). + The total concentration of aerosols is computed by integrating + the distribution between 0.1 and 30 microns - these boundaries are + an historical choice based on previous implementations of the model + (it limits the influence of the O-mode). + """ + dscan = np.linspace(0.1, 30. ,3000) + return mc.Expiration(CustomKernel(dscan, + BLOmodel(BLO_factors).distribution(dscan),kernel_bandwidth=0.1), + BLOmodel(BLO_factors).integrate(0.1, 30.)) + + +expiration_BLO_factors = { + 'Breathing': (1., 0., 0.), + 'Talking': (1., 1., 1.), + 'Singing': (1., 5., 5.), + 'Shouting': (1., 5., 5.), +} + + +expiration_distributions = { + exp_type: expiration_distribution(BLO_factors) + for exp_type,BLO_factors in expiration_BLO_factors.items() +} diff --git a/cara/tests/apps/calculator/test_model_generator.py b/cara/tests/apps/calculator/test_model_generator.py index ba4f3a6a..a01cd3f1 100644 --- a/cara/tests/apps/calculator/test_model_generator.py +++ b/cara/tests/apps/calculator/test_model_generator.py @@ -9,7 +9,12 @@ from cara.apps.calculator import model_generator from cara.apps.calculator.model_generator import _hours2timestring from cara.apps.calculator.model_generator import minutes_since_midnight from cara import models +from cara.monte_carlo.data import expiration_distributions +# TODO: seed better the random number generators +np.random.seed(2000) +SAMPLE_SIZE = 250000 +TOLERANCE = 0.02 def test_model_from_dict(baseline_form_data): form = model_generator.FormData.from_dict(baseline_form_data) @@ -31,11 +36,11 @@ def test_model_from_dict_invalid(baseline_form_data): ) def test_blend_expiration(mask_type): blend = {'Breathing': 2, 'Talking': 1} - r = model_generator.build_expiration(blend) + r = model_generator.build_expiration(blend).build_model(SAMPLE_SIZE) mask = models.Mask.types[mask_type] - expected = (models.Expiration.types['Breathing'].aerosols(mask)*2/3. + - models.Expiration.types['Talking'].aerosols(mask)/3.) - npt.assert_allclose(r.aerosols(mask), expected) + expected = (expiration_distributions['Breathing'].build_model(SAMPLE_SIZE).aerosols(mask).mean()*2/3. + + expiration_distributions['Talking'].build_model(SAMPLE_SIZE).aerosols(mask).mean()/3.) + npt.assert_allclose(r.aerosols(mask).mean(), expected, rtol=TOLERANCE) def test_ventilation_slidingwindow(baseline_form: model_generator.FormData): diff --git a/cara/tests/test_expiration.py b/cara/tests/test_expiration.py index e3497efe..8a600086 100644 --- a/cara/tests/test_expiration.py +++ b/cara/tests/test_expiration.py @@ -18,6 +18,18 @@ def test_multiple_wrong_weight_size(): e = models.MultipleExpiration([e_base, e_base], weights) +def test_multiple_wrong_diameters(): + weights = (1., 2., 3.) + e1 = models.Expiration(np.array([1., 1.])) + e2 = models.Expiration(1.) + e3 = models.Expiration(2.) + with pytest.raises( + ValueError, + match=re.escape("diameters should all be scalars") + ): + e = models.MultipleExpiration([e1, e2, e3], weights) + + def test_multiple(): weights = (1., 1.) mask = models.Mask.types['Type I'] diff --git a/cara/tests/test_monte_carlo_full_models.py b/cara/tests/test_monte_carlo_full_models.py index fbf9f105..b13ea0c7 100644 --- a/cara/tests/test_monte_carlo_full_models.py +++ b/cara/tests/test_monte_carlo_full_models.py @@ -5,10 +5,12 @@ import pytest import cara.monte_carlo as mc from cara import models,data from cara.monte_carlo.data import activity_distributions, virus_distributions +from cara.monte_carlo.data import expiration_distribution, expiration_distributions +from cara.apps.calculator.model_generator import build_expiration # TODO: seed better the random number generators np.random.seed(2000) -SAMPLE_SIZE = 50000 +SAMPLE_SIZE = 250000 TOLERANCE = 0.05 # references values for infection_probability and expected new cases @@ -42,10 +44,7 @@ def shared_office_mc(): presence=mc.SpecificInterval(((0., 2.), (2.1, 4.), (5., 7.), (7.1, 9.))), mask=models.Mask(η_inhale=0.3), activity=activity_distributions['Seated'], - expiration=models.MultipleExpiration( - expirations=(models.Expiration.types['Talking'], - models.Expiration.types['Breathing']), - weights=(0.3, 0.7)), + expiration=build_expiration({'Talking': 0.3, 'Breathing': 0.7}), ), ) return mc.ExposureModel( @@ -86,7 +85,7 @@ def classroom_mc(): presence=mc.SpecificInterval(((0., 2.), (2.5, 4.), (5., 7.), (7.5, 9.))), mask=models.Mask.types['No mask'], activity=activity_distributions['Light activity'], - expiration=models.Expiration.types['Talking'], + expiration=expiration_distributions['Talking'], ), ) return mc.ExposureModel( @@ -117,7 +116,7 @@ def ski_cabin_mc(): presence=mc.SpecificInterval(((0., 1/3),)), mask=models.Mask(η_inhale=0.3), activity=activity_distributions['Moderate activity'], - expiration=models.Expiration.types['Talking'], + expiration=expiration_distributions['Talking'], ), ) return mc.ExposureModel( @@ -150,7 +149,7 @@ def gym_mc(): presence=mc.SpecificInterval(((0., 1.),)), mask=models.Mask.types["No mask"], activity=activity_distributions['Heavy exercise'], - expiration=models.Expiration.types['Breathing'], + expiration=expiration_distributions['Breathing'], ), ) return mc.ExposureModel( @@ -182,10 +181,7 @@ def waiting_room_mc(): presence=mc.SpecificInterval(((0., 2.),)), mask=models.Mask.types["No mask"], activity=activity_distributions['Seated'], - expiration=models.MultipleExpiration( - expirations=(models.Expiration.types['Talking'], - models.Expiration.types['Breathing']), - weights=(0.3, 0.7)), + expiration=build_expiration({'Talking': 0.3, 'Breathing': 0.7}) ), ) return mc.ExposureModel( @@ -218,11 +214,7 @@ def skagit_chorale_mc(): presence=mc.SpecificInterval(((0., 2.5),)), mask=models.Mask.types["No mask"], activity=activity_distributions['Light activity'], - expiration=models.Expiration(10.0761), - # The aerosol diameter given (10.0761 microns) is an equivalent - # diameter, chosen in such a way that the aerosol volume is - # the same as the total aerosol volume given by the full BLO model - # with (5, 5, 5) for the B/L/O weights. + expiration=expiration_distribution((5., 5., 5.)), ), ) return mc.ExposureModel( @@ -295,10 +287,7 @@ def test_small_shared_office_Geneva(mask_type, month, expected_pi, presence=mc.SpecificInterval(((9., 10+2/3), (10+5/6, 12.5), (13.5, 15+2/3), (15+5/6, 18.))), mask=models.Mask.types[mask_type], activity=activity_distributions['Seated'], - expiration=models.MultipleExpiration( - expirations=(models.Expiration.types['Talking'], - models.Expiration.types['Breathing']), - weights=(0.33, 0.67)), + expiration=build_expiration({'Talking': 0.33, 'Breathing': 0.67}), ), ) exposure_mc = mc.ExposureModel(