From 0cc1e85ececa08ce1a69c2ac28a7f63a81a2381b Mon Sep 17 00:00:00 2001 From: Nicolas Mounet Date: Tue, 1 Jun 2021 21:40:07 +0200 Subject: [PATCH] Adding new sampleable distributions: lognormal and custom (with two different methods) - thanks to Markus Rognlien --- cara/monte_carlo/sampleable.py | 70 ++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/cara/monte_carlo/sampleable.py b/cara/monte_carlo/sampleable.py index 4ed49d82..df7a7646 100644 --- a/cara/monte_carlo/sampleable.py +++ b/cara/monte_carlo/sampleable.py @@ -1,6 +1,7 @@ import typing import numpy as np +from sklearn.neighbors import KernelDensity # type: ignore import cara.models @@ -16,6 +17,9 @@ class SampleableDistribution: class Normal(SampleableDistribution): + """ + Defines a normal (i.e. Gaussian) distribution + """ def __init__(self, mean: float, standard_deviation: float): self.mean = mean self.standard_deviation = standard_deviation @@ -24,6 +28,72 @@ class Normal(SampleableDistribution): return np.random.normal(self.mean, self.standard_deviation, size=size) +class LogNormal(SampleableDistribution): + """ + Defines a lognormal distribution (i.e. Gaussian distribution vs. the + natural logarithm of the random variable) + """ + + def __init__(self, mean_gaussian: float, standard_deviation_gaussian: float): + # these are resp. the mean and std. deviation of the underlying + # Gaussian distribution + self.mean_gaussian = mean_gaussian + self.standard_deviation_gaussian = standard_deviation_gaussian + + def generate_samples(self, size: int) -> float_array_size_n: + return np.random.lognormal(self.mean_gaussian, + self.standard_deviation_gaussian, + size=size) + + +class Custom(SampleableDistribution): + """ + Defines a distribution which follows a custom curve vs. the random + variable. Uses a simple algorithm. This is appropriate for a smooth + distribution function (one should know its maximum). + """ + def __init__(self, bounds: typing.Tuple[float, float], + function: typing.Callable, max_function: float): + self.bounds = bounds + self.function = function + self.max_function = max_function + + def generate_samples(self, size: int) -> float_array_size_n: + fvalue = np.random.uniform(0,self.max_function,size) + x = np.random.uniform(*self.bounds,size) + invalid = np.where(fvalue>self.function(x))[0] + while len(invalid)>0: + fvalue[invalid] = np.random.uniform(0,self.max_function,len(invalid)) + x[invalid] = np.random.uniform(*self.bounds,len(invalid)) + invalid = np.where(fvalue>self.function(x))[0] + + return x + + +class CustomKernel(SampleableDistribution): + """ + Defines a distribution which follows a custom curve vs. the + random variable. Uses a Gaussian kernel density fit. This is more + appropriate for a noisy distribution function. + """ + def __init__(self, variable: float_array_size_n, + frequencies: float_array_size_n, + kernel_bandwidth: float): + # these are resp. the random variable, the distribution + # frequencies at these values, and the bandwidth of the Gaussian + # kernel + self.variable = variable + self.frequencies = frequencies + self.kernel_bandwidth = kernel_bandwidth + + def generate_samples(self, size: int) -> float_array_size_n: + kde_model = KernelDensity(kernel='gaussian', + bandwidth=self.kernel_bandwidth) + kde_model.fit(self.variable.reshape(-1, 1), + sample_weight=self.frequencies) + return kde_model.sample(n_samples=size)[:, 0] + + _VectorisedFloatOrSampleable = typing.Union[ SampleableDistribution, cara.models._VectorisedFloat, ]