# cara/caimira/monte_carlo/sampleable.py

import typing

import numpy as np
from sklearn.neighbors import KernelDensity # type: ignore

import caimira.models

# Alias used to annotate the arrays of samples handled in this module
# (nominally a float array of a given size).
# There is no better way to declare this currently, unfortunately: the alias
# is plain np.ndarray, so neither the dtype nor the length is checked.
float_array_size_n = np.ndarray


class SampleableDistribution:
    """
    Common interface of the distributions of this module: an object from
    which an array of samples can be requested.
    """
    def generate_samples(self, size: int) -> float_array_size_n:
        """
        Return `size` samples of the distribution. This base implementation
        always raises NotImplementedError, so subclasses have to override it.
        """
        raise NotImplementedError


class Normal(SampleableDistribution):
    """
    A normal (i.e. Gaussian) distribution, described by its mean and its
    standard deviation.
    """
    def __init__(self, mean: float, standard_deviation: float):
        # centre and spread of the Gaussian, stored as given
        self.mean, self.standard_deviation = mean, standard_deviation

    def generate_samples(self, size: int) -> float_array_size_n:
        """
        Draw `size` values from the Gaussian with numpy's module-level
        random generator.
        """
        return np.random.normal(
            loc=self.mean, scale=self.standard_deviation, size=size,
        )


class Uniform(SampleableDistribution):
    """
    A continuous uniform distribution between two bounds.
    """
    def __init__(self, low: float, high: float):
        # lower and upper bound of the sampled interval, stored as given
        self.low, self.high = low, high

    def generate_samples(self, size: int) -> float_array_size_n:
        """
        Draw `size` values uniformly between `low` and `high` with numpy's
        module-level random generator.
        """
        return np.random.uniform(low=self.low, high=self.high, size=size)


class LogNormal(SampleableDistribution):
    """
    A lognormal distribution, i.e. one for which the natural logarithm of
    the random variable follows a Gaussian distribution.
    """

    def __init__(self, mean_gaussian: float, standard_deviation_gaussian: float):
        # Both parameters describe the underlying Gaussian distribution
        # (its mean and its standard deviation), not the lognormal itself.
        self.mean_gaussian, self.standard_deviation_gaussian = (
            mean_gaussian, standard_deviation_gaussian,
        )

    def generate_samples(self, size: int) -> float_array_size_n:
        """
        Draw `size` values from the lognormal distribution with numpy's
        module-level random generator.
        """
        return np.random.lognormal(
            mean=self.mean_gaussian,
            sigma=self.standard_deviation_gaussian,
            size=size,
        )


class Custom(SampleableDistribution):
    """
    Defines a distribution which follows a custom curve vs. the random
    variable. Uses simple rejection sampling: candidates are drawn uniformly
    within the bounds and kept when a second uniform draw in
    [0, max_function) does not exceed the curve at that point. This is
    appropriate for a smooth distribution function.
    Note: in max_function, a value slightly above the maximum of the distribution
    function should be provided.
    """
    def __init__(self, bounds: typing.Tuple[float, float],
                 function: typing.Callable, max_function: float):
        # these are resp. the (low, high) interval of the random variable,
        # the curve to follow (called with an array of candidate values)
        # and the upper limit of the uniform draw compared against the curve
        self.bounds = bounds
        self.function = function
        self.max_function = max_function

    def generate_samples(self, size: int) -> float_array_size_n:
        """
        Return `size` samples distributed according to `function` within
        `bounds`.

        Rejected candidates are redrawn until every entry is accepted, so
        the loop does not terminate if the curve can never reach the drawn
        values (e.g. a function that is zero everywhere on the bounds).
        """
        fvalue = np.random.uniform(0, self.max_function, size)
        x = np.random.uniform(*self.bounds, size)
        invalid = np.where(fvalue > self.function(x))[0]
        while len(invalid) > 0:
            fvalue[invalid] = np.random.uniform(0, self.max_function, len(invalid))
            x[invalid] = np.random.uniform(*self.bounds, len(invalid))
            # Only the entries that were just redrawn can still be rejected:
            # evaluate the curve on those alone instead of the whole array.
            still_rejected = fvalue[invalid] > self.function(x[invalid])
            invalid = invalid[still_rejected]

        return x


class LogCustom(SampleableDistribution):
    """
    Defines a distribution which follows a custom curve vs. the log (in base 10)
    of the random variable. Uses simple rejection sampling on the log of the
    variable: candidates are drawn uniformly within the bounds and kept when
    a second uniform draw in [0, max_function) does not exceed the curve at
    that point. This is appropriate for a smooth distribution function.
    Note: in max_function, a value slightly above the maximum of the distribution
    function should be provided.
    """
    def __init__(self, bounds: typing.Tuple[float, float],
                 function: typing.Callable, max_function: float):
        # these are resp. the (low, high) interval of the log10 of the
        # random variable, the curve to follow (called with an array of
        # candidate log10 values) and the upper limit of the uniform draw
        # compared against the curve
        self.bounds = bounds
        self.function = function
        self.max_function = max_function

    def generate_samples(self, size: int) -> float_array_size_n:
        """
        Return `size` samples whose log10 is distributed according to
        `function` within `bounds`.

        Rejected candidates are redrawn until every entry is accepted, so
        the loop does not terminate if the curve can never reach the drawn
        values (e.g. a function that is zero everywhere on the bounds).
        """
        fvalue = np.random.uniform(0, self.max_function, size)
        x = np.random.uniform(*self.bounds, size)
        invalid = np.where(fvalue > self.function(x))[0]
        while len(invalid) > 0:
            fvalue[invalid] = np.random.uniform(0, self.max_function, len(invalid))
            x[invalid] = np.random.uniform(*self.bounds, len(invalid))
            # Only the entries that were just redrawn can still be rejected:
            # evaluate the curve on those alone instead of the whole array.
            still_rejected = fvalue[invalid] > self.function(x[invalid])
            invalid = invalid[still_rejected]

        # x holds log10 values; convert back to the variable itself
        return 10 ** x


class CustomKernel(SampleableDistribution):
    """
    A distribution following a custom curve vs. the random variable,
    sampled from a Gaussian kernel density fit of that curve. This is more
    appropriate for a noisy distribution function.
    """
    def __init__(self, variable: float_array_size_n,
                 frequencies: float_array_size_n,
                 kernel_bandwidth: float):
        # values of the random variable
        self.variable = variable
        # distribution frequencies at these values
        self.frequencies = frequencies
        # bandwidth of the Gaussian kernel
        self.kernel_bandwidth = kernel_bandwidth

    def generate_samples(self, size: int) -> float_array_size_n:
        """
        Fit a Gaussian kernel density model on the stored values, weighted
        by their frequencies, and return `size` samples drawn from it. The
        model is fitted anew on every call.
        """
        # the estimator is fitted on a single-column 2D array
        points = self.variable.reshape(-1, 1)
        fitted = KernelDensity(
            kernel='gaussian', bandwidth=self.kernel_bandwidth,
        ).fit(points, sample_weight=self.frequencies)
        drawn = fitted.sample(n_samples=size)
        # keep the only column, as a 1D array
        return drawn[:, 0]


class LogCustomKernel(SampleableDistribution):
    """
    A distribution following a custom curve vs. the log (in base 10) of
    the random variable, sampled from a Gaussian kernel density fit of that
    curve. This is more appropriate for a noisy distribution function.
    """
    def __init__(self, log_variable: float_array_size_n,
                 frequencies: float_array_size_n,
                 kernel_bandwidth: float):
        # log of the random variable
        self.log_variable = log_variable
        # distribution frequencies at these values
        self.frequencies = frequencies
        # bandwidth of the Gaussian kernel
        self.kernel_bandwidth = kernel_bandwidth

    def generate_samples(self, size: int) -> float_array_size_n:
        """
        Fit a Gaussian kernel density model on the stored log values,
        weighted by their frequencies, draw `size` samples from it and
        return 10 to the power of these samples. The model is fitted anew
        on every call.
        """
        # the estimator is fitted on a single-column 2D array
        log_points = self.log_variable.reshape(-1, 1)
        fitted = KernelDensity(
            kernel='gaussian', bandwidth=self.kernel_bandwidth,
        ).fit(log_points, sample_weight=self.frequencies)
        # keep the only column, as a 1D array of log10 values
        log_samples = fitted.sample(n_samples=size)[:, 0]
        return 10 ** log_samples


# Type alias for a value that may be given either as a distribution to be
# sampled or as a caimira.models._VectorisedFloat.
_VectorisedFloatOrSampleable = typing.Union[
    SampleableDistribution, caimira.models._VectorisedFloat,
]