Source code for phasegen.norms

"""
Norms and likelihood functions for comparing observed and modeled values with
:class:`~phasegen.inference.Inference`.
"""

from abc import ABC
from typing import Any, Iterable

import numpy as np
from fastdfe.likelihood import Likelihood as PoissonLikelihoodFastDFE


[docs] class Norm(ABC): """ Abstract class for norms. """
[docs] def compute(self, a: Any, b: Any) -> float | int: """ Compare two values. :param a: A value. :param b: Another value. :return: A numerical value representing the difference between the two values. """ pass
[docs] class LNorm(Norm): """ Class for L-norms. """
[docs] def __init__(self, p: int): """ Initialize the class with the provided parameters. :param p: The order of the norm. see :func:`numpy.linalg.norm` for details. """ #: The order of the norm. self.p: int = np.inf if np.isinf(p) else int(p)
[docs] def compute(self, a: float | np.ndarray, b: float | np.ndarray) -> float | int: """ Compare two values. :param a: A value. :param b: Another value. :return: A numerical value representing the difference between the two values. """ return np.linalg.norm(a - b, ord=self.p)
[docs] class L2Norm(LNorm): """ Class for L2-norm (Euclidean distance). """
[docs] def __init__(self): """ Initialize the class. """ super().__init__(p=2)
[docs] class L1Norm(LNorm): """ Class for L1-norm (Manhattan distance). """
[docs] def __init__(self): """ Initialize the class. """ super().__init__(p=1)
[docs] class LInfNorm(LNorm): """ Class for L-infinity norm (Chebyshev distance). """
[docs] def __init__(self): """ Initialize the class. """ super().__init__(p=np.inf)
[docs] class Likelihood(Norm, ABC): """ Abstract class for likelihoods. """ pass
[docs] class PoissonLikelihood(Likelihood): """ Class for Poisson likelihoods. Site frequency spectra are often assumed to be independent Poisson random variables. """
[docs] def compute(self, observed: Iterable | float, modelled: Iterable | float) -> float | int: """ Return additive inverse of Poisson log-likelihood assuming independent entries. Note that the returned likelihood is a positive value which ought to be minimized. :param observed: Observed value or values. :param modelled: Modelled value or values. :return: A numerical value representing the difference between the two values. """ # special case: single value if not isinstance(observed, Iterable) or not isinstance(modelled, Iterable): return self.compute(observed=[observed], modelled=[modelled]) return - PoissonLikelihoodFastDFE.log_poisson( mu=np.array(list(modelled)), k=np.array(list(observed)) ).sum()
[docs] class MultinomialLikelihood(Likelihood): """ Class for Multinomial likelihoods. Used when modeling observed counts distributed across categories, given expected probabilities. The modelled values are normalized to form a valid probability distribution (i.e., they sum to 1). """
[docs] def compute(self, observed: Iterable, modelled: Iterable) -> float: """ Return the additive inverse of the Multinomial log-likelihood. The result is a positive value that should be minimized. :param observed: Observed counts per category. :param modelled: Modelled values (will be normalized to probabilities). :return: Negative log-likelihood as a float. """ observed = np.array(list(observed)) modelled = np.array(list(modelled)) modelled = modelled / modelled.sum() mask = observed > 0 return -np.sum(observed[mask] * np.log(modelled[mask]))