Source code for xftsim.effect

"""
Effect specification classes for the new architecture system.

EffectSpec ABC and concrete implementations:
- AdditiveEffects: univariate additive genetic effects
- MultivariateEffects: multivariate correlated effects across traits
- SparseEffects: sparse causal effects (subset of variants)
"""
from __future__ import annotations

import numpy as np
from abc import ABC, abstractmethod
from typing import Optional



[docs]
class EffectSpec(ABC):
    """
    Abstract base class for genetic effect specifications.

    Effects are sampled once at architecture creation and fixed for all generations.
    The HaplotypeOperator picks the right matvec based on the `standardized` flag.

    Attributes
    ----------
    effects : np.ndarray
        Effect sizes — shape (m,) for univariate, (m, k) for multivariate.
    standardized : bool
        If True, effects are for standardized genotypes (use standardized_matvec).
    variant_mask : np.ndarray
        Boolean array indicating which variants are causal.
    """

    def __init__(self, effects: np.ndarray, standardized: bool, variant_mask: np.ndarray):
        self.effects = np.asarray(effects, dtype=np.float64)
        self.standardized = standardized
        self.variant_mask = np.asarray(variant_mask, dtype=bool)

    @property
    def m(self) -> int:
        """Total number of variants (causal + non-causal)."""
        return len(self.variant_mask)

    @property
    def m_causal(self) -> int:
        """Number of causal variants."""
        return int(np.sum(self.variant_mask))

    @property
    def k(self) -> int:
        """Number of traits (1 for univariate)."""
        if self.effects.ndim == 1:
            return 1
        return self.effects.shape[1]




[docs]
class AdditiveEffects(EffectSpec):
    """Univariate additive genetic effects.

    All variants are causal (variant_mask is all-True). Effect sizes
    are stored as a 1D array of shape (m,).

    Examples
    --------
    Generate effects targeting h2 = 0.5 across 100 variants:

    >>> eff = AdditiveEffects.from_h2(h2=0.5, m=100, seed=42)
    >>> eff.m
    100
    >>> eff.k
    1

    Create from a known effect array:

    >>> import numpy as np
    >>> eff = AdditiveEffects.from_array(np.array([0.1, -0.2, 0.05]))
    >>> eff.m
    3
    """


[docs]
    @classmethod
    def from_h2(cls, h2: float, m: int, standardized: bool = True,
                seed: Optional[int] = None) -> "AdditiveEffects":
        """Generate additive effects to target a given heritability.

        Under standardized genotypes, Var(G) = sum(beta^2).
        We draw beta ~ N(0, h2/m) so E[sum(beta^2)] = h2.

        Parameters
        ----------
        h2 : float
            Target heritability.
        m : int
            Number of variants (all causal).
        standardized : bool
            Whether effects are for standardized genotypes.
        seed : int, optional
            Random seed for reproducibility.

        Returns
        -------
        AdditiveEffects
        """
        rng = np.random.RandomState(seed)
        effects = rng.normal(0, np.sqrt(h2 / m), size=m)
        variant_mask = np.ones(m, dtype=bool)
        return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)



[docs]
    @classmethod
    def from_array(cls, effects: np.ndarray, standardized: bool = True) -> "AdditiveEffects":
        """Create from a pre-specified effect array.

        Parameters
        ----------
        effects : np.ndarray
            (m,) array of effect sizes.
        standardized : bool
            Whether effects are for standardized genotypes.

        Returns
        -------
        AdditiveEffects
        """
        effects = np.asarray(effects, dtype=np.float64)
        variant_mask = np.ones(len(effects), dtype=bool)
        return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)


    def __repr__(self) -> str:
        return (f"AdditiveEffects(m={self.m}, m_causal={self.m_causal}, "
                f"standardized={self.standardized})")




[docs]
class MultivariateEffects(EffectSpec):
    """Multivariate correlated effects across k traits.

    Effect sizes are stored as a 2D array of shape (m, k) where k is the
    number of traits. All variants are causal.
    """


[docs]
    @classmethod
    def from_h2_rg(cls, h2: list, rg: float, m: int,
                   standardized: bool = True,
                   seed: Optional[int] = None) -> "MultivariateEffects":
        """
        Generate multivariate effects from heritabilities and genetic correlation.

        Parameters
        ----------
        h2 : list
            Per-trait heritabilities [h2_1, h2_2, ...].
        rg : float
            Genetic correlation between traits.
        m : int
            Number of variants (all causal).
        standardized : bool
            Whether effects are for standardized genotypes.
        seed : int, optional
            Random seed.

        Returns
        -------
        MultivariateEffects
        """
        h2 = np.asarray(h2, dtype=np.float64)
        k = len(h2)
        # Build per-SNP covariance: Cov_snp = diag(sqrt(h2/m)) @ R @ diag(sqrt(h2/m))
        sd = np.sqrt(h2 / m)
        R = np.full((k, k), rg)
        np.fill_diagonal(R, 1.0)
        cov_snp = np.outer(sd, sd) * R
        rng = np.random.RandomState(seed)
        effects = rng.multivariate_normal(np.zeros(k), cov_snp, size=m)
        variant_mask = np.ones(m, dtype=bool)
        return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)



[docs]
    @classmethod
    def from_covg(cls, covg: np.ndarray, m: int,
                  standardized: bool = True,
                  seed: Optional[int] = None) -> "MultivariateEffects":
        """Generate multivariate effects from a full genetic covariance matrix.

        Parameters
        ----------
        covg : np.ndarray
            (k, k) genetic covariance matrix. sum(beta @ beta.T) approx covg.
        m : int
            Number of variants.
        standardized : bool
            Whether effects are for standardized genotypes.
        seed : int, optional
            Random seed.

        Returns
        -------
        MultivariateEffects
        """
        covg = np.asarray(covg, dtype=np.float64)
        cov_snp = covg / m
        rng = np.random.RandomState(seed)
        k = covg.shape[0]
        effects = rng.multivariate_normal(np.zeros(k), cov_snp, size=m)
        variant_mask = np.ones(m, dtype=bool)
        return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)



[docs]
    @classmethod
    def from_array(cls, effects: np.ndarray,
                   standardized: bool = True) -> "MultivariateEffects":
        """Create from a pre-specified (m, k) effect matrix.

        Parameters
        ----------
        effects : np.ndarray
            (m, k) array of effect sizes.
        standardized : bool
            Whether effects are for standardized genotypes.

        Returns
        -------
        MultivariateEffects
        """
        effects = np.asarray(effects, dtype=np.float64)
        variant_mask = np.ones(effects.shape[0], dtype=bool)
        return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)


    def __repr__(self) -> str:
        return (f"MultivariateEffects(m={self.m}, k={self.k}, "
                f"m_causal={self.m_causal}, standardized={self.standardized})")




[docs]
class SparseEffects(EffectSpec):
    """Sparse causal effects -- only a subset of variants are causal.

    Non-causal variants have zero effect sizes. The ``variant_mask``
    indicates which variants are causal.
    """


[docs]
    @classmethod
    def from_h2(cls, h2: float, m: int, k_causal: int,
                standardized: bool = True,
                seed: Optional[int] = None) -> "SparseEffects":
        """
        Generate sparse additive effects.

        Parameters
        ----------
        h2 : float
            Target heritability.
        m : int
            Total number of variants.
        k_causal : int
            Number of causal variants.
        standardized : bool
            Whether effects are for standardized genotypes.
        seed : int, optional
            Random seed.

        Returns
        -------
        SparseEffects

        Raises
        ------
        ValueError
            If ``k_causal > m``.
        """
        if k_causal > m:
            raise ValueError(f"k_causal ({k_causal}) > m ({m})")
        rng = np.random.RandomState(seed)
        # Select causal variants
        causal_idx = rng.choice(m, size=k_causal, replace=False)
        causal_idx.sort()
        variant_mask = np.zeros(m, dtype=bool)
        variant_mask[causal_idx] = True
        # Effects: (m,) with zeros for non-causal
        effects = np.zeros(m, dtype=np.float64)
        effects[causal_idx] = rng.normal(0, np.sqrt(h2 / k_causal), size=k_causal)
        return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)


    def __repr__(self) -> str:
        return (f"SparseEffects(m={self.m}, m_causal={self.m_causal}, "
                f"standardized={self.standardized})")