Source code for xftsim.effect

"""
Effect specification classes for the new architecture system.

EffectSpec ABC and concrete implementations:
- AdditiveEffects: univariate additive genetic effects
- MultivariateEffects: multivariate correlated effects across traits
- SparseEffects: sparse causal effects (subset of variants)
"""
from __future__ import annotations

import numpy as np
from abc import ABC, abstractmethod
from typing import Optional


[docs] class EffectSpec(ABC): """ Abstract base class for genetic effect specifications. Effects are sampled once at architecture creation and fixed for all generations. The HaplotypeOperator picks the right matvec based on the `standardized` flag. Attributes ---------- effects : np.ndarray Effect sizes — shape (m,) for univariate, (m, k) for multivariate. standardized : bool If True, effects are for standardized genotypes (use standardized_matvec). variant_mask : np.ndarray Boolean array indicating which variants are causal. """ def __init__(self, effects: np.ndarray, standardized: bool, variant_mask: np.ndarray): self.effects = np.asarray(effects, dtype=np.float64) self.standardized = standardized self.variant_mask = np.asarray(variant_mask, dtype=bool) @property def m(self) -> int: """Total number of variants (causal + non-causal).""" return len(self.variant_mask) @property def m_causal(self) -> int: """Number of causal variants.""" return int(np.sum(self.variant_mask)) @property def k(self) -> int: """Number of traits (1 for univariate).""" if self.effects.ndim == 1: return 1 return self.effects.shape[1]
[docs] class AdditiveEffects(EffectSpec): """Univariate additive genetic effects. All variants are causal (variant_mask is all-True). Effect sizes are stored as a 1D array of shape (m,). Examples -------- Generate effects targeting h2 = 0.5 across 100 variants: >>> eff = AdditiveEffects.from_h2(h2=0.5, m=100, seed=42) >>> eff.m 100 >>> eff.k 1 Create from a known effect array: >>> import numpy as np >>> eff = AdditiveEffects.from_array(np.array([0.1, -0.2, 0.05])) >>> eff.m 3 """
[docs] @classmethod def from_h2(cls, h2: float, m: int, standardized: bool = True, seed: Optional[int] = None) -> "AdditiveEffects": """Generate additive effects to target a given heritability. Under standardized genotypes, Var(G) = sum(beta^2). We draw beta ~ N(0, h2/m) so E[sum(beta^2)] = h2. Parameters ---------- h2 : float Target heritability. m : int Number of variants (all causal). standardized : bool Whether effects are for standardized genotypes. seed : int, optional Random seed for reproducibility. Returns ------- AdditiveEffects """ rng = np.random.RandomState(seed) effects = rng.normal(0, np.sqrt(h2 / m), size=m) variant_mask = np.ones(m, dtype=bool) return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)
[docs] @classmethod def from_array(cls, effects: np.ndarray, standardized: bool = True) -> "AdditiveEffects": """Create from a pre-specified effect array. Parameters ---------- effects : np.ndarray (m,) array of effect sizes. standardized : bool Whether effects are for standardized genotypes. Returns ------- AdditiveEffects """ effects = np.asarray(effects, dtype=np.float64) variant_mask = np.ones(len(effects), dtype=bool) return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)
def __repr__(self) -> str: return (f"AdditiveEffects(m={self.m}, m_causal={self.m_causal}, " f"standardized={self.standardized})")
[docs] class MultivariateEffects(EffectSpec): """Multivariate correlated effects across k traits. Effect sizes are stored as a 2D array of shape (m, k) where k is the number of traits. All variants are causal. """
[docs] @classmethod def from_h2_rg(cls, h2: list, rg: float, m: int, standardized: bool = True, seed: Optional[int] = None) -> "MultivariateEffects": """ Generate multivariate effects from heritabilities and genetic correlation. Parameters ---------- h2 : list Per-trait heritabilities [h2_1, h2_2, ...]. rg : float Genetic correlation between traits. m : int Number of variants (all causal). standardized : bool Whether effects are for standardized genotypes. seed : int, optional Random seed. Returns ------- MultivariateEffects """ h2 = np.asarray(h2, dtype=np.float64) k = len(h2) # Build per-SNP covariance: Cov_snp = diag(sqrt(h2/m)) @ R @ diag(sqrt(h2/m)) sd = np.sqrt(h2 / m) R = np.full((k, k), rg) np.fill_diagonal(R, 1.0) cov_snp = np.outer(sd, sd) * R rng = np.random.RandomState(seed) effects = rng.multivariate_normal(np.zeros(k), cov_snp, size=m) variant_mask = np.ones(m, dtype=bool) return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)
[docs] @classmethod def from_covg(cls, covg: np.ndarray, m: int, standardized: bool = True, seed: Optional[int] = None) -> "MultivariateEffects": """Generate multivariate effects from a full genetic covariance matrix. Parameters ---------- covg : np.ndarray (k, k) genetic covariance matrix. sum(beta @ beta.T) approx covg. m : int Number of variants. standardized : bool Whether effects are for standardized genotypes. seed : int, optional Random seed. Returns ------- MultivariateEffects """ covg = np.asarray(covg, dtype=np.float64) cov_snp = covg / m rng = np.random.RandomState(seed) k = covg.shape[0] effects = rng.multivariate_normal(np.zeros(k), cov_snp, size=m) variant_mask = np.ones(m, dtype=bool) return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)
[docs] @classmethod def from_array(cls, effects: np.ndarray, standardized: bool = True) -> "MultivariateEffects": """Create from a pre-specified (m, k) effect matrix. Parameters ---------- effects : np.ndarray (m, k) array of effect sizes. standardized : bool Whether effects are for standardized genotypes. Returns ------- MultivariateEffects """ effects = np.asarray(effects, dtype=np.float64) variant_mask = np.ones(effects.shape[0], dtype=bool) return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)
def __repr__(self) -> str: return (f"MultivariateEffects(m={self.m}, k={self.k}, " f"m_causal={self.m_causal}, standardized={self.standardized})")
[docs] class SparseEffects(EffectSpec): """Sparse causal effects -- only a subset of variants are causal. Non-causal variants have zero effect sizes. The ``variant_mask`` indicates which variants are causal. """
[docs] @classmethod def from_h2(cls, h2: float, m: int, k_causal: int, standardized: bool = True, seed: Optional[int] = None) -> "SparseEffects": """ Generate sparse additive effects. Parameters ---------- h2 : float Target heritability. m : int Total number of variants. k_causal : int Number of causal variants. standardized : bool Whether effects are for standardized genotypes. seed : int, optional Random seed. Returns ------- SparseEffects Raises ------ ValueError If ``k_causal > m``. """ if k_causal > m: raise ValueError(f"k_causal ({k_causal}) > m ({m})") rng = np.random.RandomState(seed) # Select causal variants causal_idx = rng.choice(m, size=k_causal, replace=False) causal_idx.sort() variant_mask = np.zeros(m, dtype=bool) variant_mask[causal_idx] = True # Effects: (m,) with zeros for non-causal effects = np.zeros(m, dtype=np.float64) effects[causal_idx] = rng.normal(0, np.sqrt(h2 / k_causal), size=k_causal) return cls(effects=effects, standardized=standardized, variant_mask=variant_mask)
def __repr__(self) -> str: return (f"SparseEffects(m={self.m}, m_causal={self.m_causal}, " f"standardized={self.standardized})")