import sys
from limix._display import session_line
from .._bits import unvec
from .._data import asarray as _asarray, conform_dataset, normalize_likelihood
from .._display import session_block
from ._assert import assert_finite
from ._result import IScanResultFactory
[docs]def iscan(G, y, lik="normal", K=None, M=None, idx=None, E0=None, E1=None, verbose=True):
r"""
Single-trait association with interaction test via generalized linear mixed models.
The general formulae for normally distributed traits is
.. math::
๐ฒ = ๐ผ๐ + (๐ถโ๐ดโ)๐โ + (๐ถโ๐ดโ)๐โ + ๐ฎ + ๐,\\
\text{where}~~ ๐ฎโผ๐(๐, ๐โ๐บ) ~~\text{and}~~ ๐โผ๐(๐, ๐โ๐ธ).
The operator โ works as follows:
.. math::
๐ฐโ๐ฑ = [๐ฐโ๐ฑโ ~~...~~ ๐ฐโ๐ฑโ ~~ ๐ฐโ๐ฑโ ~~...~~ ๐ฐโ๐ฑโ ~~...~~ ๐ฐโ๐ฑโ]
The covariates is enconded in matrix ๐ผ while the candidate set is enconded in matrix
๐ถ. The parameters are the effect sizes ๐, ๐โ, and ๐โ, and the variances ๐โ and ๐โ.
It performs likelihood-ratio tests for the following cases, where the first
hypothesis is the null one while the second hypothesis is the alternative one:
- Hโ vs Hโ: testing for vec(๐โ) โ ๐ while vec(๐โ) = ๐
- Hโ vs Hโ: testing for [vec(๐โ) vec(๐โ)] โ ๐
- Hโ vs Hโ: testing for vec(๐โ) โ ๐
It also supports generalized linear mixed models (GLMM). In this case, the following
likelihoods are implemented:
- Bernoulli
- Probit
- Binomial
- Poisson
Formally, let p(๐) be one of the supported probability distributions where ๐ is
its mean. The Hโ model is defined as follows:
.. math::
yแตข โผ p(๐แตข=g(zแตข)) ~~\text{for}~~ ๐ณ โผ ๐(๐ผ๐ + (๐ถโ๐ดโ)๐โ + (๐ถโ๐ดโ)๐โ, ๐โ๐บ + ๐โ๐ธ).
g(โ
) is the corresponding canonical link function for the Bernoulli, Binomial, and
Poisson likelihoods. The Probit likelihood, on the other hand, is a Bernoulli
likelihood with probit link function.
Parameters
----------
G : nรm array_like
Genetic candidates.
Y : nรp array_like
Rows are samples and columns are phenotypes.
lik : tuple, "normal", "bernoulli", "probit", "binomial", "poisson"
Sample likelihood describing the residual distribution.
Either a tuple or a string specifying the likelihood is required. The Normal,
Bernoulli, Probit, and Poisson likelihoods can be selected by providing a
string. Binomial likelihood on the other hand requires a tuple because of the
number of trials: ``("binomial", array_like)``. Defaults to ``"normal"``.
K : nรn array_like
Sample covariance, often the so-called kinship matrix.
M : nรc array_like
Covariates matrix.
idx : list
List of candidate indices that defines the set of candidates to be used in the
tests.
E0 : array_like
Matrix representing the first environment.
E1 : array_like
Matrix representing the second environment.
verbose : bool, optional
``True`` to display progress and summary; ``False`` otherwise.
Returns
-------
result : :class:`limix.qtl._result.IScanResult`
P-values, log of marginal likelihoods, effect sizes, and associated statistics.
Notes
-----
It will raise a ``ValueError`` exception if non-finite values are passed. Please,
refer to the :func:`limix.qc.mean_impute` function for missing value imputation.
"""
from numpy_sugar.linalg import economic_qs
from xarray import concat
from numpy import asarray, empty, ones
lik = normalize_likelihood(lik)
lik_name = lik[0]
with session_block("QTL analysis", disable=not verbose):
with session_line("Normalising input... ", disable=not verbose):
data = conform_dataset(y, M, G=G, K=K)
Y = data["y"]
M = data["M"]
G = data["G"]
K = data["K"]
assert_finite(y, M, K)
nsamples = y.shape[0]
if E1 is None:
E1 = ones((nsamples, 1))
if E0 is None:
E0 = empty((nsamples, 0))
E0 = _asarray(E0, "env0", ["sample", "env"])
E1 = _asarray(E1, "env1", ["sample", "env"])
E01 = concat([E0, E1], dim="env")
if K is not None:
QS = economic_qs(K)
else:
QS = None
if lik_name == "normal":
scanner, v0, v1 = _lmm(Y.values.ravel(), M.values, QS, verbose)
else:
scanner, v0, v1 = _glmm(Y.values.ravel(), lik, M.values, QS, verbose)
r = IScanResultFactory(
lik_name,
Y.trait,
M.covariate,
G.candidate,
E0.env,
E1.env,
scanner.null_lml,
scanner.null_beta,
scanner.null_beta_se,
v0,
v1,
)
if idx is None:
assert E1.shape[1] > 0
idx = range(G.shape[1])
if E0.shape[1] == 0:
r1 = scanner.fast_scan(G, verbose)
for i in idx:
i = _2d_sel(i)
g = asarray(G[:, i], float)
if E0.shape[1] > 0:
r1 = scanner.scan(g, E0)
h1 = _normalise_scan_names(r1)
else:
h1 = _normalise_scan_names({k: v[i] for k, v in r1.items()})
h1["covariate_effsizes"] = h1["covariate_effsizes"].ravel()
h1["covariate_effsizes_se"] = h1["covariate_effsizes_se"].ravel()
r2 = scanner.scan(g, E01)
h2 = _normalise_scan_names(r2)
r.add_test(i, h1, h2)
else:
for i in idx:
i = _2d_sel(i)
g = asarray(G[:, i], float)
r1 = scanner.scan(g, E0)
r2 = scanner.scan(g, E01)
h1 = _normalise_scan_names(r1)
h2 = _normalise_scan_names(r2)
r.add_test(i, h1, h2)
r = r.create()
if verbose:
print(r)
return r
def _normalise_scan_names(r):
return {
"lml": r["lml"],
"covariate_effsizes": r["effsizes0"],
"covariate_effsizes_se": r["effsizes0_se"],
"candidate_effsizes": r["effsizes1"],
"candidate_effsizes_se": r["effsizes1_se"],
"scale": r["scale"],
}
def _2d_sel(idx):
from collections.abc import Iterable
if not isinstance(idx, (slice, Iterable)):
return [idx]
return idx
class ScannerWrapper:
def __init__(self, scanner):
self._scanner = scanner
@property
def null_lml(self):
return self._scanner.null_lml()
@property
def null_beta(self):
return self._scanner.null_beta
@property
def null_beta_se(self):
from numpy import sqrt
se = sqrt(self._scanner.null_beta_covariance.diagonal())
return se
def fast_scan(self, G, verbose):
r = self._scanner.fast_scan(G, verbose=verbose)
r["effsizes1"] = unvec(r["effsizes1"], (-1, G.shape[1])).T
r["effsizes1_se"] = unvec(r["effsizes1_se"], (-1, G.shape[1])).T
return r
def scan(self, G, E):
from .._bits import cdot
r = self._scanner.scan(cdot(G, E))
r["effsizes1"] = unvec(r["effsizes1"], (-1, G.shape[1])).T
r["effsizes1_se"] = unvec(r["effsizes1_se"], (-1, G.shape[1])).T
return r
def _lmm(y, M, QS, verbose):
from glimix_core.lmm import LMM
lmm = LMM(y, M, QS, restricted=False)
lmm.fit(verbose=verbose)
sys.stdout.flush()
if QS is None:
v0 = None
else:
v0 = lmm.v0
v1 = lmm.v1
scanner = ScannerWrapper(lmm.get_fast_scanner())
return scanner, v0, v1
def _glmm(y, lik, M, QS, verbose):
from glimix_core.glmm import GLMMExpFam, GLMMNormal
glmm = GLMMExpFam(y.ravel(), lik, M, QS)
glmm.fit(verbose=verbose)
v0 = glmm.v0
v1 = glmm.v1
sys.stdout.flush()
eta = glmm.site.eta
tau = glmm.site.tau
gnormal = GLMMNormal(eta, tau, M, QS)
gnormal.fit(verbose=verbose)
scanner = ScannerWrapper(gnormal.get_fast_scanner())
return scanner, v0, v1