Source code for limix.stats._kinship

from __future__ import division


[docs]def linear_kinship(G, out=None, verbose=True): """ Estimate Kinship matrix via linear kernel. Let ๐‘‘ be the number of columns of ``G``. The resulting matrix is given by: .. math:: ๐™บ = ๐š‡๐š‡แต€/๐‘‘ where .. math:: ๐š‡แตขโฑผ = (๐™ถแตขโฑผ - ๐‘šโฑผ) / ๐‘ โฑผ is the matrix ``G`` column-wise normalized by means ๐‘šโฑผ and standard deviations ๐‘ โฑผ. NaNs are ignored so as to produce matrix ``K`` having only real numbers. This functions is specially designed to also handle large matrices ``G`` that would otherwise require a large amount of memory if it were to be loaded in memory first. For those cases, libraries like Dask come in handy. Parameters ---------- G : array_like Samples-by-variants matrix. out : ndarray A location into which the result is stored. verbose : bool, optional ``True`` for showing progress; ``False`` otherwise. Defauts to ``True``. Examples -------- .. doctest:: >>> from numpy.random import RandomState >>> from limix.stats import linear_kinship >>> >>> random = RandomState(1) >>> X = random.randn(4, 100) >>> K = linear_kinship(X, verbose=False) >>> print(K) # doctest: +FLOAT_CMP [[ 0.91314823 -0.19283362 -0.34133897 -0.37897564] [-0.19283362 0.89885153 -0.2356003 -0.47041761] [-0.34133897 -0.2356003 0.95777313 -0.38083386] [-0.37897564 -0.47041761 -0.38083386 1.23022711]] """ from numpy import sqrt, zeros, asfortranarray, where, asarray, nanmean, std, isnan from scipy.linalg.blas import get_blas_funcs from tqdm import tqdm (n, p) = G.shape if out is None: out = zeros((n, n), order="F") else: out = asfortranarray(out) chunks = _get_chunks(G) gemm = get_blas_funcs("gemm", [out]) start = 0 for chunk in tqdm(chunks, desc="Kinship", disable=not verbose): end = start + chunk g = asarray(G[:, start:end]) m = nanmean(g, 0) g = where(isnan(g), m, g) g = g - m g /= std(g, 0) g /= sqrt(p) gemm(1.0, g, g, 1.0, out, 0, 1, 1) start = end return out
def _get_chunks(G): from numpy import isfinite if hasattr(G, "chunks") and G.chunks is not None: if len(G.chunks) > 1 and all(isfinite(G.chunks[0])): return G.chunks[1] siz = G.shape[1] // 100 sizl = G.shape[1] - siz * 100 chunks = [siz] * 100 if sizl > 0: chunks += [sizl] return tuple(chunks)