Source code for limix.io.plink

def read(prefix, verbose=True):
    """
    Load a set of PLINK files into Pandas data frames plus a genotype matrix.

    The heavy lifting is delegated to ``pandas_plink.read_plink``; this
    function only re-labels the two data frames it returns.

    Parameters
    ----------
    prefix : str
        Path prefix shared by the PLINK files.
    verbose : bool
        ``True`` to show progress information; ``False`` to stay quiet.

    Returns
    -------
    alleles : pandas dataframe
        The bim table, indexed by its ``snp`` column (cast to ``str``); the
        index is named ``candidate``.
    samples : pandas dataframe
        The fam table, indexed by its ``iid`` column; the index is named
        ``sample``.
    genotype : ndarray
        The bed matrix exactly as returned by ``read_plink``. The examples
        below call ``.compute()`` on it to obtain the values.

    Examples
    --------
    .. doctest::

        >>> from os.path import join
        >>> from limix.io import plink
        >>> from pandas_plink import get_data_folder
        >>>
        >>> (bim, fam, bed) = plink.read(join(get_data_folder(), "data"), verbose=False)
        >>> print(bim.head())
                   chrom         snp      cm    pos a0 a1  i
        candidate
        rs10399749     1  rs10399749 0.00000  45162  G  C  0
        rs2949420      1   rs2949420 0.00000  45257  C  T  1
        rs2949421      1   rs2949421 0.00000  45413  0  0  2
        rs2691310      1   rs2691310 0.00000  46844  A  T  3
        rs4030303      1   rs4030303 0.00000  72434  0  G  4
        >>> print(fam.head())
                       fid       iid    father    mother gender    trait  i
        sample
        Sample_1  Sample_1  Sample_1         0         0      1 -9.00000  0
        Sample_2  Sample_2  Sample_2         0         0      2 -9.00000  1
        Sample_3  Sample_3  Sample_3  Sample_1  Sample_2      2 -9.00000  2
        >>> print(bed.compute())  # doctest: +FLOAT_CMP
        [[ 2.  2.  1.]
         [ 2.  1.  2.]
         [nan nan nan]
         [nan nan  1.]
         [ 2.  2.  2.]
         [ 2.  2.  2.]
         [ 2.  1.  0.]
         [ 2.  2.  2.]
         [ 1.  2.  2.]
         [ 2.  1.  2.]]

    Notice the ``i`` column in bim and fam data frames. It maps to the
    corresponding position of the bed matrix:

    .. doctest::

        >>> from os.path import join
        >>> from limix.io import plink
        >>> from pandas_plink import get_data_folder
        >>>
        >>> (bim, fam, bed) = plink.read(join(get_data_folder(), "data"), verbose=False)
        >>> chrom1 = bim.query("chrom=='1'")
        >>> X = bed[chrom1.i.values, :].compute()
        >>> print(X)  # doctest: +FLOAT_CMP
        [[ 2.  2.  1.]
         [ 2.  1.  2.]
         [nan nan nan]
         [nan nan  1.]
         [ 2.  2.  2.]
         [ 2.  2.  2.]
         [ 2.  1.  0.]
         [ 2.  2.  2.]
         [ 1.  2.  2.]
         [ 2.  1.  2.]]
    """
    from pandas_plink import read_plink
    from .._display import session_line

    banner = "Reading `{}`...\n".format(prefix)
    with session_line(banner, disable=not verbose):
        plink_data = read_plink(prefix, verbose=verbose)
        # NOTE(review): the nesting of this `if` inside the `with` block was
        # reconstructed from whitespace-flattened source -- confirm.
        if verbose:
            # Clear up the progress bar and get back to the initial line.
            print("\033[1A\033[K\033[1A", end="")

    # The first two items are the bim and fam frames; they are relabelled in
    # place so the object handed back is the very one read_plink produced.
    alleles = plink_data[0]
    samples = plink_data[1]

    samples.name = "fam"
    samples.index = samples["iid"]
    samples.index.name = "sample"

    alleles.name = "bim"
    alleles.index = alleles["snp"].astype(str).values
    alleles.index.name = "candidate"

    return plink_data
def read_pheno(filepath):
    """
    Read a phenotype file into a labelled two-dimensional array.

    The file is parsed without a header row by the package's CSV reader.
    The column at position 1 supplies the sample identifiers and the column
    at position 2 supplies the phenotype values, which are cast to ``float``.

    Parameters
    ----------
    filepath : str
        Path to the phenotype file.

    Returns
    -------
    xarray.DataArray
        Array with dimensions ``("sample", "trait")``. The ``sample``
        coordinate holds the identifiers read from the file and the single
        ``trait`` coordinate is the file's base name without its last
        extension.
    """
    from numpy import atleast_2d, asarray
    from os.path import basename, splitext
    from xarray import DataArray

    # Local import: inside this function `read` is the CSV reader, not the
    # PLINK `read` defined at module level.
    from .csv import read

    y = read(filepath, header=None, verbose=False)
    sample_ids = y.iloc[:, 1].tolist()
    name = splitext(basename(filepath))[0]
    # Turn the value column into a column vector (one row per sample).
    y = atleast_2d(asarray(y.iloc[:, 2].values, float)).T
    y = DataArray(y, dims=["sample", "trait"], coords=[sample_ids, [name]])
    return y


def _read_dosage(prefix, verbose):
    """
    Return the transposed genotype matrix of a PLINK file set.

    Parameters
    ----------
    prefix : str
        Path prefix shared by the PLINK files.
    verbose : bool
        Forwarded to ``pandas_plink.read_plink``.

    Returns
    -------
    The third item returned by ``read_plink`` (the bed matrix), transposed.
    """
    from pandas_plink import read_plink

    return read_plink(prefix, verbose=verbose)[2].T


def _see_bed(filepath, verbose):
    """
    Print the sample and variant tables of a PLINK file set.

    Parameters
    ----------
    filepath : str
        Path prefix passed on to :func:`read`.
    verbose : bool
        Forwarded to :func:`read`.
    """
    from .._display import draw_dataframe

    (bim, fam, _) = read(filepath, verbose=verbose)

    # `fam` is the per-sample table (its index is named "sample") and `bim`
    # is the per-variant table; the titles previously had the two frames
    # swapped.
    print(draw_dataframe("Samples", fam))
    print(draw_dataframe("Genotype", bim))


def _see_kinship(filepath, verbose):
    """
    Load a kinship matrix from a ``.grm.raw`` file and plot it.

    Parameters
    ----------
    filepath : str
        Path to the kinship file. Only names ending in ``.grm.raw`` are
        handled; for anything else a message is printed and the function
        returns without plotting.
    verbose : bool
        ``True`` to print progress messages.
    """
    from .. import plot
    from .._display import session_line

    if filepath.endswith(".grm.raw"):
        with session_line("Reading {}... ".format(filepath), disable=not verbose):
            K = _read_grm_raw(filepath)
    else:
        # NOTE(review): this message is emitted for any unrecognised suffix;
        # the file's existence is never actually checked here.
        print("File %s not found." % filepath)
        return

    if verbose:
        print("Plotting...")

    plot.kinship(K)


def _read_grm_raw(filepath):
    """
    Load a plain-text numeric matrix with ``numpy.loadtxt``.

    Parameters
    ----------
    filepath : str
        Path to the text file.

    Returns
    -------
    numpy.ndarray
        The values parsed from the file.
    """
    from numpy import loadtxt

    return loadtxt(filepath)


def _read_bed(filepath):
    """Placeholder that is not implemented yet; does nothing and returns ``None``."""
    pass