I/O module

BIMBAM

>>> import limix
>>>
>>> url = "http://rest.s3for.me/limix/example/phenotype.gemma"
>>> filepath = limix.sh.download(url, verbose=False)
>>> print(limix.io.bimbam.read_phenotype(filepath, verbose=False))
trait         0        1        2
sample
0       1.20000 -0.30000 -1.50000
1           nan  1.50000  0.30000
2       2.70000  1.10000      nan
3      -0.20000 -0.70000  0.80000
4       3.30000  2.40000  2.10000
>>> limix.sh.remove(filepath)

BGEN reader

>>> url = "http://rest.s3for.me/bgen-reader/haplotypes.bgen"
>>> filepath = limix.sh.download(url, verbose=False)
>>>
>>> data = limix.io.bgen.read(filepath, verbose=False)
>>> print(data.keys())
dict_keys(['variants', 'samples', 'genotype'])
>>> print(data["variants"].head(4))
         id rsid chrom  pos  nalleles allele_ids  vaddr
0  SNP1  RS1     1    1         2        A,G    102
1  SNP2  RS2     1    2         2        A,G    159
2  SNP3  RS3     1    3         2        A,G    216
3  SNP4  RS4     1    4         2        A,G    273
>>> print(data["samples"])
0    sample_0
1    sample_1
2    sample_2
3    sample_3
Name: id, dtype: object
>>> geno = data["genotype"][0].compute()
>>> print(geno.keys())
dict_keys(['probs', 'phased', 'ploidy', 'missing'])
>>> print(geno["probs"])
[[1. 0. 1. 0.]
 [0. 1. 1. 0.]
 [1. 0. 0. 1.]
 [0. 1. 0. 1.]]
>>> limix.sh.remove(filepath)
>>> limix.sh.remove(filepath + ".metadata")

CSV reader

>>> url = "http://rest.s3for.me/limix/expr.csv"
>>> filepath = limix.sh.download(url, verbose=False)
>>> data = limix.io.csv.read(filepath, verbose=False)
>>> print(data.head())
       HG00111  HG00112  HG00116  HG00121  HG00133  HG00135  HG00142  HG00143  \
gene1 -3.75235 -0.42113 -0.53629 -0.90768 -0.25189 -0.60300 -0.31069  0.28849
gene2 -0.35145  1.28258 -2.83505  0.32953 -0.50711 -0.81912 -1.37971  0.26906
gene3 -1.31997  1.08197  0.28400 -0.41318  0.14609 -0.14714  0.30255  0.69654
gene4 -0.75163  0.37668 -0.23564  1.06111  0.58524  0.60962 -2.02384 -1.29969
gene5  0.06464  0.64204 -0.81127 -1.42806 -0.89599 -0.01391  0.34385 -0.48492

       HG00151  HG00152  HG00154  HG00159  HG00160  HG00171  HG00173  HG00179  \
gene1 -1.72944 -1.69063  0.23706 -0.70690  0.31554  0.41398  1.53933  1.64413
gene2 -0.98741 -1.38205 -1.49273  1.43818  0.60203  0.72495  1.33730 -0.54032
gene3  0.92997 -0.97183  0.73781  0.13841 -0.27796 -0.30850 -1.37364  0.02908
gene4  0.74860  0.35024  0.46494  0.26519 -1.04980 -0.10405  0.24636  0.39698
gene5 -1.56242 -0.69343 -0.67140 -0.97220  0.51523  0.84428  0.37633  0.20097

       HG00189  HG00190  HG00232  HG00233  HG00239  HG00245  HG00253  HG00257  \
gene1  0.65387  0.68840 -0.31933 -0.05787  0.18941 -0.96866  0.95566  0.09096
gene2  0.23202  0.62984  0.36502 -0.54166 -0.17954  1.56841  0.41138  0.85125
gene3  0.38357  1.53011  1.03765  1.66079  0.47459 -0.61217 -0.75705 -1.23464
gene4  0.22995 -0.74410  0.13555  1.11415 -0.27491 -0.63577  0.65697 -0.78026
gene5  0.22179 -0.88524 -0.90036 -0.30308 -0.47617 -0.65303  0.31526  0.21940

       HG00263  HG00274  HG00281  HG00284  HG00309  HG00318  HG00319  HG00330  \
gene1 -0.84906 -0.38422 -0.20839  1.76988 -0.08126 -0.11885  0.06195 -0.39166
gene2 -0.68103  1.49606  0.03634  0.30436 -0.81669 -0.41680 -0.68541  1.05676
gene3 -0.39850 -0.04262  0.58753 -1.79663  0.06896  0.26583 -0.09755  1.30798
gene4 -0.03511  1.17201 -0.60239 -0.04585  1.52125 -0.01333 -1.03177 -1.04258
gene5 -1.44331  1.77727  0.03294 -0.90860  2.01101  1.30201  1.47212 -1.02498

       HG00331  HG00332  HG00343  HG00344  HG00351  HG00357  HG00369  ...  NA19819  \
gene1  1.19368 -2.60319 -0.00044  0.26917  0.35906  0.32333 -0.58944  ...  1.14834
gene2 -1.35361 -1.17153 -0.75354  1.08028  0.30128  1.19826  2.25417  ...  0.70334
gene3  0.84178  0.28865 -1.17596 -0.16204  0.24943  1.08654 -0.82590  ... -2.01485
gene4  0.63385 -1.60319  0.49055  0.53223 -0.00883 -1.39597 -0.24307  ...  0.09122
gene5 -0.48581 -1.59411 -1.25390 -0.78523  2.15585  1.15443  0.16866  ...  0.64129

       NA19834  NA19908  NA19920  NA20127  NA20278  NA20281  NA20287  NA20291  \
gene1 -0.33572 -0.48395  0.28840 -0.19132  1.46027 -0.49558 -0.05913 -0.67027
gene2 -1.12835 -1.56760 -1.53615  1.30383 -0.83380  0.08954  1.35263 -0.76398
gene3 -0.65052  1.78800  0.54407 -0.19086 -0.54339  0.00340  0.93919  2.35539
gene4 -1.28366  0.49670  0.51998  0.01628  0.65825  1.42326  0.75003 -0.46688
gene5 -0.38025 -0.99212  0.86387 -1.30035  0.36494 -0.74369  0.97632 -1.41972

       NA20314  NA20334  NA20339  NA20341  NA20344  NA20348  NA20357  NA20359  \
gene1  1.14834  0.03650 -0.39326  0.28873 -0.82685 -0.33570  0.35864 -0.88629
gene2 -0.73860  0.97905 -0.03794  0.52208  0.15237  0.05513 -0.43515  1.78638
gene3 -0.24083 -0.03304 -0.02394  0.63280 -0.14199  0.21436  0.01104  0.19409
gene4  1.02439  0.44723 -1.26319  0.04781  0.42133 -2.11834  0.59976 -0.45331
gene5 -0.68538  0.98479  0.58614  1.32101 -1.28100 -0.24468 -1.50173 -0.23772

       NA20412  NA20414  NA20505  NA20507  NA20508  NA20517  NA20518  NA20521  \
gene1 -0.27868 -0.88236  0.85370  0.12215 -0.22804  0.11440  1.15530 -0.64616
gene2 -0.13907 -1.79076 -1.06388  1.84062  0.81188  0.74989  1.13613 -0.67296
gene3  0.20491 -1.09471 -0.41158 -0.19403  0.62979  1.45114 -0.68828 -0.21306
gene4  2.07913  0.64254  1.19309  1.36727  1.36291 -0.20257 -1.91187 -0.80394
gene5  0.55915 -0.70109  1.10276  0.19700 -1.00590 -0.19778  0.70381  1.32831

       NA20525  NA20527  NA20534  NA20537  NA20581  NA20582  NA20753  NA20754  \
gene1 -1.00890  0.33830 -1.18606 -2.50144  1.11857 -1.35514 -0.45410 -1.40787
gene2 -0.52930 -0.08037  1.49958  0.48022  1.90088  1.19142 -1.07944  1.06962
gene3  1.28033 -1.03548  0.30717 -0.60877  0.14828 -0.02566  0.68297 -1.41462
gene4  0.84287  2.31155 -0.45076  0.27237  0.25196  0.13814 -1.58961 -0.61954
gene5 -0.13948 -0.59769  1.28226  0.56941 -3.31790  0.63016  0.17360  2.28942

       NA20768  NA20771  NA20772  NA20774  NA20775  NA20804
gene1  0.74605 -1.92301  0.52952  0.59285 -0.25449 -0.42643
gene2  0.21579  0.84464  0.72602  0.17902 -1.18471  1.22427
gene3  0.02000 -0.00026 -0.43102  1.03429 -2.04323 -0.61358
gene4  0.20742 -1.52664 -0.02818  0.29817  0.01488  0.26651
gene5 -0.82307 -1.15892 -1.14967 -0.13977  0.84840  0.99755

[5 rows x 274 columns]
>>> limix.sh.remove(filepath)

HDF5 reader

>>> url = "http://rest.s3for.me/limix/smith08.hdf5.bz2"
>>> filepath = limix.sh.download(url, verbose=False)
>>> filepath = limix.sh.extract(filepath, verbose=False)
>>> data = limix.io.hdf5.read_limix(filepath)
>>> print(data)
{'phenotype': <xarray.DataArray 'phenotype' (sample: 109, outcome: 10986)>
array([[-0.037339, -0.078165,  0.042936, ...,  0.095596, -0.132385, -0.274954],
       [-0.301376,  0.066055,  0.338624, ..., -0.142661, -0.238349,  0.732752],
       [ 0.002661,  0.121835, -0.137064, ..., -0.144404,  0.257615,  0.015046],
       ...,
       [-0.287339,  0.351835,  0.072936, ...,  0.097339, -0.038349,  0.162752],
       [-0.577339,  0.011835, -0.007064, ...,  0.135596,  0.107615,  0.245046],
       [-0.277339,  0.061835,  0.132936, ...,  0.015596, -0.142385, -0.124954]])
Coordinates:
  * sample        (sample) int64 0 1 2 3 4 5 6 7 ... 102 103 104 105 106 107 108
    environment   (outcome) float64 0.0 0.0 0.0 0.0 0.0 ... 1.0 1.0 1.0 1.0 1.0
    gene_ID       (outcome) object 'YOL161C' 'YJR107W' ... 'YLR118C' 'YBR242W'
    gene_chrom    (outcome) object '15' '10' '16' '7' '4' ... '3' '10' '12' '2'
    gene_end      (outcome) int64 11548 628319 32803 ... 315049 384726 705381
    gene_start    (outcome) int64 11910 627333 30482 ... 315552 385409 704665
    gene_strand   (outcome) object 'C' 'W' 'W' 'W' 'W' ... 'W' 'W' 'C' 'C' 'W'
    phenotype_ID  (outcome) object 'YOL161C:0' 'YJR107W:0' ... 'YBR242W:1'
Dimensions without coordinates: outcome, 'genotype': <xarray.DataArray 'genotype' (sample: 109, candidate: 2956)>
array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 0., 1., ..., 1., 1., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.]])
Coordinates:
  * sample   (sample) int64 0 1 2 3 4 5 6 7 ... 101 102 103 104 105 106 107 108
    chrom    (candidate) int64 1 1 1 1 1 1 1 1 1 ... 16 16 16 16 16 16 16 16 16
    pos      (candidate) int64 483 484 3220 3223 ... 927506 932310 932535 932538
    pos_cum  (candidate) int64 483 484 3220 3223 ... 12055570 12055795 12055798
Dimensions without coordinates: candidate}
>>> limix.sh.remove(filepath)

NumPy reader

>>> url = "http://rest.s3for.me/limix/example.npy"
>>> filepath = limix.sh.download(url, verbose=False)
>>> K = limix.io.npy.read(filepath, verbose=False)
>>> print(K)
[[0.67003303 0.09512837 0.09346511 0.09252165 0.09679249]
 [0.09512837 0.66972454 0.09344451 0.09109398 0.09347495]
 [0.09346511 0.09344451 0.67305621 0.08987969 0.09689215]
 [0.09252165 0.09109398 0.08987969 0.67209248 0.09378162]
 [0.09679249 0.09347495 0.09689215 0.09378162 0.66773896]]
>>> limix.sh.remove(filepath)

PLINK reader

>>> from os.path import join
>>> from pandas_plink import get_data_folder
>>>
>>> (bim, fam, bed) = limix.io.plink.read(join(get_data_folder(), "data"),
...                                       verbose=False)
>>> print(bim.head())
           chrom         snp       cm    pos a0 a1  i
candidate
rs10399749     1  rs10399749  0.00000  45162  G  C  0
rs2949420      1   rs2949420  0.00000  45257  C  T  1
rs2949421      1   rs2949421  0.00000  45413  0  0  2
rs2691310      1   rs2691310  0.00000  46844  A  T  3
rs4030303      1   rs4030303  0.00000  72434  0  G  4
>>> print(fam.head())
               fid       iid    father    mother gender    trait  i
sample
Sample_1  Sample_1  Sample_1         0         0      1 -9.00000  0
Sample_2  Sample_2  Sample_2         0         0      2 -9.00000  1
Sample_3  Sample_3  Sample_3  Sample_1  Sample_2      2 -9.00000  2
>>> print(bed.compute())
[[ 2.  2.  1.]
 [ 2.  1.  2.]
 [nan nan nan]
 [nan nan  1.]
 [ 2.  2.  2.]
 [ 2.  2.  2.]
 [ 2.  1.  0.]
 [ 2.  2.  2.]
 [ 1.  2.  2.]
 [ 2.  1.  2.]]