I/O module
BIMBAM
>>> import limix
>>>
>>> url = "http://rest.s3for.me/limix/example/phenotype.gemma"
>>> filepath = limix.sh.download(url, verbose=False)
>>> print(limix.io.bimbam.read_phenotype(filepath, verbose=False))
trait 0 1 2
sample
0 1.20000 -0.30000 -1.50000
1 nan 1.50000 0.30000
2 2.70000 1.10000 nan
3 -0.20000 -0.70000 0.80000
4 3.30000 2.40000 2.10000
>>> limix.sh.remove(filepath)
BGEN reader
>>> url = "http://rest.s3for.me/bgen-reader/haplotypes.bgen"
>>> filepath = limix.sh.download(url, verbose=False)
>>>
>>> data = limix.io.bgen.read(filepath, verbose=False)
>>> print(data.keys())
dict_keys(['variants', 'samples', 'genotype'])
>>> print(data["variants"].head(4))
id rsid chrom pos nalleles allele_ids vaddr
0 SNP1 RS1 1 1 2 A,G 102
1 SNP2 RS2 1 2 2 A,G 159
2 SNP3 RS3 1 3 2 A,G 216
3 SNP4 RS4 1 4 2 A,G 273
>>> print(data["samples"])
0 sample_0
1 sample_1
2 sample_2
3 sample_3
Name: id, dtype: object
>>> geno = data["genotype"][0].compute()
>>> print(geno.keys())
dict_keys(['probs', 'phased', 'ploidy', 'missing'])
>>> print(geno["probs"])
[[1. 0. 1. 0.]
[0. 1. 1. 0.]
[1. 0. 0. 1.]
[0. 1. 0. 1.]]
>>> limix.sh.remove(filepath)
>>> limix.sh.remove(filepath + ".metadata")
CSV reader
>>> url = "http://rest.s3for.me/limix/expr.csv"
>>> filepath = limix.sh.download(url, verbose=False)
>>> data = limix.io.csv.read(filepath, verbose=False)
>>> print(data.head())
HG00111 HG00112 HG00116 HG00121 HG00133 HG00135 HG00142 HG00143 \
gene1 -3.75235 -0.42113 -0.53629 -0.90768 -0.25189 -0.60300 -0.31069 0.28849
gene2 -0.35145 1.28258 -2.83505 0.32953 -0.50711 -0.81912 -1.37971 0.26906
gene3 -1.31997 1.08197 0.28400 -0.41318 0.14609 -0.14714 0.30255 0.69654
gene4 -0.75163 0.37668 -0.23564 1.06111 0.58524 0.60962 -2.02384 -1.29969
gene5 0.06464 0.64204 -0.81127 -1.42806 -0.89599 -0.01391 0.34385 -0.48492
HG00151 HG00152 HG00154 HG00159 HG00160 HG00171 HG00173 HG00179 \
gene1 -1.72944 -1.69063 0.23706 -0.70690 0.31554 0.41398 1.53933 1.64413
gene2 -0.98741 -1.38205 -1.49273 1.43818 0.60203 0.72495 1.33730 -0.54032
gene3 0.92997 -0.97183 0.73781 0.13841 -0.27796 -0.30850 -1.37364 0.02908
gene4 0.74860 0.35024 0.46494 0.26519 -1.04980 -0.10405 0.24636 0.39698
gene5 -1.56242 -0.69343 -0.67140 -0.97220 0.51523 0.84428 0.37633 0.20097
HG00189 HG00190 HG00232 HG00233 HG00239 HG00245 HG00253 HG00257 \
gene1 0.65387 0.68840 -0.31933 -0.05787 0.18941 -0.96866 0.95566 0.09096
gene2 0.23202 0.62984 0.36502 -0.54166 -0.17954 1.56841 0.41138 0.85125
gene3 0.38357 1.53011 1.03765 1.66079 0.47459 -0.61217 -0.75705 -1.23464
gene4 0.22995 -0.74410 0.13555 1.11415 -0.27491 -0.63577 0.65697 -0.78026
gene5 0.22179 -0.88524 -0.90036 -0.30308 -0.47617 -0.65303 0.31526 0.21940
HG00263 HG00274 HG00281 HG00284 HG00309 HG00318 HG00319 HG00330 \
gene1 -0.84906 -0.38422 -0.20839 1.76988 -0.08126 -0.11885 0.06195 -0.39166
gene2 -0.68103 1.49606 0.03634 0.30436 -0.81669 -0.41680 -0.68541 1.05676
gene3 -0.39850 -0.04262 0.58753 -1.79663 0.06896 0.26583 -0.09755 1.30798
gene4 -0.03511 1.17201 -0.60239 -0.04585 1.52125 -0.01333 -1.03177 -1.04258
gene5 -1.44331 1.77727 0.03294 -0.90860 2.01101 1.30201 1.47212 -1.02498
HG00331 HG00332 HG00343 HG00344 HG00351 HG00357 HG00369 ... NA19819 \
gene1 1.19368 -2.60319 -0.00044 0.26917 0.35906 0.32333 -0.58944 ... 1.14834
gene2 -1.35361 -1.17153 -0.75354 1.08028 0.30128 1.19826 2.25417 ... 0.70334
gene3 0.84178 0.28865 -1.17596 -0.16204 0.24943 1.08654 -0.82590 ... -2.01485
gene4 0.63385 -1.60319 0.49055 0.53223 -0.00883 -1.39597 -0.24307 ... 0.09122
gene5 -0.48581 -1.59411 -1.25390 -0.78523 2.15585 1.15443 0.16866 ... 0.64129
NA19834 NA19908 NA19920 NA20127 NA20278 NA20281 NA20287 NA20291 \
gene1 -0.33572 -0.48395 0.28840 -0.19132 1.46027 -0.49558 -0.05913 -0.67027
gene2 -1.12835 -1.56760 -1.53615 1.30383 -0.83380 0.08954 1.35263 -0.76398
gene3 -0.65052 1.78800 0.54407 -0.19086 -0.54339 0.00340 0.93919 2.35539
gene4 -1.28366 0.49670 0.51998 0.01628 0.65825 1.42326 0.75003 -0.46688
gene5 -0.38025 -0.99212 0.86387 -1.30035 0.36494 -0.74369 0.97632 -1.41972
NA20314 NA20334 NA20339 NA20341 NA20344 NA20348 NA20357 NA20359 \
gene1 1.14834 0.03650 -0.39326 0.28873 -0.82685 -0.33570 0.35864 -0.88629
gene2 -0.73860 0.97905 -0.03794 0.52208 0.15237 0.05513 -0.43515 1.78638
gene3 -0.24083 -0.03304 -0.02394 0.63280 -0.14199 0.21436 0.01104 0.19409
gene4 1.02439 0.44723 -1.26319 0.04781 0.42133 -2.11834 0.59976 -0.45331
gene5 -0.68538 0.98479 0.58614 1.32101 -1.28100 -0.24468 -1.50173 -0.23772
NA20412 NA20414 NA20505 NA20507 NA20508 NA20517 NA20518 NA20521 \
gene1 -0.27868 -0.88236 0.85370 0.12215 -0.22804 0.11440 1.15530 -0.64616
gene2 -0.13907 -1.79076 -1.06388 1.84062 0.81188 0.74989 1.13613 -0.67296
gene3 0.20491 -1.09471 -0.41158 -0.19403 0.62979 1.45114 -0.68828 -0.21306
gene4 2.07913 0.64254 1.19309 1.36727 1.36291 -0.20257 -1.91187 -0.80394
gene5 0.55915 -0.70109 1.10276 0.19700 -1.00590 -0.19778 0.70381 1.32831
NA20525 NA20527 NA20534 NA20537 NA20581 NA20582 NA20753 NA20754 \
gene1 -1.00890 0.33830 -1.18606 -2.50144 1.11857 -1.35514 -0.45410 -1.40787
gene2 -0.52930 -0.08037 1.49958 0.48022 1.90088 1.19142 -1.07944 1.06962
gene3 1.28033 -1.03548 0.30717 -0.60877 0.14828 -0.02566 0.68297 -1.41462
gene4 0.84287 2.31155 -0.45076 0.27237 0.25196 0.13814 -1.58961 -0.61954
gene5 -0.13948 -0.59769 1.28226 0.56941 -3.31790 0.63016 0.17360 2.28942
NA20768 NA20771 NA20772 NA20774 NA20775 NA20804
gene1 0.74605 -1.92301 0.52952 0.59285 -0.25449 -0.42643
gene2 0.21579 0.84464 0.72602 0.17902 -1.18471 1.22427
gene3 0.02000 -0.00026 -0.43102 1.03429 -2.04323 -0.61358
gene4 0.20742 -1.52664 -0.02818 0.29817 0.01488 0.26651
gene5 -0.82307 -1.15892 -1.14967 -0.13977 0.84840 0.99755
[5 rows x 274 columns]
>>> limix.sh.remove(filepath)
HDF5 reader
>>> url = "http://rest.s3for.me/limix/smith08.hdf5.bz2"
>>> filepath = limix.sh.download(url, verbose=False)
>>> filepath = limix.sh.extract(filepath, verbose=False)
>>> data = limix.io.hdf5.read_limix(filepath)
>>> print(data)
{'phenotype': <xarray.DataArray 'phenotype' (sample: 109, outcome: 10986)>
array([[-0.037339, -0.078165, 0.042936, ..., 0.095596, -0.132385, -0.274954],
[-0.301376, 0.066055, 0.338624, ..., -0.142661, -0.238349, 0.732752],
[ 0.002661, 0.121835, -0.137064, ..., -0.144404, 0.257615, 0.015046],
...,
[-0.287339, 0.351835, 0.072936, ..., 0.097339, -0.038349, 0.162752],
[-0.577339, 0.011835, -0.007064, ..., 0.135596, 0.107615, 0.245046],
[-0.277339, 0.061835, 0.132936, ..., 0.015596, -0.142385, -0.124954]])
Coordinates:
* sample (sample) int64 0 1 2 3 4 5 6 7 ... 102 103 104 105 106 107 108
environment (outcome) float64 0.0 0.0 0.0 0.0 0.0 ... 1.0 1.0 1.0 1.0 1.0
gene_ID (outcome) object 'YOL161C' 'YJR107W' ... 'YLR118C' 'YBR242W'
gene_chrom (outcome) object '15' '10' '16' '7' '4' ... '3' '10' '12' '2'
gene_end (outcome) int64 11548 628319 32803 ... 315049 384726 705381
gene_start (outcome) int64 11910 627333 30482 ... 315552 385409 704665
gene_strand (outcome) object 'C' 'W' 'W' 'W' 'W' ... 'W' 'W' 'C' 'C' 'W'
phenotype_ID (outcome) object 'YOL161C:0' 'YJR107W:0' ... 'YBR242W:1'
Dimensions without coordinates: outcome, 'genotype': <xarray.DataArray 'genotype' (sample: 109, candidate: 2956)>
array([[1., 1., 1., ..., 0., 0., 0.],
[1., 0., 1., ..., 1., 1., 1.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 1., 1.],
[0., 0., 0., ..., 1., 1., 1.],
[1., 1., 1., ..., 1., 1., 1.]])
Coordinates:
* sample (sample) int64 0 1 2 3 4 5 6 7 ... 101 102 103 104 105 106 107 108
chrom (candidate) int64 1 1 1 1 1 1 1 1 1 ... 16 16 16 16 16 16 16 16 16
pos (candidate) int64 483 484 3220 3223 ... 927506 932310 932535 932538
pos_cum (candidate) int64 483 484 3220 3223 ... 12055570 12055795 12055798
Dimensions without coordinates: candidate}
>>> limix.sh.remove(filepath)
NumPy reader
>>> url = "http://rest.s3for.me/limix/example.npy"
>>> filepath = limix.sh.download(url, verbose=False)
>>> K = limix.io.npy.read(filepath, verbose=False)
>>> print(K)
[[0.67003303 0.09512837 0.09346511 0.09252165 0.09679249]
[0.09512837 0.66972454 0.09344451 0.09109398 0.09347495]
[0.09346511 0.09344451 0.67305621 0.08987969 0.09689215]
[0.09252165 0.09109398 0.08987969 0.67209248 0.09378162]
[0.09679249 0.09347495 0.09689215 0.09378162 0.66773896]]
>>> limix.sh.remove(filepath)
PLINK reader
>>> from os.path import join
>>> from pandas_plink import get_data_folder
>>>
>>> (bim, fam, bed) = limix.io.plink.read(join(get_data_folder(), "data"),
... verbose=False)
>>> print(bim.head())
chrom snp cm pos a0 a1 i
candidate
rs10399749 1 rs10399749 0.00000 45162 G C 0
rs2949420 1 rs2949420 0.00000 45257 C T 1
rs2949421 1 rs2949421 0.00000 45413 0 0 2
rs2691310 1 rs2691310 0.00000 46844 A T 3
rs4030303 1 rs4030303 0.00000 72434 0 G 4
>>> print(fam.head())
fid iid father mother gender trait i
sample
Sample_1 Sample_1 Sample_1 0 0 1 -9.00000 0
Sample_2 Sample_2 Sample_2 0 0 2 -9.00000 1
Sample_3 Sample_3 Sample_3 Sample_1 Sample_2 2 -9.00000 2
>>> print(bed.compute())
[[ 2. 2. 1.]
[ 2. 1. 2.]
[nan nan nan]
[nan nan 1.]
[ 2. 2. 2.]
[ 2. 2. 2.]
[ 2. 1. 0.]
[ 2. 2. 2.]
[ 1. 2. 2.]
[ 2. 1. 2.]]