import numpy as np ref_genotypes=np.genfromtxt('ImpG_1KG_AFR.hap') #Reference AFR haplotypes from 1KG typed=np.genfromtxt('ImpG_graffetal.typed',usecols=0) #List of variants typed by Graff et al. zsc=np.genfromtxt('ImpG_graffetal.typed',usecols=1) #z-scores reported by Graff et al. pos=np.genfromtxt('ImpG_graffetal.map',usecols=1,skip_header=3,dtype=int) L=0.1 mask=np.concatenate([[1,1,1],(pos>88.9e6)*(pos<89.9e6)]).astype(bool) ld_thresh=-1 #Optional parameter to mask variants in low-LD with the VNTR during imputation R=np.corrcoef(ref_genotypes[mask])+np.identity( (ref_genotypes[mask]).shape[0])*L ld_mask=((R[2]**2>ld_thresh) + (np.arange(len(R[2]))<3)).astype(bool) R_typed=R[(typed[mask]!=0)*ld_mask][:,(typed[mask]!=0)*ld_mask] R_typed_inv=np.linalg.inv(R_typed) R_imp_typed=R[(typed[mask]==0)*ld_mask][:,(typed[mask]!=0)*ld_mask] z_imp=R_imp_typed.dot(R_typed_inv.dot((zsc[mask][ld_mask]*typed[mask][ld_mask])[typed[mask][ld_mask]!=0])) for n,z in zip(['VNTR_6ALLELE','VNTR_24BINARIZED','VNTR'],z_imp): print("%s imputed z^2=%.3f" %(n,z**2))