import cyvcf2 import tsinfer from IPython.display import SVG, display chrom = str(10) ##assign chromosome number for iste in question pos = str(101163627) ##assign position number of site in # QUESTION: ##function and analysis as supplied by tsinfer documentation for generating trees def add_diploid_sites(vcf, samples): """ Read the sites in the vcf and add them to the samples object, reordering the alleles to put the ancestral allele first, if it is available. """ pos = 0 for variant in vcf: # Loop over variants, each assumed at a unique site if pos == variant.POS: raise ValueError("Duplicate positions for variant at position", pos) else: pos = variant.POS if any([not phased for _, _, phased in variant.genotypes]): raise ValueError("Unphased genotypes for variant at position", pos) alleles = [variant.REF] + variant.ALT ancestral = variant.INFO.get('AA', variant.REF) # Ancestral state must be first in the allele list. ordered_alleles = [ancestral] + list(set(alleles) - {ancestral}) allele_index = {old_index: ordered_alleles.index(allele) for old_index, allele in enumerate(alleles)} # Map original allele indexes to their indexes in the new alleles list. genotypes = [allele_index[old_index] for row in variant.genotypes for old_index in row[0:2]] samples.add_site(pos, genotypes=genotypes, alleles=alleles) def chromosome_length(vcf): assert len(vcf.seqlens) == 1 return vcf.seqlens[0] url="/home/arb38/loh_home/IBD_calculations/site_"+ chrom + "_" + pos + "_carrier_flip.vcf" vcf = cyvcf2.VCF(url) with tsinfer.SampleData(path="/home/arb38/loh_home/IBD_calculations/site_"+ chrom + "_" + pos + "_carrier.vcf", sequence_length=chromosome_length(vcf)) as samples: add_diploid_sites(vcf, samples) ts = tsinfer.infer(samples) ##to determine if all variant carrier haplotypes cluster together node_type={} number_even_odd={} sample_num=0 for node in tree.nodes(order='preorder'): if sample_num < tree.num_samples(node): sample_num=int(tree.num_samples(node)) count_both=0 b="" num_orphans = 0 for node in tree.nodes(order='postorder'): children=tree.children(node) if len(children) == 0: if int(node)%2==0: number_even_odd[node]=(1,0) else: number_even_odd[node]=(0,1) else: even_number=0 odd_number=0 for child in children: even_number += int(number_even_odd[child][0]) odd_number += int(number_even_odd[child][1]) if (even_number == sample_num/2 and odd_number == 0): b+=" found even "+str(node) elif (odd_number == sample_num/2 and even_number == 0): b+=" found odd "+str(node) number_even_odd[node] = (even_number,odd_number) if odd_number > 0: for child in children: if child <= sample_num and int(child)%2==0: num_orphans += 1 if b == "": b+="didn't find single cluster" a = str(chrom) + "_" + str(pos)+"\t" + b + "\tnumber of orphans\t" + str(num_orphans) print(a) ##build trees as svg to visualize clustering tree.draw(path="/home/arb38/loh_home/IBD_calculations/tree_at_site_"+ chrom + "_" + pos + "_carriers.svg", height=700, width=1200)