#V1.1 # only use H3K4me1, H3K27ac, and H3K27me3 #which regulate gene expression #links are between AREs and genes based on genetic evidence # # visualize information from different layer gene by gene by signal tracks #Bulk annotation #GWAS signal #haQTL-ARE from Marks #ARE MR and coloc results #DP signal #use genes with promoter region overlapping with either TssA and TssFlank #consider H3K27ac peaks associated within gene neighborhood #genomic region is in format as "chr2\t1\t223" #integrate signals in a gene centered manner #a) how genomic regions is associated with genes #b) how the haQTL, differential peak signal is related to each peak, which is in turn related to each gene #check haQTL effect size VS GWAS for peaks for each gene #prmotoer regions are defined as up/downstream 2kb exntended from tss #gene neighborhood are defined as hic regions overlap with promoter regions, and Hi-C one step neighbors of these regions, if no overlapping found, promoter itself is defined as gene neighborhood # args = commandArgs(trailingOnly=TRUE) # HM.LINK = args[1] # library(Sushi) source("~/lhou.compbio/codes/mylib/RCode/Util_zqtl_byYongjin.R") # DISEASES = c("CD", "UC", "RA") # DIS2COLOR = c("brown", "purple", "orange") # names(DIS2COLOR) = DISEASES PRMOTER.WINDOW = 2000 PK.WIND = 10000 GWAS.PVAL.CUTOFF = 1e-6 GWAS.Z.CUTOFF = abs(qnorm(GWAS.PVAL.CUTOFF/2)) DP.Q.CUTOFF=0.05 COLOC.ABF.H4.CUTOFF=0.5 MR.P.CUTOFF=0.01 hQTL.P.CUTOFF = "emp.p.fdr.cutoff0.2" gLinks.GSP.cutoffs = c("coloc.H4.ABF" =0.5, "coloc.H4vsH3.ratio" =5, "MR.qval" =-0.05, "FMeQTL2ARE.DistanceInv" = 1/2000) HMs= c("H3K27ac","H3K4me1", "H3K27me3") #"H3K36me3" "H3K4me3", #HAQTL.COLs = c(rgb(1, 0, 0, 0.5), rgb(1, 1, 0, 0.5), rgb(0, 1, 0, 0.5), rgb(0, 1, 1, 0.5), rgb(0, 0, 1, 0.5), rgb(1, 0, 1, 0.5)) files.in.gLink.RData = paste0("b_summary_Links.coloc.MR.FMeQTL_V1.3_gARE/Blood_", HMs, ".linkFromColoc.MR.RData") names(files.in.gLink.RData)=HMs #file.in.g2pk.RData = paste0("./a_gAREsNeareGene_emp.p.fdr.cutoff0.2/Blood_1000k/", HM.LINK, ".eGene2gARE.hg38.RData") file.in.eGene.info = paste0("/broad/compbio/data/GTEx/v8/eqtl/GTEx_Analysis_v8_eQTL_significant/Whole_Blood.v8.egenes.txt.gz") # files.in.TssA = c(E047= "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE047TssAFlank.bed", # E048="~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE048TssAFlank.bed", # E039 = "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE039TssAFlank.bed", # E040 = "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE040TssAFlank.bed", # E029 = "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE029TssAFlank.bed", # BPNeutrop = "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE030TssAFlank.bed",# # E032 = "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE032TssAFlank.bed" # ) #file.in.TSS = "~/lhou.compbio/data/gene/human/gencode.v26lift37.basic.TSSUp0Down1bp.geneName.ENSG.bed" files.in.pk.RData= paste0("../peakMergeNormal/a_2_peakHeightsNormalized_V1.4/", HMs, ".MayoWithRef.heightsMean.depthGCNormed.RData") names(files.in.pk.RData)= HMs files.in.diffPeak.RData = paste0("../peakMergeNormal/c_bulkDiffPeak_limma_V1.4.1_sva_noEHR/", HMs, "/", HMs, ".diffPeak.RData") names(files.in.diffPeak.RData) = HMs files.in.haQTLPeak.hg38.RData = paste("../haQTL/a_5_haQTL_multTest_FixedFactorNum_V1.2.3_DP.BG_100k/", HMs, ".haQTLPeak.cutoffs.hg38.RData", sep="") names(files.in.haQTLPeak.hg38.RData)= HMs files.in.haQTLPeak.coloc.hg38.RData=paste0("../haQTLvsGWAS/a_bulkhQTLvsGWAS_bycoloc_V1.1_allGWAS/PGC.BP/", HMs, "_100k/", HMs, ".bulkhQTLvsGWAS.RData") names(files.in.haQTLPeak.coloc.hg38.RData) = HMs files.in.haQTLPeak.MR.hg38.RData=paste0("../haQTLvsGWAS/a_bulkhaQTLvsGWAS_byMR_Egger_V1.1_allGWAS/PGC.BP/", HMs, "_100k/", HMs, ".bulkHaQTLvsGWAS.RData") names(files.in.haQTLPeak.MR.hg38.RData) = HMs file.in.BP.gz = "~/lhou.compbio/data/GWAS/pgc_bipolar/pgc_bip_hg19.sorted.bed.gz" file.in.geneStructure.RData = "~/lhou.compbio/data/gene/human/gencode.v26lift37.basic.annotation.forSushi_exonAndUTR.RData" dir.tmp = "~/hptmp/MayoBipolar/c_geneNeighb_geneticLink/" dir.create(dir.tmp, recursive = T, showWarnings = F) #file.tmp.cellTypePeak = paste(dir.tmp, REF, ".hg19.nodup.filt.narrowPeak.merged.bed", sep="") #file.tmp.TssA.bed = paste(dir.tmp, REF, ".TSSA.bed", sep="") #file.tmp.hicRegs.bed = paste(dir.tmp, "hicRegions.bed", sep="") file.tmp.gRegion.bed= paste(dir.tmp, "gRegion.bed", sep="") file.tmp.gFilt.bed= paste(dir.tmp, "gFilt.bed", sep="") file.tmp.genesStructure.bed = paste(dir.tmp, "genesStructure.bed", sep="") file.tmp.gStructure.bed = paste(dir.tmp, "gStructure.bed", sep="") file.tmp.pk.bed = paste(dir.tmp, "pk.bed", sep="") file.tmp.BP.hits.bed = paste0(dir.tmp, "BP.hits.hg19.bed") file.tmp.pk.ovlp.BP.hits.bed= paste0(dir.tmp, "pkOVLP.BP.hits.hg19.bed") dir.ou="c_geneNeighb_visGWASANDBulkAnnot_byGene_GeneticLink_V1.1_3HM/" dir.create(dir.ou, showWarnings = F, recursive = T) #file.out.pref = paste(dir.ou, "geneNB", sep="") file.ou.gNeighbor.link.RData = paste(dir.ou, "3HM.gLinks.RData", sep="") file.ou.gNeighbor.annot.RData = paste(dir.ou, "3HM.geneNB.peaks.annot_gLinks.RData", sep="") #file.ou.pdf = paste(dir.ou, "geneNB.GWASp", GWAS.PVAL.CUTOFF, ".haQTLDiffPks.", HM.LINK, "_geneticlink.pdf", sep="") #file.ou.haQTLDiffPks2GWAS.RData = paste(file.out.pref, ".haQTLDiffPks2GWAS.RData", sep="") #file.ou.geneNeighb = paste(file.out.pref, ".txt", sep="") ################################################################################ load(file.in.geneStructure.RData) write.table(transcripts.bed, file=file.tmp.genesStructure.bed, sep="\t", col.names = F, row.names = F, quote=F) ############################################################################### # gene2hicReg = list() #gene 2 neighbor regions based on hiC and promoters # # hicReg2Annot = matrix() peaks2haQTL = list() #peak 2 haQTL, snpid and zscore peaks2diffZ = list() #peaks 2 differential Zscore for each disease g.neighb2peaks = list() #neighbor mapped 2 peaks gene2peaks = list() #gene 2 peaks associated with its neighbors ####################################################### HM.hg38tohg19= lapply(HMs, FUN=function(HM) { load(files.in.pk.RData[HM]) return(pks.hg382hg19.uniq) }) names(HM.hg38tohg19)=HMs #annotate################################################### HM.DP.score.hg19=lapply(HMs, FUN=function(HM) { load(files.in.diffPeak.RData[HM]) dp.signals=cbind(log2FoldChange= res$diseasedisease$logFC, pvalue=res$diseasedisease$P.Value, padj = res$diseasedisease$adj.P.Val) rownames(dp.signals) = rownames(res$diseasedisease) return(dp.signals) }) names(HM.DP.score.hg19)=HMs HM.PK.slct.hg19 = lapply(HMs, FUN=function(HM) { rownames(HM.DP.score.hg19[[HM]]) }) names(HM.PK.slct.hg19)=HMs HM.DP.hg19= lapply(HM.DP.score.hg19, FUN=function(dp.signals) { rownames(dp.signals)[dp.signals[,"padj"]<=DP.Q.CUTOFF] }) names(HM.DP.hg19) = names(HM.DP.score.hg19) HM.hQTLPK.hg19 = lapply(HMs, FUN=function(HM) { load(files.in.haQTLPeak.hg38.RData[HM]) pks.hg38=hQTLPeaks.list[[hQTL.P.CUTOFF]] #buf=read.table(f.i.bed, sep="\t", row.names = NULL, header = F, stringsAsFactors = F) #= paste0(buf[,1], ":", buf[,2], "-", buf[,3]) pks.hg19 = HM.hg38tohg19[[HM]][pks.hg38] pks.hg19.slct = intersect(HM.PK.slct.hg19[[HM]], pks.hg19) return(pks.hg19.slct) }) names(HM.hQTLPK.hg19) = HMs HM.hQTLPK.coloc.hg19 = lapply(HMs, FUN=function(HM) { f.i.RData=files.in.haQTLPeak.coloc.hg38.RData[HM] load(f.i.RData) pks.hg38=names(hQTLPeaks.QTLvsGWAS)[sapply(hQTLPeaks.QTLvsGWAS, FUN=function(x){if(is.na(x)) return(NA);x$coloc$summary["PP.H4.abf"]>=COLOC.ABF.H4.CUTOFF})] pks.hg38=pks.hg38[!is.na(pks.hg38)] pks.hg19 = HM.hg38tohg19[[HM]][pks.hg38] pks.hg19.slct = intersect(HM.PK.slct.hg19[[HM]], pks.hg19) return(pks.hg19.slct) }) names(HM.hQTLPK.coloc.hg19) = HMs HM.hQTLPK.MR.hg19 = lapply(HMs, FUN=function(HM) { f.i.RData=files.in.haQTLPeak.MR.hg38.RData[HM] load(f.i.RData) pks.hg38=names(hQTLPeaks.QTLvsGWAS)[sapply(hQTLPeaks.QTLvsGWAS, FUN=function(x){if(is.na(x)) return(NA); x$MR@Causal.pval<=MR.P.CUTOFF})] pks.hg38=pks.hg38[!is.na(pks.hg38)] pks.hg19 = HM.hg38tohg19[[HM]][pks.hg38] pks.hg19.slct = intersect(HM.PK.slct.hg19[[HM]], pks.hg19) return(pks.hg19.slct) }) names(HM.hQTLPK.MR.hg19)=HMs # cmd = paste0("zcat ", file.in.BP.gz, "|awk '($10<", GWAS.PVAL.CUTOFF, "){print }'>", file.tmp.BP.hits.bed) system(cmd) HM.PK.GWAS.hg19 = lapply(HMs, FUN=function(HM) { print(HM) write(paste(gsub(":|-", "\t", HM.PK.slct.hg19[[HM]]), HM.PK.slct.hg19[[HM]], sep="\t"), file.tmp.pk.bed) cmd = paste0("bedtools window -w ", PK.WIND, " -a ", file.tmp.pk.bed, " -b ", file.tmp.BP.hits.bed, ">", file.tmp.pk.ovlp.BP.hits.bed) system(cmd) buf = read.table(file.tmp.pk.ovlp.BP.hits.bed, "\t", header=F, row.names = NULL) peaks.ovlpGWAS = levels(factor(buf[,4])) }) names(HM.PK.GWAS.hg19) = HMs HM.PK.hg19.annot = lapply(HMs, FUN=function(HM) { pk.annot.matrix = matrix(0, ncol=5, nrow=length(HM.PK.slct.hg19[[HM]])) colnames(pk.annot.matrix)=c("DP", "hQTLPk", "coloc", "MR", "GWAS") rownames(pk.annot.matrix) = HM.PK.slct.hg19[[HM]] pk.annot.matrix[HM.DP.hg19[[HM]], "DP"]=1 pk.annot.matrix[HM.hQTLPK.hg19[[HM]], "hQTLPk"]=1 pk.annot.matrix[HM.hQTLPK.coloc.hg19[[HM]], "coloc"]=1 pk.annot.matrix[HM.hQTLPK.MR.hg19[[HM]], "MR"]=1 pk.annot.matrix[HM.PK.GWAS.hg19[[HM]], "GWAS"]=1 pk.annot.matrix.filt=pk.annot.matrix[apply(pk.annot.matrix, 1, FUN=function(x) any(x==1)),] return(pk.annot.matrix.filt) }) names(HM.PK.hg19.annot)=HMs save(HM.PK.hg19.annot, #hic.reg2HM.PK.annot.matrix, file=file.ou.gNeighbor.annot.RData) # save(HM.hg38tohg19, # HM.DP.hg19, # HM.DP.score.hg19, # HM.hQTLPK.hg19, # HM.hQTLPK.coloc.hg19, # HM.hQTLPK.MR.hg19, # HM.PK.hg19.annot, # file=file.ou.gNeighbor.RData # ) ################################################################################ # eGene.info = read.table(gzfile(file.in.eGene.info), header = T, sep="\t", row.names = 1) gESGid2gName = eGene.info$gene_name names(gESGid2gName) = rownames(eGene.info) # print("generating gene neighborhood") ################################################################ hm.gLinks.hg19=lapply(HMs, FUN=function(hm) { load(files.in.gLink.RData[hm]) gAndPkAndlinkScores = cbind(data.frame(t(sapply(rownames(scores.all), FUN=function(x){unlist(strsplit(x, split=";"))}))), scores.all) gAndPkAndlinkScores.filt= gAndPkAndlinkScores[gAndPkAndlinkScores$coloc.H4.ABF >=gLinks.GSP.cutoffs["coloc.H4.ABF"] | gAndPkAndlinkScores$coloc.H4vsH3.ratio >=gLinks.GSP.cutoffs["coloc.H4vsH3.ratio"] | gAndPkAndlinkScores$MR.qval >=gLinks.GSP.cutoffs["MR.qval"] | gAndPkAndlinkScores$FMeQTL2ARE.DistanceInv >=gLinks.GSP.cutoffs["FMeQTL2ARE.DistanceInv"],] print(nrow(gAndPkAndlinkScores.filt)) gene2Neighb.scores.filt.hg19 = lapply(split(gAndPkAndlinkScores.filt[,2], gAndPkAndlinkScores.filt[,1]), FUN=function(pks.hg38) { HM.hg38tohg19[[hm]][pks.hg38] }) }) names(hm.gLinks.hg19) = HMs for(hm in names(hm.gLinks.hg19)) { geneAndCRE.df = do.call(rbind, lapply(names(hm.gLinks.hg19[[hm]]), FUN=function(g) { data.frame(gid=g, gene=gESGid2gName[g], CRE=hm.gLinks.hg19[[hm]][[g]], HM=hm, stringsAsFactors=F) })) file.ou.geneCRELink.tsv = paste(dir.ou, hm, ".geneCRELink.gLinks.hg19.tsv", sep="") write.table(geneAndCRE.df, file=file.ou.geneCRELink.tsv, sep="\t", quote=F, row.names=F) } save(hm.gLinks.hg19, file=file.ou.gNeighbor.link.RData) ####################################################### # #gene neighbor filter # eGene.info = read.table(gzfile(file.in.eGene.info), header = T, sep="\t", row.names = 1) # gESGid2gName = eGene.info$gene_name # names(gESGid2gName) = rownames(eGene.info) buf= split(transcripts.bed, transcripts.bed$gene) gName2range.hg19=lapply(buf, FUN=function(x) { if(any(x$chrom!=x$chrom[1])) { return(NA) } return(data.frame(chr=x$chrom[1], start=min(x$start), end=max(x$stop), strand = x$strand[1])) }) gName2range.hg19=do.call(rbind, gName2range.hg19[!is.na(gName2range.hg19)]) hm.gNB2annotSum.list = lapply(names(hm.gLinks.hg19), FUN=function(hm) { t(sapply(hm.gLinks.hg19[[hm]], FUN=function(pks) { pks.ovlp = intersect(pks, rownames(HM.PK.hg19.annot[[hm]])) if(length(pks.ovlp)==0) { return(rep(0, ncol(HM.PK.hg19.annot[[hm]]))) } apply(rbind(HM.PK.hg19.annot[[hm]][pks.ovlp, ]), 2 ,sum) })) }) names(hm.gNB2annotSum.list) = names(hm.gLinks.hg19) genes.all= levels(factor(unlist(lapply(hm.gNB2annotSum.list, rownames)))) gNB2annotSum=t(sapply(genes.all, FUN=function(g) { g.hm.annotSum=lapply(names(hm.gLinks.hg19), FUN=function(hm) { if(g %in% rownames(hm.gNB2annotSum.list[[hm]])) { return(hm.gNB2annotSum.list[[hm]][g,]) }else { return(rep(0, ncol(hm.gNB2annotSum.list[[hm]]))) } }) names(g.hm.annotSum) = names(hm.gLinks.hg19) unlist(g.hm.annotSum) })) gNB2annotSumHM = cbind(apply(gNB2annotSum[, grepl(".DP", colnames(gNB2annotSum))], 1, sum), apply(gNB2annotSum[, grepl(".hQTLPk", colnames(gNB2annotSum))], 1, sum), apply(gNB2annotSum[, grepl(".coloc", colnames(gNB2annotSum))], 1, sum), apply(gNB2annotSum[, grepl(".MR", colnames(gNB2annotSum))], 1, sum), apply(gNB2annotSum[, grepl(".GWAS", colnames(gNB2annotSum))], 1, sum) ) colnames(gNB2annotSumHM)=c("DP", "hQTLPk", "coloc", "MR", "GWAS") genes.filt = rownames(gNB2annotSumHM)[gNB2annotSumHM[,"DP"]>=1 & #gNB2annotSumHM[,"hQTLPk"]>=1 & gNB2annotSumHM[,"GWAS"]>=1 & (gNB2annotSumHM[,"coloc"]>=1 &gNB2annotSumHM[,"MR"]>=1 )] #enrichment test #test enrichment gs.bg = rownames(gNB2annotSumHM) genes.DiffReg.NO=sum(gNB2annotSumHM[gs.bg,"DP"]>=1) genes.Genetics.NO= sum(gNB2annotSumHM[gs.bg,"hQTLPk"]>=1 & gNB2annotSumHM[gs.bg,"GWAS"]>=1 & (gNB2annotSumHM[gs.bg,"coloc"]>=1 & gNB2annotSumHM[gs.bg,"MR"]>=1 )) genes.ovlp.NO=length(genes.filt) fish.p=fisher.test(matrix(c(genes.ovlp.NO, genes.DiffReg.NO-genes.ovlp.NO, genes.Genetics.NO-genes.ovlp.NO, nrow(gNB2annotSumHM)-genes.DiffReg.NO-genes.Genetics.NO+genes.ovlp.NO), ncol=2))$p.val print(fish.p) #genes.filt = rownames(gNB2annotSumHM)[apply(gNB2annotSumHM>=1, 1, sum)>=2] #genes.filt = rownames(gNB2annotSum)[apply(gNB2annotSum>=1, 1, sum)>2] gNB2range.hg19=sapply(genes.all, FUN=function(g) { g.range = gName2range.hg19[gESGid2gName[g],] pks = unlist(lapply(hm.gLinks.hg19, FUN=function(gLinks) { if(g %in% names(gLinks)) return(gLinks[[g]]) return(NULL) })) buf= sapply(pks, FUN=function(x) unlist(strsplit(x, split=":|-"))) buf.chr= buf[1,] buf.start= min(c(as.numeric(buf[2,]), g.range$start)) buf.end= max(c(as.numeric(buf[3,]), g.range$end)) if(all(buf.chr==buf.chr[1])) { return(paste0(buf.chr[1], ":", buf.start, "-", buf.end)) }else { return(NA) } }) genes.filt = genes.filt[!is.na(gNB2range.hg19[genes.filt])] write.table(cbind(gsub(":|-", "\t", gNB2range.hg19[genes.filt]), genes.filt), file=file.tmp.gFilt.bed, sep="\t", row.names = F, col.names = F, quote=F) genes.filt.sorted = sapply(system(paste0("sortBed -i ", file.tmp.gFilt.bed), intern = T), FUN=function(x) { unlist(strsplit(x, split="\t"))[4] }) # gNB2GWAS.topPval =sapply(genes.filt, # FUN=function(g) # { # print(g) # range=gNB2range[[g]] # if(is.na(range)) # { # return(NA) # } # gwas.tab = seqminer::tabix.read.table(file.in.BP.gz, range) # min(gwas.tab$V10) # }) # names(gNB2GWAS.topPval) =genes.filt # genes.filt = rownames(gNB2annotSumHM)[apply(gNB2annotSumHM>=1, 1, sum)>=2 & gNB2GWAS.topPval<=GWAS.PVAL.CUTOFF] # genes.filt = genes.filt[!is.na(genes.filt)] save(HM.PK.hg19.annot, gESGid2gName, gName2range.hg19, gNB2annotSum, gNB2annotSumHM, gNB2range.hg19, #gNB2GWAS.topPval, genes.filt.sorted, file=file.ou.gNeighbor.annot.RData) print("tracks for each gene neighborhood")########################################## load(file.ou.gNeighbor.link.RData) # load(file.ou.gNeighbor.annot.RData) # #FUNCTION############################################################################## # getChrFromRegions = function(regions) #vector of regions with format "chr\tstart\tend" # { # chrs = sapply(regions, FUN=function(x){unlist(strsplit(x, split="\t"))[1]}) # # return(levels(factor(chrs))) # } # # # getHiCLinks = function(regions, hic.reg2neighb)#vector of regions with format "chr\tstart\tend" # # { # # nb.links = c() # # for(nb in intersect(regions, names(hic.reg2neighb))) # # { # # nb.partners = hic.reg2neighb[[nb]] # # for(nb.p in intersect(nb.partners, nbs)) # # { # # nb.links=c(nb.links, paste(nb, nb.p, NA, 1, ".", ".", 1, sep="\t")) # # if(nb > nb.p) # # { # # nb.links=c(nb.links, paste(nb.p, nb, NA, 1, ".", ".", 1, sep="\t")) # # } # # } # # } # # nb.links = levels(factor(nb.links)) # # return(nb.links) # # } # # getGenetLinksBEDPE.hg19=function(g, pks) # { # #g is ensemb gid # pk.loc = rbind(t(sapply(pks, FUN=function(x)unlist(strsplit(x, split=":|-", perl=T))))) # # gID.range= gName2range[gESGid2gName[g],] # link.type = gsub("NA", "", paste(c("", "coloc")[(gene2Neighb.scores.filt.hg19[[g]][pks,"coloc.H4.ABF"]>=COLOC.ABF.H4.CUTOFF)+1], # c("", "MR")[(gene2Neighb.scores.filt.hg19[[g]][pks,"MR.log10pval"]>= -log10(MR.P.CUTOFF)) +1], # sep=";")) # if(gID.range$strand=="+") # { # start1=gID.range$start-1 # end1 = gID.range$start # }else # { # start1=gID.range$end # end1 = gID.range$end+1 # } # nb.links.bedpe = data.frame(chrom1 = gID.range$chr, # start1 = start1, # end1 = end1, # chrom2=pk.loc[,1], # start2=as.numeric(pk.loc[,2]), # end2=as.numeric(pk.loc[,3]), # name= link.type, # score = 1, # strand1 = ".", # strand2 =".", # samplenumber=1 # ) # return(nb.links.bedpe) # } # # getNBGenesBED = function(gName, g.chr, chr.start, chr.end) # { # # write(paste(g.chr, "\t", chr.start, "\t", chr.end, sep=""), file.tmp.gRegion.bed) # cmd =paste( "intersectBed -a ", file.tmp.genesStructure.bed, " -b ", file.tmp.gRegion.bed," -u>", file.tmp.gStructure.bed, sep="") # system(cmd) # buf = read.table(file.tmp.gStructure.bed, head=F, sep="\t", row.names = NULL, stringsAsFactors = F) # colnames(buf) = c("chrom", "start", "stop", "gene", "gene.type", "transcript", "score", "strand", "type") # buf = buf[buf$gene.type=="protein_coding" | buf$gene==gName ,]#only keep protein coding genes and the gene we are focusing on now # buf$strand[as.character(buf$strand)=="+"] = 1 # buf$strand[as.character(buf$strand)=="-"] = -1 # buf$strand = as.numeric(buf$strand) # # NB.genes.bed = list() # NB.genes.bed$g = buf[buf$gene==gName, ] # NB.genes.bed$others = buf[buf$gene!=gName, ] # # return(NB.genes.bed) # } # # # gethaQTLFiltBedgraph = function(peaks2haQTL, g.pks, g.chr) #sorted # # { # # haQTL.posAndT.bed = data.frame() # # for(i in 1:length(g.pks)) # # { # # for(posID in names(peaks2haQTL[[g.pks[i]]])) # # { # # t= abs(peaks2haQTL[[g.pks[i]]][posID]) # # pos = as.numeric(unlist(strsplit(posID, split="_"))[2]) # # chr = unlist(strsplit(posID, split="_"))[1] # # haQTL.posAndT.bed = rbind(haQTL.posAndT.bed, data.frame(chrom = chr, start = pos-1, end = pos, value = t,stringsAsFactors = F)) # # } # # # # } # # haQTL.posAndT.bed = haQTL.posAndT.bed[ haQTL.posAndT.bed$chrom==g.chr, ] # # haQTL.posAndT.bed = haQTL.posAndT.bed[order(haQTL.posAndT.bed$start, decreasing = F),] # # return(haQTL.posAndT.bed) # # } # # # getGWASFiltBedgraph = function(g.haQTL.gwas, dis, g.chr) # { # haQTL.posAndGWAS.bed = data.frame(stringsAsFactors = F) # for(posID in names(g.haQTL.gwas[[dis]])) # { # # z= abs(g.haQTL.gwas[[dis]][posID]) # pos = as.numeric(unlist(strsplit(posID, split="_"))[2]) # chr = unlist(strsplit(posID, split="_"))[1] # haQTL.posAndGWAS.bed = rbind(haQTL.posAndGWAS.bed, data.frame(chrom = chr, start = pos-1, end = pos, value = z,stringsAsFactors = F)) # # } # haQTL.posAndGWAS.bed = haQTL.posAndGWAS.bed[haQTL.posAndGWAS.bed$chrom==g.chr,] # if(nrow(haQTL.posAndGWAS.bed)>=1) # { # haQTL.posAndGWAS.bed = haQTL.posAndGWAS.bed[order(haQTL.posAndGWAS.bed$start, decreasing = F),] # } # return(haQTL.posAndGWAS.bed) # } # # # # # # pdf(file.ou.pdf, width =25, height=12) # layout(matrix(1:5, ncol=1)) # par(mar=c(3,15,2,3)) # for(g in genes.filt.sorted) # { # print(g) # g.range = gNB2range.hg19[[g]] # if(is.na(g.range)) next # # g.chr = getChrFromRegions(gsub(":|-", "\t", g.range)) # if(length(g.chr)>1) # { # print(paste(g, "mapping to multiple chroms")) # next # } # # #filt by gwas # gwas.tab = seqminer::tabix.read.table(file.in.BP.gz, g.range) # if(length(gwas.tab)==0) next # gwas.bedgraph = data.frame(chr= gwas.tab$V1, # start = gwas.tab$V2, # end = gwas.tab$V3, # value = -log10(gwas.tab$V10) # ) # if(all(gwas.bedgraph$value <= -log10(GWAS.PVAL.CUTOFF))) next # # # #range # pks = rownames(gene2Neighb.scores.filt.hg19[[g]]) # # nb.links.bedpe = getGenetLinksBEDPE.hg19(g, pks) # if(nrow(nb.links.bedpe)<1) # { # print(paste(g, "no links around on the same chrom")) # next # } # print(g) # # chr.start = max(1,min(c(nb.links.bedpe$start1, nb.links.bedpe$start2))-5000) # chr.end = max(c(nb.links.bedpe$end1, nb.links.bedpe$end2))+5000 # # # # #axis(side=2,las=2,tcl=.2) # #mtext("",side=2,line=1.75,cex=.75,font=2) # # #neighbor regions # # nbs.bed = data.frame(t(sapply(nbs, FUN=function(x){unlist(strsplit(x, split="\t"))})), score = 1 , strand = ".",stringsAsFactors = F) # # colnames(nbs.bed)[1:3] = c("chrom", "chromstart", "chromend") # # nbs.bed$chromstart = as.numeric(nbs.bed$chromstart) # # nbs.bed$chromend = as.numeric(nbs.bed$chromend) # # plotBed(beddata=nbs.bed, chrom = g.chr, chromstart=chr.start, chromend=chr.end, row = "auto", wiggle=0.001) # # labelgenome(chrom=g.chr, chromstart= chr.start, chromend = chr.end, n=5, scale="Mb") # # # #gene # NB.genes.bed = getNBGenesBED(gESGid2gName[g], g.chr, chr.start, chr.end) # # # if(nrow(NB.genes.bed$others)==0) # { # plot.new() # }else # { # pg = plotGenes(NB.genes.bed$others[,c("chrom", "start", "stop", "gene", "score", "strand")], # chrom = g.chr, chromstart = chr.start,chromend=chr.end, # #colorby = c(0,1)[(NB.transcripts.bed$gene==g) +1], # #colorbycol=SushiColors(2), # col="blue", # types="exon",#NB.transcripts.bed$type, # maxrows=8, bheight=0.2, # plotgenetype="box",bentline=FALSE, wigglefactor = 0.1, # labeloffset=.4, fontsize=0.6,# arrowlength = 0.025, # labeltext=TRUE) # } # # pg = plotGenes(NB.genes.bed$g[,c("chrom", "start", "stop", "gene", "score", "strand")], # chrom = g.chr, chromstart = chr.start,chromend=chr.end, # #colorby = c(0,1)[(NB.transcripts.bed$gene==g) +1], # #colorbycol=SushiColors(2), # col="red", # types="exon",#NB.transcripts.bed$type, # maxrows=1, bheight=0.2, # plotgenetype="box",bentline=FALSE, wigglefactor = 0.1, # labeloffset=.4, fontsize=1.2,# arrowlength = 0.025, # labeltext=TRUE) # labelgenome(chrom=g.chr, chromstart= chr.start, chromend = chr.end, n=5, scale="Mb") # # # #axis(side=2,las=2,tcl=.2) # #mtext("",side=2,line=1.75,cex=.75,font=2) # # #hiC # pbpe = plotBedpe(nb.links.bedpe, chrom = g.chr, # chromstart = chr.start, # chromend = chr.end, # heights = nb.links.bedpe$score, # plottype="loops", # main=g) # labelgenome(chrom=g.chr, chromstart= chr.start, chromend = chr.end, n=5, scale="Mb") # # #annotations bed # annot.bed.df= data.frame() # pks.ovlp = intersect(pks, rownames(HM.PK.hg19.annot[[HM.LINK]])) # pks.annot = rbind(HM.PK.hg19.annot[[HM.LINK]][pks.ovlp,]) # for(annot.nm in colnames(HM.PK.hg19.annot[[HM.LINK]])) # { # pks.filt = pks.ovlp[pks.annot[pks.ovlp, annot.nm]!=0] # if(length(pks.filt)==0) next # # buf= cbind(sapply(pks.filt, FUN=function(x) unlist(strsplit(x, split=":|-", perl=T)))) # buf.chr = buf[1,] # buf.start = as.numeric(buf[2,]) # buf.end = as.numeric(buf[3,]) # # df =data.frame(chrom=buf.chr, # start = buf.start, # end = buf.end, # name=annot.nm, # score=0, # strand = ".", stringsAsFactors = F) # annot.bed.df = rbind(annot.bed.df, df) # } # annot.bed.df$row = as.numeric(factor(annot.bed.df$name)) # annot.bed.df$color = maptocolors(annot.bed.df$row, col=SushiColors(6)) # annot.bed.df.sorted = annot.bed.df[order(annot.bed.df$row),] # plotBed(beddata = annot.bed.df.sorted, # chrom = g.chr, # chromstart = chr.start, # chromend =chr.end, # rownumber = annot.bed.df.sorted$row, # type = "region", # color=annot.bed.df.sorted$color, # row="given", # plotbg="white", # rowlabels=unique(annot.bed.df.sorted$name), # rowlabelcol=unique(annot.bed.df.sorted$color), # rowlabelcex=0.75) # labelgenome(chrom=g.chr, chromstart= chr.start, chromend = chr.end, n=5, scale="Mb") # mtext(paste0(HM.LINK, " bulk annotate"), side=3, adj=-0.065,line=0.5,font=2) # # #GWAS # # plotBedgraph(gwas.bedgraph, # chrom = g.chr, # chromstart = chr.start, # chromend = chr.end, # colorbycol= SushiColors(5)) # labelgenome(chrom=g.chr, chromstart= chr.start, chromend = chr.end, n=5, scale="Mb") # mtext("BP GWAS", side=3, adj=-0.065,line=0.5,font=2) # axis(side=2,las=2, tcl=.2) # # # # # # # # } # dev.off() #