#V1.2 #since strong overlap of markers between different clusters for each histone marks from V1.1 #remove overlapped markers witth each other #add genetic colocalization with different GWAS #V1.1 #find marker DP based on clusters from all samples #compare cluster against others for cases only taking into consideration of age and sex #find marker DP that distinguish patient cluster from the rest args = commandArgs(trailingOnly=TRUE) HM = args[1] if(!grepl("R version 4.0", R.version$version.string)) { stop("use R-4.0") } R.LIB.MYPATH = "~/lhou.compbio/libs/R/x86_64-pc-linux-gnu-library/4.0/" R.LIBS.PATH = .libPaths() .libPaths(c(R.LIB.MYPATH, R.LIBS.PATH)) library(ComplexHeatmap) library(RColorBrewer) library(circlize) colors = brewer.pal(10, "Set3") HMs= c("H3K27ac", "H3K36me3", "H3K4me1", "H3K4me3", "H3K27me3") GWAS.NMS=c("DrinksPerWeek", "SmokingInitiation", "GWASAtlas_ukb2.diabetes", "GWASAtlas_ukb2.highBloodPressure", "PGC.BP") HiCBlock.DPEnrich.Q.CUTOFF=0.1 DP.PVAL.CUTOFF=0.01 DP.MARKER.Q.CUTOFF = 0.05 CLU.RES = "clu.corHclust5" file.in.norm.RData = paste0("../peakMergeNormal/a_2_peakHeightsNormalized_V1.4/", HMs, ".MayoWithRef.heightsMean.depthGCNormed.RData") names(file.in.norm.RData) = HMs files.in.DP.RData = paste0("../peakMergeNormal/c_bulkDiffPeak_limma_V1.4.1_sva_noEHR/", HMs, "/", HMs, ".diffPeak.RData") names(files.in.DP.RData) = HMs file.in.HiC.block.RData="../hiC.DiffPeakandDEGandTF/a_2_cmpBulkDP.limma_addLitium.AcrossHMs_hiCBlock/Bulk.hicBlocks.DPstat.RData" file.in.HiCRegion2Pk.RData="../hiC.DiffPeakandDEGandTF/a_hiCLinks_withPeakAnnot_bulk/hiCLinks.RData" file.in.clu.RData=paste0("./b_factorAnalysis_across5HMs_MOFA_V1.2_pvalCutoff/HiC.DPs/ind.MOFA.DP-pval0.01.varProp0.005.sampClu_V1.1.RData") file.in.meta="~/lhou.compbio/data/Mayo_Bipolar/Batch_Feb.2019/broad_wgs_metadata.csv" file.in.hm.gwas.coloc.RData="../haQTLvsGWAS/a_bulkhQTLvsGWAS_bycoloc_V1.1_allGWAS_summary/bulkhQTLvsGWAS.coloc.RData" file.in.ARE.hg38toHg19.RDS=paste0("../peakMergeNormal/a_2_peakHeightsNormalized_V1.4/", HM, ".pk.hg38tohg19.RDS") dir.ou=paste0("c_markerDPForCluster_V1.2_uniq/HiC.DPs/", CLU.RES, "/") dir.create(dir.ou, showWarnings = F, recursive = T) file.ou.RData=paste0(dir.ou, HM, ".markerDP.", CLU.RES, ".RData") file.ou.pdf=paste0(dir.ou, HM, ".markerDP.", CLU.RES, ".pdf") # load(file.in.HiC.block.RData) #load(file.in.HiCRegion2DP.RData) load(file.in.HiCRegion2Pk.RData) hiCBlock.slct = rownames(hiCBlock.DP.summary.adjp)[apply(hiCBlock.DP.summary.adjp<=HiCBlock.DPEnrich.Q.CUTOFF,1, any)] #select differential peaks in side DP.enriched hicBlocks hiCRegion.slct = unlist(hicBlock2hicRegs[hiCBlock.slct]) # load(file.in.clu.RData) # clu2samp=names(samp2clu) # names(clu2samp)=paste0("clu_", samp2clu) # buf =read.table(file.in.meta, sep=",", row.names = NULL, header = T, stringsAsFactors = F) # samp2group=buf$group # names(samp2group)=paste0("id_", rownames(buf)) samp.case=rownames(samp2grp)[samp2grp$group=="case"] clus.all = levels(factor(samp2grp[samp.case, CLU.RES])) clu2Rest.samp =lapply(clus.all, FUN=function(clu.i) { res=data.frame(clu.grp=rep("control.other", nrow(samp2grp)), stringsAsFactors = F) rownames(res)=rownames(samp2grp) res$clu.grp[samp2grp$group=="case" & samp2grp[[CLU.RES]]==clu.i] = "case.cluster" res$clu.grp[samp2grp$group=="control" & samp2grp[[CLU.RES]]==clu.i] = "control.cluster" res$clu.grp[samp2grp$group=="case" & samp2grp[[CLU.RES]]!=clu.i] = "case.other" res$clu.grp =factor(res$clu.grp, levels=c("case.cluster", "case.other", "control.cluster", "control.other")) return(res) }) names(clu2Rest.samp) = paste0("clu_", clus.all) # clu2Rest.samp = lapply(clu.all, # FUN=function(clu) # { # res=samp2clu # res[samp2clu!=clu]="rest" # res[samp2clu==clu]="clu" # return(res) # }) # names(clu2Rest.samp) = paste0("clu_", clu.all) ##marker print(HM) load(file.in.norm.RData[HM]) load(files.in.DP.RData[HM]) samps.ovlp = intersect(rownames(samp2grp), colnames(samp2HeightsMean$depth.GCnormTeng)) hicReg2pks=lapply(hicReg.df[[HM]], FUN=function(x) { unlist(strsplit(x, split=",")) }) names(hicReg2pks) = rownames(hicReg.df) hic.pks.slct = levels(factor(unlist(hicReg2pks[hiCRegion.slct]))) DPs = rownames(res$diseasedisease)[res$diseasedisease[, "P.Value"] <= DP.PVAL.CUTOFF] hic.DPs.slct =intersect(hic.pks.slct, DPs) #intersect(rownames(res$disease), rownames(samp2HeightsMean$depth.GCnormTeng)) HM.markDP.hg19.res=lapply(clu2Rest.samp, FUN=function(clu2Rest) { cases.ovlp= intersect(samp.case, samps.ovlp) print(length(hic.DPs.slct)) t(sapply(hic.DPs.slct, FUN=function(pk) { df =data.frame(pk=samp2HeightsMean$depth.GCnormTeng[pk, cases.ovlp], sex =design.matrix.mod2[cases.ovlp, "GenderM"], age =design.matrix.mod2[cases.ovlp, "age_atsamp"], clu=clu2Rest[cases.ovlp, "clu.grp"]) mod.summary=summary(lm(pk~clu+sex+age, data=df)) mod.t = mod.summary$coef[2, "t value"] mod.p =mod.summary$coef[2, "Pr(>|t|)"] return(c(t=mod.t, p=mod.p, p.adj=p.adjust(mod.p, method = "BH"))) })) }) HM.markDP.hg19.list=lapply(HM.markDP.hg19.res, FUN=function(dps) { rownames(dps)[dps[,"p.adj"] <= DP.MARKER.Q.CUTOFF] }) HM.markDP.uniq.hg19.list=lapply(names(HM.markDP.hg19.list), FUN=function(clu) { setdiff(HM.markDP.hg19.list[[clu]], unlist(HM.markDP.hg19.list[setdiff(names(HM.markDP.hg19.list), clu)])) }) names(HM.markDP.uniq.hg19.list) = names(HM.markDP.hg19.list) save(clu2Rest.samp, samp2grp, HM.markDP.hg19.res, HM.markDP.uniq.hg19.list, file=file.ou.RData) # # load(file.in.hm.gwas.coloc.RData) pk.hg38tohg19=readRDS(file.in.ARE.hg38toHg19.RDS) col_fun.coloc = colorRamp2(c(0, 0.1, 0.5), c("grey", "white", "red")) pdf(file.ou.pdf, width=12, height=12) layout(matrix(1:20, ncol=4, byrow = T)) for(clu in names(HM.markDP.hg19.list)) { hist(HM.markDP.hg19.res[[clu]][,"p"], main=paste(HM, clu), xlab="pval") } colors.corHclust =colors[1:length(levels(factor(samp2grp[[CLU.RES]])))] names(colors.corHclust) = as.character(1:length(colors.corHclust)) for(clu in names(HM.markDP.uniq.hg19.list)) { samps.ovlp= intersect(rownames(clu2Rest.samp[[clu]]), colnames(samp2HeightsMean$depth.GCnormTeng)) f.o.pk= paste0(dir.ou, HM, ".", clu, ".markDP.uniq.hg19.bed") dp.clu.marker = HM.markDP.uniq.hg19.list[[clu]] if(length(dp.clu.marker)!=0) { dp.clu.marker.GWASColoc=matrix(0, nrow=length(dp.clu.marker), ncol=length(hm2gwas2coloc.hg38.list[[HM]])) rownames(dp.clu.marker.GWASColoc)=dp.clu.marker colnames(dp.clu.marker.GWASColoc)=GWAS.NMS for(gwas in GWAS.NMS) { pk2coloc.hg19=hm2gwas2coloc.hg38.list[[HM]][[gwas]] names(pk2coloc.hg19) = pk.hg38tohg19[names(pk2coloc.hg19)] pks.ovlp=intersect(dp.clu.marker, names(pk2coloc.hg19)) dp.clu.marker.GWASColoc[pks.ovlp, gwas] = pk2coloc.hg19[pks.ovlp] } write.table(cbind(gsub(":|-", "\t", dp.clu.marker), dp.clu.marker), file=f.o.pk, sep="\t", quote=F, col.names = F, row.names = F) pk.norm.signal = t(apply(samp2HeightsMean$depth.GCnormTeng[dp.clu.marker, samps.ovlp], 1, FUN=function(x){(x-mean(x))/sd(x)})) ha.top=HeatmapAnnotation(clu=samp2grp[samps.ovlp, CLU.RES], col=list(clu=colors.corHclust)) ha.left=rowAnnotation(coloc=dp.clu.marker.GWASColoc, col=list(coloc=col_fun.coloc)) ht= Heatmap(pk.norm.signal, column_split= clu2Rest.samp[[clu]][samps.ovlp, "clu.grp"], cluster_column_slices=F, top_annotation = ha.top, left_annotation = ha.left, row_title = paste(HM, clu), show_row_names=F) draw(ht) } } dev.off()