#visualize the gseapy results if(!grepl("R version 4.0", R.version$version.string)) { stop("use R-4.0") } R.LIB.MYPATH = "~/lhou.compbio/libs/R/x86_64-pc-linux-gnu-library/4.0/" R.LIBS.PATH = .libPaths() .libPaths(c(R.LIB.MYPATH, R.LIBS.PATH)) library(ComplexHeatmap) library(circlize) library(data.table) #library(pheatmap) library(RNOmni) #pids.TFs=c(RORA="BRD-K20401833", FLI1="BRD-A62182663") TFs.inferred=c("FLI1", "IRF8", "JDP2", "SPI1", "RFX4", "E2F4", "CTCF", "ATF1", "MBD2", "MECP2", "NRF1", "FEV", "EGR3", "CEBPB", "RFX2", "RORA", "ZBT14", "MYBL1", "NFIA") dir.in="b_comp2CompoundSig_cMap_V1.1/" file.in.slct.exp=paste0(dir.in, "b3_slctExp/exp.slct.p0.001.tsv") file.in.gmt=paste0(dir.in, "EpiDEGs.gmt") file.in.potentialTargets=paste0(dir.in, "GSEA_EpiDEGs.vs.cMap_level5_beta_trt_cp_blood.potentialDrugTargets.txt") file.in.drivers="../haQTLvseQTL/c_2_targetGenes_comb_V1.2_bulkAndcellsortedEQTL/215drivergenes.cellsorted.coloc.txt" file.in.NES.RData=paste0(dir.in, "GSEA_EpiDEGs.vs.cMap_level5_beta_trt_cp_blood.RData") file.ou.RData=paste0(dir.in, "b3_slctExp/exp.p0.001.heatmap.RData") file.ou.pdf=paste0(dir.in, "b3_slctExp/exp.p0.001.slct.DEGs.heatmap.pdf") file.test.pdf=paste0(dir.in, "b3_slctExp/exp.p0.001.test.pdf") file.ou.slct.DEGs=paste0(dir.in, "b3_slctExp/exp.p0.001.slct.DEGs.txt") file.ou.slct.DEGs.id2name=paste0(dir.in, "b3_slctExp/exp.p0.001.slct.DEGs.id2name.txt") #from DAVID file.ou.slct.DEGs.cluster=paste0(dir.in, "b3_slctExp/exp.p0.001.slct.DEGs.cluster") #from DAVID # load(file.in.NES.RData) rownames(deg.gsea.res.df)=deg.gsea.res.df$experiment # exp.df= fread(file.in.slct.exp, sep="\t", data.table=F, header=T) rownames(exp.df)=gsub("^\"\"|\"\"$", "", exp.df$rid, perl=T) exp.mat=as.matrix(exp.df[,-1]) exp.mat.INT = apply(exp.mat,2, rankNorm) # buf= read.table(file.in.gmt, sep=",", row.names=NULL, header=F, stringsAsFactors=F) gsets.list=lapply(buf[,1], FUN=function(x) { gs=unlist(strsplit(x, split="\t")) gs[-2] }) names(gsets.list)=sapply(gsets.list, FUN=function(x)x[1]) gsets.list=lapply(gsets.list, FUN=function(x) x[-1]) gsets.ovlp.list=lapply(gsets.list, FUN=function(x) intersect(x, rownames(exp.df))) gene2label= unlist(lapply(names(gsets.ovlp.list), FUN=function(nm) { x=rep(nm, length(gsets.ovlp.list[[nm]])) names(x)=gsets.ovlp.list[[nm]] return(x) })) # pdf(file.test.pdf, width=20, height=12) layout(matrix(1:15, ncol=3)) par(mar=c(3,3,3,3)) for(i in sample(1:ncol(exp.mat.INT), 100)) { density.all <- density(exp.mat.INT[,i]) # density.dn <- density(exp.mat[gsets.ovlp.list$dnDEG,i]) # density.up <- density(exp.mat[gsets.ovlp.list$upDEG,i]) # # Plot the first density # plot(density.all, col = "red", lwd = 2, xlim = c(-10, 10), ylim = c(0, 0.5), # main = "", xlab = "Values", ylab = "Density") # # Add the second and third densities # lines(density.dn, col = "green", lwd = 2) # lines(density.up, col = "blue", lwd = 2) # hist(exp.mat[,i], breaks = 40, col = rgb(0.5, 0.5, 0.5, 0.1), freq = FALSE, xlim = c(-10, 10), ylim = c(0, 0.4), # main = "Normalized Overlaid Histograms", xlab = "Values", ylab = "Density") # Add the second histogram hist(exp.mat.INT[gsets.ovlp.list$dnDEG,i], breaks = 40, col = rgb(0, 0, 1, 0.5), freq = FALSE, xlim = c(-10, 10), ylim = c(0, 1), main="", xlab = "Values", ylab = "Density") # Add the third histogram hist(exp.mat.INT[gsets.ovlp.list$upDEG,i], breaks = 40, col = rgb(1, 0, 0, 0.5), freq = FALSE, add = TRUE) lines(density.all, col = rgb(0, 0, 0, 1), lwd = 2) # Add a legend legend("topright", legend = c("all", "dn", "up"), fill = c(rgb(.5, 0.5, 0.5, 0.5), rgb(0, 0, 1, 0.5), rgb(1, 0, 0, 0.5))) } dev.off() # exp.mat[exp.mat>5]=5 # exp.mat[exp.mat< -5]=-5 # exp.z.mat= apply(exp.mat, 2, # FUN=function(x) # { # (x-mean(x))/sd(x) # }) # exp.z.mat[exp.z.mat>5]=5 # exp.z.mat[exp.z.mat< -5]=-5 # # #filter genes to find the robust differential genes gs.ovlp=unlist(gsets.ovlp.list) gs.ovlp.topDEG.list=apply(exp.mat.INT, 2, FUN=function(x) { x.quant=quantile(x, prob=c(0.1, 0.9)) top.upDEGs= names(x)[x>x.quant[2]] top.dnDEGs= names(x)[x(length(gs2topDEGCounts)-499)] write(gs.ovlp.topDEGs, file.ou.slct.DEGs) # buf=read.table(file.ou.slct.DEGs.id2name, sep="\t", header=T, row.names=1) gid2name=buf[,1] names(gid2name)=rownames(buf) gname2id=rownames(buf) names(gname2id)=buf[,1] # # # targets=read.table(file.in.potentialTargets)[,1] # drivers=read.table(file.in.drivers, sep="\t", row.names=1, head=T)[,1] # DEGs.ovlpTFDriver=intersect(gid2name, c(drivers, TFs.inferred)) # DEGs.ovlp.index=which(gs.ovlp.topDEGs %in% gname2id[DEGs.ovlpTFDriver]) # row_dist <- dist(exp.mat.INT[gs.ovlp.topDEGs, ]) row_hclust <- hclust(row_dist) # Cut the dendrogram into two clusters row_clusters <- cutree(row_hclust, k = 2) row_split <- factor(row_clusters, levels = c(1, 2)) clu2genes=split(gid2name[names(row_split)], row_split) for(clu in names(clu2genes)) { write(clu2genes[[clu]],file=paste0(file.ou.slct.DEGs.cluster, clu, ".txt")) } # col_dist <- dist(t(exp.mat.INT[gs.ovlp.topDEGs, ])) col_hclust <- hclust(col_dist) # Cut the dendrogram into two clusters col_clusters <- cutree(col_hclust, k = 2) col_split <- factor(col_clusters, levels = c(1, 2)) # samp2type=rep("rest", ncol(exp.mat.INT)) names(samp2type)=colnames(exp.mat.INT) samp2type[deg.gsea.res.df[colnames(exp.mat.INT), "upDEG.NES"]<0 & deg.gsea.res.df[colnames(exp.mat.INT), "dnDEG.NES"]>0]="reverseBDSignatures" # pdf(file.ou.pdf, width=6, height=6) #pheatmap(exp.mat.INT[gs.ovlp,]) row_anno <- rowAnnotation( BD.DEGs = gene2label[gs.ovlp.topDEGs], col = list(BD.DEGs = c("dnDEG"="purple", "upDEG"="orange")) # mark = anno_mark( # Add the anno_mark annotation # at = DEGs.ovlp.index, # labels = gid2name[gs.ovlp.topDEGs[DEGs.ovlp.index]], # labels_gp = gpar(fontsize = 5, col = "red"), # Custom font size and color # link_width = unit(1, "mm") # Thickness of the arrow lines #) ) col_anno <- columnAnnotation( SampleType = samp2type, # Replace 'sample.types' with your vector col = list(SampleType = c("reverseBDSignatures" = "purple", "rest" = "orange")), annotation_name_side = "left" ) ht = Heatmap(exp.mat.INT[gs.ovlp.topDEGs,], col= colorRamp2(c(-2, 0, 2), c("blue", "white", "red")), border =T, #cluster_rows=F, row_split= row_split, column_split=col_split, cluster_rows = FALSE, cluster_columns = FALSE, #cluster_row_slices = FALSE, show_row_names=F, show_column_names=F, #row_labels = gid2name[gs.ovlp.topDEGs], #row_names_gp = gpar(fontsize = 3), #row_title_rot = 0, right_annotation = row_anno, top_annotation = col_anno #column_names_gp = gpar(fontsize = 8), #row_gap = unit(3, "mm"), ) print(ht) dev.off() save(exp.mat.INT, gene2label, gs.ovlp.topDEGs, row_hclust, row_split, col_hclust, col_split, samp2type, file=file.ou.RData)