#combine previous result 
#remove clusters that are empty
#reorder the traits that shows up


#for the baseline they get different number of SNPs as I got.
#need to use the overlapping snps, ignore possible change to .M, .M_5_50
#should guarantee all the new annotations share the same set of SNPs


library(pheatmap)
library("RColorBrewer")
library(corrplot)
#

dir.in.LDSC = "a_2_heritPartition_byLDSC_AREGrp.bulkDP.gCRE_slctGWAS/"
files.in.GWAS=c()

files.in.GWAS["T1D_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1222_self_reported_type_1_diabetes.bed"
files.in.GWAS["COVID19.HG"] = "~/lhou.compbio/data/GWAS/COVID19_hg/COVID19_HGI_ANA_C2_V2.sumstats.gz"
files.in.GWAS["IBD_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1461_self_reported_inflammatory_bowel_disease.bed"
files.in.GWAS["RA_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1464_self_reported_rheumatoid_arthritis.bed"
files.in.GWAS["MS_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1261_self_reported_multiple_sclerosis.bed"
files.in.GWAS["Neurot_UKB"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20127_Neuroticism_score.bed"
files.in.GWAS["AD_NG2022"]="~/lhou.compbio/data/GWAS/AD/AD_Bellenguez_NG2022/GCST90027158_buildGRCh38.sumstats.gz"
files.in.GWAS["PD_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1262_self_reported_parkinsons_disease.bed"
files.in.GWAS["PGC_SCZ"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_pgc.scz2.bed"
files.in.GWAS["PGC_BP"]="~/lhou.compbio/data/GWAS/pgc_bipolar/daner_PGC_BIP32b_mds7a_0416a.hg38.sumstats.gz"
files.in.GWAS["HighBloodPressure"]="~/lhou.compbio/data/GWAS/GWASAtlas_ukb2_NG_2019/highBloodPressure_6150_4_logistic.EUR.sumstats.gz"
files.in.GWAS["SmokeInit"]="~/lhou.compbio/data/GWAS/Alcohol_tobacco_NG_2019/SmokingInitiation.txt.hg38.sumstats.gz"
files.in.GWAS["DrinkPerWeek"]="~/lhou.compbio/data/GWAS/Alcohol_tobacco_NG_2019/DrinksPerWeek.txt.hg38.sumstats.gz"
files.in.GWAS["T2D_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1223_self_reported_type_2_diabetes.bed"


dirs.in.annot = c("a_annotations_bulkDP/limma_V1.4.1/", "a_annotations_gARE_emp.p.fdr.cutoff0.05/", "a_annotations_3actHM_AREGrp/")

annot.nms=unlist(lapply(dirs.in.annot,
FUN=function(d.i)
{
  levels(factor(sapply(dir(d.i, paste0("annot.gz"), full.names=F), #"haQTL.*\\.1.annot.gz"  "diffPeak.*\\.1.annot.gz""
  FUN=function(f.i)
  {
   return(gsub(".\\d+.annot.gz", "", f.i, perl=T))
  })))
}
)) #"haQTL.*\\.1.annot.gz"  "diffPeak.*\\.1.annot.g)


dir.ou = paste0("a_3_visHeritPartition_AREGrp.bulkDP.gARE_slctGWAS/")
dir.create(dir.ou, showWarnings = F, recursive = T)

file.ou.RData = paste0(dir.ou, "enrichAndpval.RData")
file.ou.pval.pdf = paste0(dir.ou, "enrichAndpval.pdf") 
file.ou.enrich.pdf = paste0(dir.ou, "enrichment.pdf") 


#LDSC --h2
ldsc.pval = matrix(1, nrow=length(files.in.GWAS), ncol=length(annot.nms))
rownames(ldsc.pval) = names(files.in.GWAS)
colnames(ldsc.pval) = annot.nms
ldsc.enrich =matrix(0, nrow=length(files.in.GWAS), ncol=length(annot.nms))
rownames(ldsc.enrich) = names(files.in.GWAS)
colnames(ldsc.enrich) = annot.nms


for(annot.nm in annot.nms)
{
  for(pheno in names(files.in.GWAS))
  {
    f.i.res = paste0(dir.in.LDSC, pheno,"_", annot.nm, ".results")
    if(file.exists(f.i.res))  
    {
      d = as.matrix(read.table(f.i.res, sep="\t", row.names=1, header=T))
    
      ldsc.enrich[pheno, annot.nm] = d["L2_1", "Enrichment"]
      ldsc.pval[pheno, annot.nm] = d["L2_1", "Enrichment_p"]
    }
  }
}
# colnames(ldsc.enrich) = gsub("PASS_", "", colnames(ldsc.enrich))
# colnames(ldsc.pval) = gsub("PASS_", "", colnames(ldsc.pval))
ldsc.pval[ldsc.enrich<0]=1
ldsc.enrich[ldsc.enrich<0]=0
#gwas.kept = apply(ldsc.enrich, 2, FUN=function(x){any(x!=0)}) 

ldsc.qval = t(apply(ldsc.pval, 1, FUN=function(x){p.adjust(x, method = "BH")}))
  
save(#ARE.ModGrp2COLOR,
     ldsc.enrich,
     ldsc.pval,
     ldsc.qval,
     file=file.ou.RData)

# pdf(file.ou.enrich.pdf, width=20, height=9)
# pheatmap(t(log10(ldsc.enrich+1)),
#            col=  colorRampPalette(brewer.pal(n = 7, name = "OrRd" ))(200),#colorRampPalette(c('#FFFFFF', "#FF0000"))(n = 200),#
#            cluster_cols = T, show_rownames = T, show_colnames = T, cluster_rows=T, 
#            main = paste0("log10 enrichment of GWAS signal at peaks")
#   )
# 
# 
# 
# dev.off()


# row.dists = as.dist(1-cor(t(ldsc.enrich[, gwas.kept]), method = "spearman"))
# row.hclust = hclust(row.dists)

#row.ordered = c(setdiff(rownames(ldsc.enrich), ARES.Modules.ordered), ARES.Modules.ordered)

pdf(file.ou.pval.pdf, width=20, height=15)  

ldsc.enrich.filt= ldsc.enrich
ldsc.enrich.filt[ldsc.pval>0.05]=1
row.order= order(apply(ldsc.enrich.filt, 1, which.max), decreasing = F)
corrplot(log10(ldsc.enrich+1)[row.order, ], 
         p.mat=ldsc.pval[row.order, ],
         #addCoef.col=1,
         is.corr=F, 
         insig = "blank", 
         sig.level = .05,
         method = "square"
         #, order = "hclust"
         )


#show enrichment
ldsc.enrich.log.filt= log10(ldsc.enrich+1)
ldsc.enrich.log.filt[ldsc.qval>0.2]=0 #ldsc.pval<=0.05
ldsc.enrich.log.filt= ldsc.enrich.log.filt[apply(ldsc.enrich.log.filt>0,1,any), ] #apply(t(ldsc.pval)<=0.05,2,any)
ldsc.enrich.log.filt = ldsc.enrich.log.filt#[, names(ARE.ModGrp2COLOR)]
row.order= order(apply(ldsc.enrich.log.filt, 1, which.max), decreasing = F)
ldsc.enrich.log.filt= ldsc.enrich.log.filt[row.order,]
corrplot(ldsc.enrich.log.filt, 
        # p.mat=t(ldsc.pval)[rownames(ldsc.enrich.filt), colnames(ldsc.enrich.filt)],
        is.corr=F, 
        #insig = "blank", sig.level = .05,
        method = "square"
         #, order = "hclust"
         )


#show log p
ldsc.p.log.filt = -log10(ldsc.pval)
#ldsc.p.log.filt[t(ldsc.enrich)<2]=0
ldsc.p.log.filt[ldsc.qval>0.2]= 0 #ldsc.p.log.filt< -log10(0.05)
ldsc.p.log.filt= ldsc.p.log.filt[apply(ldsc.p.log.filt>0,1,any), ] #apply(t(ldsc.pval)<=0.05,2,any)
ldsc.p.log.filt = ldsc.p.log.filt#[, names(ARE.ModGrp2COLOR)]
row.order= order(apply(ldsc.p.log.filt, 1, which.max), decreasing = F)
ldsc.p.log.filt= ldsc.p.log.filt[row.order,]
corrplot(ldsc.p.log.filt, 
        # p.mat=t(ldsc.pval)[rownames(ldsc.enrich.filt), colnames(ldsc.enrich.filt)],
        is.corr=F, 
        #insig = "blank", sig.level = .05,
        method = "square"
         #, order = "hclust"
         )


#
dev.off()