#V1.1
#links are from all possible cell type

# visualize information from different layer gene by gene by signal tracks
#Bulk annotation

#GWAS signal
#haQTL-ARE from Marks
#ARE MR and coloc results
#DP signal 

#use genes with promoter region overlapping with either TssA and TssFlank
#consider H3K27ac peaks associated within gene neighborhood
#genomic region is in format as "chr2\t1\t223"
#integrate signals in a gene centered manner
#a) how genomic regions is associated with genes
#b) how the haQTL, differential peak signal is related to each peak, which is in turn related to each gene
#check haQTL effect size VS GWAS for peaks for each gene

#prmotoer regions are defined as up/downstream 2kb exntended from tss
#gene neighborhood are defined as hic regions overlap with promoter regions, and Hi-C one step neighbors of these regions, if no overlapping found,  promoter itself is defined as gene neighborhood


#
library(Sushi)
source("~/lhou.compbio/codes/mylib/RCode/Util_zqtl_byYongjin.R")

library(pheatmap)

# DISEASES = c("CD", "UC", "RA")
# DIS2COLOR = c("brown", "purple", "orange")
# names(DIS2COLOR) = DISEASES
PRMOTER.WINDOW = 2000
PK.WIND = 10000
GWAS.PVAL.CUTOFF = 1e-6
#GWAS.PVAL.CUTOFF.LOW = 1e-4
GWAS.Z.CUTOFF = abs(qnorm(GWAS.PVAL.CUTOFF/2))
hQTL.P.CUTOFF = "emp.p.fdr.cutoff0.2"
DP.Q.CUTOFF=0.05
COLOC.ABF.H4.CUTOFF=0.5
MR.P.CUTOFF=0.01

HMs= c("H3K27ac","H3K4me1", "H3K4me3", "H3K27me3","H3K36me3")

#HAQTL.COLs = c(rgb(1, 0, 0, 0.5), rgb(1, 1, 0, 0.5), rgb(0, 1, 0, 0.5), rgb(0, 1, 1, 0.5), rgb(0, 0, 1, 0.5), rgb(1, 0, 1, 0.5))

file.in.hic = paste("~/lhou.compbio/data/HiC/Promoter_HiC_blood_bluePrint_2016/CHiCAGO5/allCellTypes.links.merged.uniq.sorted.bed.gz")
# files.in.TssA = c(E047= "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE047TssAFlank.bed",
#                   E048="~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE048TssAFlank.bed",
#                   E039 = "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE039TssAFlank.bed",
#                   E040 = "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE040TssAFlank.bed",
#                   E029 = "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE029TssAFlank.bed",
#                   BPNeutrop = "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE030TssAFlank.bed",#
#                   E032 = "~/lhou.compbio/data/Roadmap/15CoreChromStates127epigen_annot/gencodeV26.Promoter_ovlpWithE032TssAFlank.bed"
#                   )
file.in.TSS = "~/lhou.compbio/data/gene/human/gencode.v26lift37.basic.TSSUp0Down1bp.geneName.ENSG.bed"

files.in.pk.hg38to19.RDS= paste0("../peakMergeNormal/a_2_peakHeightsNormalized_V1.4/", HMs, ".pk.hg38tohg19.RDS")
names(files.in.pk.hg38to19.RDS)= HMs

files.in.diffPeak.RData = paste0("~/lhou.compbio/codes/MayoBipolar_2019_10/peakMergeNormal/c_bulkDiffPeak_limma_V1.4.1_sva_noEHR/", HMs, "/", HMs, ".diffPeak.RData")
names(files.in.diffPeak.RData) = HMs

files.in.haQTLPeak.hg38.RData = paste0("../haQTL/a_5_haQTL_multTest_FixedFactorNum_V1.2.3_DP.BG_100k/", HMs, ".haQTLPeak.cutoffs.hg38.RData")
names(files.in.haQTLPeak.hg38.RData)= HMs

files.in.hQTLPeak.coloc.hg38.RData=paste0("../haQTLvsGWAS/a_bulkhQTLvsGWAS_bycoloc_V1.1_allGWAS/PGC.BP/", HMs, "_100k/", HMs, ".bulkhQTLvsGWAS.RData")
names(files.in.hQTLPeak.coloc.hg38.RData) = HMs

files.in.haQTLPeak.MR.hg38.RData=paste0("../haQTLvsGWAS/a_bulkhaQTLvsGWAS_byMR_Egger_V1.1_allGWAS/PGC.BP/", HMs, "_100k/", HMs, ".bulkHaQTLvsGWAS.RData")
names(files.in.haQTLPeak.MR.hg38.RData) = HMs

file.in.BP.gz = "~/lhou.compbio/data/GWAS/pgc_bipolar/pgc_bip_hg19.sorted.bed.gz"

file.in.geneStructure.RData = "~/lhou.compbio/data/gene/human/gencode.v26lift37.basic.annotation.forSushi_exonAndUTR.RData"

dir.tmp = "~/hptmp/MayoBipolar/a_geneNeighb_V1.1_bulkLink/"
dir.create(dir.tmp, recursive = T, showWarnings = F)

#file.tmp.cellTypePeak = paste(dir.tmp, REF, ".hg19.nodup.filt.narrowPeak.merged.bed", sep="")
#file.tmp.TssA.bed = paste(dir.tmp, REF, ".TSSA.bed", sep="")
file.tmp.hicRegs.bed = paste(dir.tmp, "hicRegions.bed", sep="")
file.tmp.gRegion.bed= paste(dir.tmp, "gRegion.bed", sep="")
file.tmp.gFilt.bed= paste(dir.tmp, "gFilt.bed", sep="")
file.tmp.genesStructure.bed = paste(dir.tmp, "genesStructure.bed", sep="")
file.tmp.gStructure.bed = paste(dir.tmp, "gStructure.bed", sep="")
file.tmp.pk.bed = paste(dir.tmp, "pk.bed", sep="")
file.tmp.BP.hits.bed = paste0(dir.tmp, "BP.hits.hg19.bed")
file.tmp.pk.ovlp.BP.hits.bed= paste0(dir.tmp, "pkOVLP.BP.hits.hg19.bed")

  
dir.ou="a_geneNeighb_visGWASANDBulkAnnot_byGene_V1.1_bulkLink/"
dir.create(dir.ou, showWarnings = F, recursive = T)

file.out.pref = paste(dir.ou, "geneNB", sep="")
file.ou.gNeighbor.link.RData = paste(file.out.pref, ".link.RData", sep="")
file.ou.gNeighbor.annot.RData = paste(file.out.pref, ".annot.RData", sep="")
file.ou.geneCRELink.RData = paste(dir.ou, "geneCRELink.HiC.hg19.RData", sep="")

file.ou.pdf = paste(file.out.pref, ".GWASp", GWAS.PVAL.CUTOFF, ".haQTLDiffPks.pdf", sep="")
file.ou.summary.pdf = paste(file.out.pref, ".GWASp", GWAS.PVAL.CUTOFF, ".haQTLDiffPks.summary.pdf", sep="")
#file.ou.haQTLDiffPks2GWAS.RData = paste(file.out.pref, ".haQTLDiffPks2GWAS.RData", sep="")
#file.ou.geneNeighb = paste(file.out.pref, ".txt", sep="")


################################################################################
load(file.in.geneStructure.RData)
write.table(transcripts.bed, file=file.tmp.genesStructure.bed, sep="\t", col.names = F, row.names = F, quote=F)
################################################################################
gene2hicReg = list() #gene 2 neighbor regions based on hiC and promoters

hicReg2Annot =  matrix()

peaks2haQTL = list() #peak 2 haQTL, snpid and zscore
peaks2diffZ = list() #peaks 2 differential Zscore for each disease
g.neighb2peaks = list() #neighbor mapped 2 peaks
gene2peaks = list() #gene 2 peaks associated with its neighbors

#######################################################
HM.hg38tohg19= lapply(HMs,
FUN=function(HM)
{
  readRDS(files.in.pk.hg38to19.RDS[HM])
})
names(HM.hg38tohg19)=HMs

#annotate###################################################

HM.DP.score.hg19=lapply(HMs,
FUN=function(HM)
{
  load(files.in.diffPeak.RData[HM])

  dp.signals=cbind(log2FoldChange= res$diseasedisease$logFC, pvalue=res$diseasedisease$P.Value, padj = res$diseasedisease$adj.P.Val)
  rownames(dp.signals) = rownames(res$diseasedisease)

  return(dp.signals)
})
names(HM.DP.score.hg19)=HMs

HM.PK.slct.hg19 = lapply(HMs,
FUN=function(HM)
{
  rownames(HM.DP.score.hg19[[HM]])
})
names(HM.PK.slct.hg19)=HMs

HM.DP.hg19= lapply(HM.DP.score.hg19,
FUN=function(dp.signals)
{
  rownames(dp.signals)[dp.signals[,"padj"]<=DP.Q.CUTOFF]
})
names(HM.DP.hg19) = names(HM.DP.score.hg19)


HM.hQTLPK.hg19 = lapply(HMs,
FUN=function(HM)
{
  load(files.in.haQTLPeak.hg38.RData[HM])
  pks.hg38=hQTLPeaks.list[[hQTL.P.CUTOFF]]
  #buf=read.table(f.i.bed, sep="\t", row.names = NULL, header = F, stringsAsFactors = F)
   #= paste0(buf[,1], ":", buf[,2], "-", buf[,3])
  pks.hg19 = HM.hg38tohg19[[HM]][pks.hg38]
  pks.hg19.slct = intersect(HM.PK.slct.hg19[[HM]], pks.hg19)
  return(pks.hg19.slct)
})
names(HM.hQTLPK.hg19) = HMs

HM.hQTLPK.coloc.hg19 = lapply(HMs,
FUN=function(HM)
{
  f.i.RData=files.in.hQTLPeak.coloc.hg38.RData[HM]
  load(f.i.RData)

  pks.hg38=names(hQTLPeaks.QTLvsGWAS)[sapply(hQTLPeaks.QTLvsGWAS, FUN=function(x){if(is.na(x)) return(NA);x$coloc$summary["PP.H4.abf"]>=COLOC.ABF.H4.CUTOFF})]
  pks.hg38=pks.hg38[!is.na(pks.hg38)]
  pks.hg19 = HM.hg38tohg19[[HM]][pks.hg38]
  pks.hg19.slct = intersect(HM.PK.slct.hg19[[HM]], pks.hg19)
  return(pks.hg19.slct)

})
names(HM.hQTLPK.coloc.hg19) = HMs

HM.hQTLPK.MR.hg19 = lapply(HMs,
FUN=function(HM)
{
  f.i.RData=files.in.haQTLPeak.MR.hg38.RData[HM]
  load(f.i.RData)

  pks.hg38=names(hQTLPeaks.QTLvsGWAS)[sapply(hQTLPeaks.QTLvsGWAS, FUN=function(x){if(is.na(x)) return(NA); x$MR@Causal.pval<=MR.P.CUTOFF})]
  pks.hg38=pks.hg38[!is.na(pks.hg38)]
  pks.hg19 = HM.hg38tohg19[[HM]][pks.hg38]
  pks.hg19.slct = intersect(HM.PK.slct.hg19[[HM]], pks.hg19)
  return(pks.hg19.slct)
})
names(HM.hQTLPK.MR.hg19)=HMs

#
cmd = paste0("zcat ", file.in.BP.gz, "|awk '($10<", GWAS.PVAL.CUTOFF, "){print }'>", file.tmp.BP.hits.bed)
system(cmd)
HM.PK.GWAS.hg19 = lapply(HMs,
FUN=function(HM)
{
  print(HM)
  write(paste(gsub(":|-", "\t", HM.PK.slct.hg19[[HM]]), HM.PK.slct.hg19[[HM]], sep="\t"), file.tmp.pk.bed)
  cmd = paste0("bedtools window -w ", PK.WIND, " -a ", file.tmp.pk.bed, " -b ", file.tmp.BP.hits.bed, ">", file.tmp.pk.ovlp.BP.hits.bed)
  system(cmd)
  buf = read.table(file.tmp.pk.ovlp.BP.hits.bed, "\t", header=F, row.names = NULL)
  peaks.ovlpGWAS = levels(factor(buf[,4]))


})
names(HM.PK.GWAS.hg19) = HMs


HM.PK.hg19.annot = lapply(HMs,
FUN=function(HM)
{
  pk.annot.matrix = matrix(0, ncol=5, nrow=length(HM.PK.slct.hg19[[HM]]))
  colnames(pk.annot.matrix)=c("DP", "hQTLPk", "coloc", "MR", "GWAS")
  rownames(pk.annot.matrix) = HM.PK.slct.hg19[[HM]]

  pk.annot.matrix[HM.DP.hg19[[HM]], "DP"]=1
  pk.annot.matrix[HM.hQTLPK.hg19[[HM]], "hQTLPk"]=1
  pk.annot.matrix[HM.hQTLPK.coloc.hg19[[HM]], "coloc"]=1
  pk.annot.matrix[HM.hQTLPK.MR.hg19[[HM]], "MR"]=1
  pk.annot.matrix[HM.PK.GWAS.hg19[[HM]], "GWAS"]=1


  pk.annot.matrix.filt=pk.annot.matrix[apply(pk.annot.matrix, 1, FUN=function(x) any(x==1)),]


  return(pk.annot.matrix.filt)

})
names(HM.PK.hg19.annot)=HMs


# save(HM.hg38tohg19,
#      HM.DP.hg19,
#      HM.DP.score.hg19,
#      HM.hQTLPK.hg19,
#      HM.hQTLPK.coloc.hg19,
#      HM.hQTLPK.MR.hg19,
#      HM.PK.hg19.annot,
#      file=file.ou.gNeighbor.RData
#      )
################################################################################


print("generating gene neighborhood") ################################################################
#remove regions from other chromosome
hic.reg1 = system(paste("zcat ", file.in.hic, "|awk '($1==$5){print $1 \"\\t\" $2 \"\\t\" $3}' ", sep=""), intern=T)
hic.reg2 = system(paste("zcat ", file.in.hic, "|awk '($1==$5){print $5 \"\\t\" $6 \"\\t\" $7}' ", sep=""), intern=T)
# names(hic.reg1) = gsub("\t", "_", hic.reg1)
# names(hic.reg2) = gsub("\t", "_", hic.reg2)
hic.regCmb = levels(factor(c(hic.reg1, hic.reg2)))
write(hic.regCmb, file=file.tmp.hicRegs.bed) # they are non-overlapped regions
hic.reg2neighb = split(c(hic.reg1, hic.reg2), c(hic.reg2, hic.reg1))

########################################################################
#hic to peak
hic.reg2HM.Pks= lapply(HMs,
FUN=function(HM)
{
  pk.hg19 = rownames(HM.DP.score.hg19[[HM]])#HM.hg38tohg19[[HM]]
  write(gsub(":|-", "\t", pk.hg19), file.tmp.pk.bed)
  
  cmd =paste0("intersectBed -wa -wb -a ", file.tmp.pk.bed, " -b ", file.tmp.hicRegs.bed)
  buf=t(sapply(system(cmd, intern=T), FUN=function(x){unlist(strsplit(x, split="\t"))}))
  
  buf.pk = paste0(buf[,1], ":", buf[,2], "-", buf[,3])
  buf.hicReg = paste(buf[,4], buf[,5], buf[,6], sep="\t")
  hic.reg2pk =split(buf.pk, buf.hicReg)

})
names(hic.reg2HM.Pks) = HMs


hic.reg2HM.PK.annot.df = lapply(HMs,
FUN=function(HM)
{
  pk.hg19 = rownames(HM.PK.hg19.annot[[HM]])
  write(gsub(":|-", "\t", pk.hg19), file.tmp.pk.bed)

  cmd =paste0("intersectBed -wa -wb -a ", file.tmp.pk.bed, " -b ", file.tmp.hicRegs.bed)
  buf=t(sapply(system(cmd, intern=T), FUN=function(x){unlist(strsplit(x, split="\t"))}))

  buf.pk = paste0(buf[,1], ":", buf[,2], "-", buf[,3])
  buf.hicReg = paste(buf[,4], buf[,5], buf[,6], sep="\t")
  hic.reg2pk =split(buf.pk, buf.hicReg)

  hic.reg2annot = matrix(0, nrow=length(hic.regCmb), ncol=5)
  rownames(hic.reg2annot) =hic.regCmb
  for(hic.reg in names(hic.reg2pk))
  {
    pks = hic.reg2pk[[hic.reg]]
    pks.annot = rbind(HM.PK.hg19.annot[[HM]][pks, ])

    pks.annot.sum = apply(pks.annot, 2, sum)
    pks.annot.sum[pks.annot.sum>=1]=1
    hic.reg2annot[hic.reg,]=pks.annot.sum
  }
  colnames(hic.reg2annot) = paste(HM, c("DP", "hQTLPk", "coloc", "MR", "GWAS"), sep="_")

  return(hic.reg2annot)

})

hic.reg2HM.PK.annot.matrix=do.call(cbind, hic.reg2HM.PK.annot.df)
#hic.reg2HM.PK.annot.filt.matrix = hic.reg2HM.PK.annot.matrix[apply(hic.reg2HM.PK.annot.matrix, 1, FUN=function(x){any(x==1)}), ]

save(HM.PK.hg19.annot,
     hic.reg2HM.PK.annot.matrix,
     file=file.ou.gNeighbor.annot.RData)

###################################################################################
#promoter
cmd =paste("bedtools window -w ", PRMOTER.WINDOW, " -a ", file.tmp.hicRegs.bed, " -b ", file.in.TSS,  sep="")
buf=t(sapply(system(cmd, intern=T), FUN=function(x){unlist(strsplit(x, split="\t"))}))
buf.hicReg = paste(buf[,1], buf[,2], buf[,3], sep="\t")
buf.gene.name = buf[,8]#paste(buf[,4], buf[,5], buf[,6], sep="_")
gene2hicReg = split(buf.hicReg, buf.gene.name)
#geneProm2hicReg = split(buf.hicReg, buf.geneProm)
# buf = read.table(file.tmp.TssA.bed, sep="\t", header=F, row.names = NULL, colClasses = "character")
# promoters = paste(buf[,1], buf[,2], buf[,3], sep="\t")
# gene2promot = split(promoters, buf[,4])

gene2Neighb = lapply(names(gene2hicReg),
FUN=function(g)
{
  nb=c()
  g.hic.regs = gene2hicReg[[g]]
  for(reg in g.hic.regs)
  {
    nb = c(nb, reg, hic.reg2neighb[[reg]])
  }
  return(levels(factor(nb)))
})
names(gene2Neighb) = names(gene2hicReg)

save(hic.reg2neighb,
     gene2hicReg,
     gene2Neighb,
     file=file.ou.gNeighbor.link.RData)


hm.gene2CRE.HiC.hg19= lapply(names(hic.reg2HM.Pks),
FUN=function(hm)
{
  print(hm)
  reg2pk=hic.reg2HM.Pks[[hm]]

  g2CREs=lapply(gene2Neighb,
  FUN=function(hicRs)
  {
    hicRs.ovlp=intersect(hicRs, names(reg2pk))
    CREs=unlist(reg2pk[hicRs.ovlp])
    CREs=CREs[!duplicated(CREs)]
  })

  
  g2CREs=g2CREs[sapply(g2CREs, length)!=0]
  #
  return(g2CREs)
})
names(hm.gene2CRE.HiC.hg19)=names(hic.reg2HM.Pks)


for(hm in names(hm.gene2CRE.HiC.hg19))
{
  geneAndCRE.df = do.call(rbind, lapply(names(hm.gene2CRE.HiC.hg19[[hm]]), 
  FUN=function(g)
  {
    data.frame(
                gene=g,
                CRE=hm.gene2CRE.HiC.hg19[[hm]][[g]],
                HM=hm, 
                stringsAsFactors=F)
  }))

  file.ou.geneCRELink.tsv = paste(dir.ou, hm, ".geneCRELink.HiC.hg19.tsv", sep="")
  write.table(geneAndCRE.df, file=file.ou.geneCRELink.tsv, sep="\t", quote=F, row.names=F)
}


save(hm.gene2CRE.HiC.hg19,
    file=file.ou.geneCRELink.RData
  )


save(HM.DP.score.hg19,
     HM.PK.hg19.annot,
     hic.reg2HM.Pks,
     hic.reg2HM.PK.annot.matrix,
     #gNB2annotSum,
     #gNB2range,
     #gNB2GWAS.topPval,
     #genes.filt.sorted,
     file=file.ou.gNeighbor.annot.RData)


#


#######################################################
#gene neighbor filter
gNB2annotSum = t(sapply(gene2Neighb,
FUN=function(hic.regs)
{
  apply(rbind(hic.reg2HM.PK.annot.matrix[hic.regs, ]), 2 ,sum)
}))


gNB2annotSumHM = cbind(apply(gNB2annotSum[, grepl("_DP", colnames(gNB2annotSum))], 1, sum),
                       apply(gNB2annotSum[, grepl("_hQTLPk", colnames(gNB2annotSum))], 1, sum),
                       apply(gNB2annotSum[, grepl("_coloc", colnames(gNB2annotSum))], 1, sum),
                       apply(gNB2annotSum[, grepl("_MR", colnames(gNB2annotSum))], 1, sum),
                        apply(gNB2annotSum[, grepl("_GWAS", colnames(gNB2annotSum))], 1, sum)
                       )
colnames(gNB2annotSumHM)=c("DP", "hQTLPk", "coloc", "MR", "GWAS")


gNB2annot.prop=gNB2annotSum/sapply(gene2Neighb, length)


genes.filt = rownames(gNB2annotSumHM)[gNB2annotSumHM[,"DP"]>=1 &
                                       gNB2annotSumHM[,"hQTLPk"]>=1 &
                                        gNB2annotSumHM[,"GWAS"]>=1 &
                                        (gNB2annotSumHM[,"coloc"]>=1  & gNB2annotSumHM[,"MR"]>=1 )]
genes.filt=setdiff(genes.filt, c("Y_RNA", "snoU13"))


#enrichment test
#test enrichment
gs.bg = rownames(gNB2annotSumHM)
genes.DiffReg.NO=sum(gNB2annotSumHM[gs.bg,"DP"]>=1)
genes.Genetics.NO= sum(gNB2annotSumHM[gs.bg,"hQTLPk"]>=1 &
                         gNB2annotSumHM[gs.bg,"GWAS"]>=1 &
                         (gNB2annotSumHM[gs.bg,"coloc"]>=1  & gNB2annotSumHM[gs.bg,"MR"]>=1 ))
genes.ovlp.NO=length(genes.filt)
fish.p=fisher.test(matrix(c(genes.ovlp.NO,
                            genes.DiffReg.NO-genes.ovlp.NO,
                            genes.Genetics.NO-genes.ovlp.NO,
                            nrow(gNB2annotSumHM)-genes.DiffReg.NO-genes.Genetics.NO+genes.ovlp.NO), ncol=2))$p.val
print(fish.p)


pdf(file.ou.summary.pdf, height=20, width=8)

pheatmap(gNB2annot.prop[genes.filt, paste0(HMs, "_DP")],
         annotation_row=data.frame(gNB2annot.prop[genes.filt,c(paste0(HMs, "_coloc"), paste0(HMs, "_MR"))]))

dev.off()

#genes.filt = rownames(gNB2annotSumHM)[apply(gNB2annotSumHM>=1, 1, sum)>=2]
#genes.filt = rownames(gNB2annotSum)[apply(gNB2annotSum>=1, 1, sum)>2]

gNB2range=sapply(gene2Neighb,
FUN=function(hic.regs)
{
  buf= sapply(hic.regs, FUN=function(x) unlist(strsplit(x, split="\t")))
  buf.chr= buf[1,]
  buf.start= min(as.numeric(buf[2,]))
  buf.end= max(as.numeric(buf[3,]))

  if(all(buf.chr==buf.chr[1]))
  {
    return(paste0(buf.chr[1], ":", buf.start, "-", buf.end))
  }else
  {
    return(NA)
  }
})
genes.filt = genes.filt[!is.na(gNB2range[genes.filt])]
write.table(cbind(gsub(":|-", "\t", gNB2range[genes.filt]), genes.filt), file=file.tmp.gFilt.bed, sep="\t", row.names = F, col.names = F, quote=F)
genes.filt.sorted = sapply(system(paste0("sortBed -i ", file.tmp.gFilt.bed), intern = T),
FUN=function(x)
{
  unlist(strsplit(x, split="\t"))[4]
})

# gNB2GWAS.topPval =sapply(genes.filt,
# FUN=function(g)
# {
#   print(g)
#   range=gNB2range[[g]]
#   if(is.na(range))
#   {
#     return(NA)
#   }
#   gwas.tab = seqminer::tabix.read.table(file.in.BP.gz, range)
#   min(gwas.tab$V10)
# })
# names(gNB2GWAS.topPval) =genes.filt

# genes.filt = rownames(gNB2annotSumHM)[apply(gNB2annotSumHM>=1, 1, sum)>=2 & gNB2GWAS.topPval<=GWAS.PVAL.CUTOFF]
# genes.filt = genes.filt[!is.na(genes.filt)]

save(HM.DP.score.hg19,
     HM.PK.hg19.annot,
     hic.reg2HM.Pks,
     hic.reg2HM.PK.annot.matrix,
     gNB2annotSum,
     gNB2range,
     #gNB2GWAS.topPval,
     genes.filt.sorted,
     file=file.ou.gNeighbor.annot.RData)


print("tracks for each gene neighborhood")##########################################
load(file.ou.gNeighbor.link.RData)
load(file.ou.gNeighbor.annot.RData)
#FUNCTION##############################################################################
getChrFromRegions = function(regions) #vector of regions with format "chr\tstart\tend"
{
  chrs = sapply(regions, FUN=function(x){unlist(strsplit(x, split="\t"))[1]})
  
  return(levels(factor(chrs)))
}

getHiCLinks = function(regions, hic.reg2neighb)#vector of regions with format "chr\tstart\tend"
{
  nb.links = c()
  for(nb in intersect(regions, names(hic.reg2neighb)))
  {
    nb.partners = hic.reg2neighb[[nb]]
    for(nb.p in intersect(nb.partners, nbs))
    {
      nb.links=c(nb.links, paste(nb, nb.p, NA, 1, ".", ".", 1, sep="\t"))
      if(nb > nb.p)
      {
        nb.links=c(nb.links, paste(nb.p, nb, NA, 1, ".", ".", 1, sep="\t"))
      }
    }
  }
  nb.links = levels(factor(nb.links))
  return(nb.links)
}

getHiCLinksFiltBEDPE=function(nb.links, g.chr)
{
  nb.links.bedpe = data.frame(t(sapply(nb.links, FUN=function(x)unlist(strsplit(x, split="\t")))), stringsAsFactors = F)
  colnames(nb.links.bedpe) = c("chrom1", "start1", "end1", "chrom2", "start2", "end2", "name", "score", "strand1", "strand2", "samplenumber")
  nb.links.bedpe = nb.links.bedpe[nb.links.bedpe$chrom1 == g.chr & nb.links.bedpe$chrom2 ==  g.chr,]
  nb.links.bedpe$start1 = as.numeric(nb.links.bedpe$start1)
  nb.links.bedpe$start2 = as.numeric(nb.links.bedpe$start2)
  nb.links.bedpe$end1 = as.numeric(nb.links.bedpe$end1)
  nb.links.bedpe$end2 = as.numeric(nb.links.bedpe$end2)
  nb.links.bedpe$score = as.numeric(nb.links.bedpe$score)
  nb.links.bedpe$samplenumber = as.numeric(nb.links.bedpe$samplenumber)
  
  return(nb.links.bedpe)
}

getNBGenesBED = function(g.chr, chr.start, chr.end)
{
  
  write(paste(g.chr, "\t", chr.start, "\t", chr.end, sep=""), file.tmp.gRegion.bed)
  cmd =paste( "intersectBed -a ", file.tmp.genesStructure.bed, " -b ", file.tmp.gRegion.bed," -u>", file.tmp.gStructure.bed, sep="")
  system(cmd)
  buf = read.table(file.tmp.gStructure.bed, head=F, sep="\t", row.names = NULL, stringsAsFactors = F)
  colnames(buf) = c("chrom", "start", "stop", "gene", "gene.type", "transcript", "score", "strand", "type")
  buf = buf[buf$gene.type=="protein_coding" | buf$gene==g ,]#only keep protein coding genes and the gene we are focusing on now
  buf$strand[as.character(buf$strand)=="+"] = 1
  buf$strand[as.character(buf$strand)=="-"] = -1
  buf$strand = as.numeric(buf$strand)
  
  NB.genes.bed = list()
  NB.genes.bed$g = buf[buf$gene==g, ]
  NB.genes.bed$others = buf[buf$gene!=g, ]
  
  return(NB.genes.bed)
}

gethaQTLFiltBedgraph = function(peaks2haQTL, g.pks, g.chr) #sorted
{
  haQTL.posAndT.bed = data.frame()
  for(i in 1:length(g.pks))
  {
    for(posID in names(peaks2haQTL[[g.pks[i]]]))
    {
      t= abs(peaks2haQTL[[g.pks[i]]][posID])
      pos = as.numeric(unlist(strsplit(posID, split="_"))[2])
      chr = unlist(strsplit(posID, split="_"))[1]
      haQTL.posAndT.bed = rbind(haQTL.posAndT.bed, data.frame(chrom = chr, start = pos-1, end = pos, value = t,stringsAsFactors = F))  
    }
    
  }
  haQTL.posAndT.bed =  haQTL.posAndT.bed[ haQTL.posAndT.bed$chrom==g.chr, ]
  haQTL.posAndT.bed =  haQTL.posAndT.bed[order(haQTL.posAndT.bed$start, decreasing = F),]
  return(haQTL.posAndT.bed)
}


getGWASFiltBedgraph = function(g.haQTL.gwas, dis, g.chr)
{
  haQTL.posAndGWAS.bed = data.frame(stringsAsFactors = F)
  for(posID in names(g.haQTL.gwas[[dis]]))
  {
    
    z= abs(g.haQTL.gwas[[dis]][posID])
    pos = as.numeric(unlist(strsplit(posID, split="_"))[2])
    chr = unlist(strsplit(posID, split="_"))[1]
    haQTL.posAndGWAS.bed = rbind(haQTL.posAndGWAS.bed, data.frame(chrom = chr, start = pos-1, end = pos, value = z,stringsAsFactors = F))
    
  }
  haQTL.posAndGWAS.bed = haQTL.posAndGWAS.bed[haQTL.posAndGWAS.bed$chrom==g.chr,]
  if(nrow(haQTL.posAndGWAS.bed)>=1)
  {
    haQTL.posAndGWAS.bed =  haQTL.posAndGWAS.bed[order(haQTL.posAndGWAS.bed$start, decreasing = F),]
  }
  return(haQTL.posAndGWAS.bed)
}


#
pdf(file.ou.pdf, width =25, height=12)
layout(matrix(1:5, ncol=1))
par(mar=c(3,15,2,3))
for(g in genes.filt.sorted)
{
  print(g)
  g.range = gNB2range[[g]]
  if(is.na(g.range)) next
    
  g.chr = getChrFromRegions(gsub(":|-", "\t", g.range))
  if(length(g.chr)>1)
  {
    print(paste(g, "mapping to multiple chroms"))
    next
  }
  
  #filt by gwas
  gwas.tab = seqminer::tabix.read.table(file.in.BP.gz, g.range) 
  if(length(gwas.tab)==0) next
  gwas.bedgraph = data.frame(chr= gwas.tab$V1,
                             start = gwas.tab$V2,
                             end = gwas.tab$V3,
                             value = -log10(gwas.tab$V10) 
                             )
  if(all(gwas.bedgraph$value <= -log10(GWAS.PVAL.CUTOFF))) next
  
  
  #range
  nbs = gene2Neighb[[g]]
  nb.links = getHiCLinks(nbs, hic.reg2neighb)
  if(length(nb.links)<1)
  {
    print(paste(g, "no Hi-C links around"))
    next
  }
  nb.links.bedpe = getHiCLinksFiltBEDPE(nb.links, g.chr)
  if(nrow(nb.links.bedpe)<1)
  {
    print(paste(g, "no Hi-C links around on the same chrom"))
    next
  }
  print(g)
  
  chr.start = max(1,min(c(nb.links.bedpe$start1, nb.links.bedpe$start2))-5000)
  chr.end  = max(c(nb.links.bedpe$end1, nb.links.bedpe$end2))+5000
  
  
  #axis(side=2,las=2,tcl=.2)
  #mtext("",side=2,line=1.75,cex=.75,font=2)

  #neighbor regions
  # nbs.bed = data.frame(t(sapply(nbs, FUN=function(x){unlist(strsplit(x, split="\t"))})), score = 1 , strand = ".",stringsAsFactors = F)
  # colnames(nbs.bed)[1:3] = c("chrom", "chromstart", "chromend")
  # nbs.bed$chromstart = as.numeric(nbs.bed$chromstart)
  # nbs.bed$chromend = as.numeric(nbs.bed$chromend)
  # plotBed(beddata=nbs.bed, chrom = g.chr, chromstart=chr.start, chromend=chr.end, row  = "auto", wiggle=0.001)
  # labelgenome(chrom=g.chr, chromstart= chr.start, chromend = chr.end, n=5, scale="Mb")
  #  
  #gene
  NB.genes.bed = getNBGenesBED(g.chr, chr.start, chr.end)
  
  
  if(nrow(NB.genes.bed$others)==0)
  {
    plot.new()
  }else
  {
    pg = plotGenes(NB.genes.bed$others[,c("chrom", "start", "stop", "gene", "score", "strand")],
               chrom = g.chr, chromstart = chr.start,chromend=chr.end,
               #colorby = c(0,1)[(NB.transcripts.bed$gene==g) +1],
               #colorbycol=SushiColors(2),
               col="blue",
               types="exon",#NB.transcripts.bed$type, 
               maxrows=8, bheight=0.2,
               plotgenetype="box",bentline=FALSE, wigglefactor = 0.1,
               labeloffset=.4, fontsize=0.6,# arrowlength = 0.025,
               labeltext=TRUE)
  }
  
  pg = plotGenes(NB.genes.bed$g[,c("chrom", "start", "stop", "gene", "score", "strand")],
                 chrom = g.chr, chromstart = chr.start,chromend=chr.end,
                 #colorby = c(0,1)[(NB.transcripts.bed$gene==g) +1],
                 #colorbycol=SushiColors(2),
                 col="red",
                 types="exon",#NB.transcripts.bed$type, 
                 maxrows=1, bheight=0.2,
                 plotgenetype="box",bentline=FALSE, wigglefactor = 0.1,
                 labeloffset=.4, fontsize=1.2,# arrowlength = 0.025,
                 labeltext=TRUE)
  labelgenome(chrom=g.chr, chromstart= chr.start, chromend = chr.end, n=5, scale="Mb")
  
  
  #axis(side=2,las=2,tcl=.2)
  #mtext("",side=2,line=1.75,cex=.75,font=2)
  
  #hiC
  pbpe = plotBedpe(nb.links.bedpe, chrom = g.chr, 
                   chromstart = chr.start,
                   chromend  = chr.end,
                   heights = nb.links.bedpe$score,
                   plottype="loops",
                   main=g)
  labelgenome(chrom=g.chr, chromstart= chr.start, chromend = chr.end, n=5, scale="Mb")
  
  #annotations bed
  annot.bed.df= data.frame()
  nbs.annot = rbind(hic.reg2HM.PK.annot.matrix[nbs,])
  for(annot.nm in colnames(nbs.annot))
  {
    nbs.filt = nbs[nbs.annot[, annot.nm]!=0]
    if(length(nbs.filt)==0) next
  
    buf= sapply(nbs.filt, FUN=function(x) unlist(strsplit(x, split="\t")))
    buf.chr = buf[1,]
    buf.start = as.numeric(buf[2,])
    buf.end = as.numeric(buf[3,])
    
    df =data.frame(chrom=buf.chr,
               start = buf.start,
               end = buf.end,
               name=annot.nm,
               score=0,
               strand = ".", stringsAsFactors = F)
    annot.bed.df = rbind(annot.bed.df, df)
  }
  annot.bed.df$row = as.numeric(factor(annot.bed.df$name))
  annot.bed.df$color = maptocolors(annot.bed.df$row, col=SushiColors(6))
  annot.bed.df.sorted = annot.bed.df[order(annot.bed.df$row),]
  plotBed(beddata = annot.bed.df.sorted,
          chrom = g.chr,
          chromstart = chr.start,
          chromend =chr.end,
          rownumber = annot.bed.df.sorted$row, 
          type = "region",
          color=annot.bed.df.sorted$color,
          row="given",
          plotbg="white",
          rowlabels=unique(annot.bed.df.sorted$name),
          rowlabelcol=unique(annot.bed.df.sorted$color), 
          rowlabelcex=0.75)
  labelgenome(chrom=g.chr, chromstart= chr.start, chromend = chr.end, n=5, scale="Mb")
  mtext("bulk annotate", side=3, adj=-0.065,line=0.5,font=2)
  
  #GWAS
  gwas.bedgraph.filt=gwas.bedgraph[gwas.bedgraph$value>=-log10(GWAS.PVAL.CUTOFF),]
  plotBedgraph(gwas.bedgraph.filt,
               chrom = g.chr, 
               chromstart = chr.start,
               chromend  = chr.end,
               colorbycol= SushiColors(5))
  labelgenome(chrom=g.chr, chromstart= chr.start, chromend = chr.end, n=5, scale="Mb")
  mtext("BP GWAS", side=3, adj=-0.065,line=0.5,font=2)
  axis(side=2,las=2, tcl=.2)
  
  
  #

  
}
dev.off()