#V1.1 #to add different source of GWAS data #check only enhancers near significant eQTL #adding norm or rerun #rerun is to contiue the previous results sh.heads = c() sh.heads["norm"]=paste("#!/bin/bash", "#$ -binding linear:1", #"#$ -P compbio_lab", "#$ -S /bin/bash", "##$ -V", "#$ -cwd", "#$ -e ./error.$JOB_NAME.$JOB_ID", "#$ -o ./outpt.$JOB_NAME.$JOB_ID", "#$ -l h_vmem=20g", "#$ -l h_rt=5:00:00", "##$ -pe smp 4", "source ~/.bashrc", "source ~/.my.bashrc", "reuse .r-3.6.0-bioconductor", sep="\n") sh.heads["rerun"]=paste("#!/bin/bash", "#$ -binding linear:1", #"#$ -P compbio_lab", "#$ -S /bin/bash", "##$ -V", "#$ -cwd", "#$ -e ./error.$JOB_NAME.$JOB_ID", "#$ -o ./outpt.$JOB_NAME.$JOB_ID", "#$ -l h_vmem=20g", "#$ -l h_rt=10:00:00", "##$ -pe smp 4", "source ~/.bashrc", "source ~/.my.bashrc", "reuse .r-3.6.0-bioconductor", sep="\n") # tiss2GTExTiss = c(Brain = "Brain_Frontal_Cortex_BA9", # Lung = "Lung", # Lung.RSC0.8OrMatched = "Lung", # Muscle = "Muscle_Skeletal", # Heart = "Heart_Left_Ventricle") #CHRs = paste0("chr", c(1:22)) options(scipen = 999) #eQTL2Enh.WIND.SIZE = 10000 SCRIPTs =c(GTEx= "a.colocalizeTest.haQTLVSGWAS.coloc_V1.1_per.GTEx.GWAS.R", #Alkes= "a.colocalizeTest.haQTLVSGWAS.coloc_V1.1_per.Alkes.GWAS.R", Lung= "a.colocalizeTest.haQTLVSGWAS.coloc_V1.1_per.Lung.GWAS.R", PGC= "a.colocalizeTest.haQTLVSGWAS.coloc_V1.1_per.PGC.GWAS.R") argv = commandArgs(trailingOnly = TRUE) #GENO.DIR = argv[1] MOD = argv[1] COND = argv[2] hQTL.WIND = as.numeric(argv[3]) #the window for coloclaization test #JOBs.N = as.numeric(argv[4]) haQTL.CUTOFF = "emp.p.cutoff0.05" GWAS.PVAL.CUTOFF = 1e-5 # tiss2GTExTiss = c(Brain = "Brain_Frontal_Cortex_BA9", # Lung = "Lung", # Muscle = "Muscle_Skeletal", # Heart = "Heart_Left_Ventricle") # #file.in.eGene.pos = paste0("/broad/compbio/data/GTEx/v8/eqtl/GTEx_Analysis_v8_eQTL_significant/", tiss2GTExTiss[TISS], ".v8.egenes.txt.gz") #file.in.sig.eQTL = paste0("/broad/compbio/data/GTEx/v8/eqtl/GTEx_Analysis_v8_eQTL_significant/", tiss2GTExTiss[TISS], ".v8.signif_variant_gene_pairs.txt.gz") # file.in.hg382hg19.RDS =paste0("/broad/compbio/lhou/codes/eGTEX-H3K27ac/peakMergeNormal/a_2_peakHeightsNormalized_hg19.narrowPeak.RSC0.8.RdsTot1e7.logQ2_V1.3_addRefPeak_CSFilt/", TISS, ".pks.hg382hg19Uniq.RDS") # file.in.g2pk.RData = paste0("../testPromEnhLink_eQTL/a_AREsNeareQTL/", TISS, "_10k/", TISS, ".gene2peaks_byeQTL.hg38.RData") # files.in.GWAS = dir("/broad/compbio/data/gtex_gwas_hg38_tabix/", "bed.gz$", full.names = T) # names(files.in.GWAS) = sapply(files.in.GWAS, # FUN=function(f.i.gwas) # { # gsub("imputed_|.bed.gz", "", basename(f.i.gwas)) # }) file.in.gwas.list="/broad/compbio/lhou/data/eGTEX-H3K27ac/eGTEx_GWAS_list.0908.2022.txt" file.in.hQTLPK.RData = paste0("~/lhou.compbio/codes/eGTEX-H3K27ac/haQTL/a_5_haQTL_multTest_FixedFactorNum_withGTExMinCov_V1.2.2_gINT_peer_rmAgeBatch_", hQTL.WIND/1000, "k/", COND, ".haQTLPeak.cutoffs.hg38.RData") dir.tmp =paste0("~/hptmp/eGTEx-H3K27ac/a_coloc_haQTLvsGWAS_bycoloc_V1.1/", COND, "_", hQTL.WIND/1000, "k/") dir.create(dir.tmp, showWarnings = F, recursive = T) file.tmp.haQTLPeak.bed = paste0(dir.tmp, COND, ".peaks.hg38.bed") # file.tmp.sigeQTL.bed = paste0(dir.tmp, TISS, ".eQTL.hg38.bed") # file.tmp.pks.hg38.bed = paste0(dir.tmp, TISS, ".peaks.hg38.bed") # file.tmp.eQTLOVLPpks.hg38.bed = paste0(dir.tmp, TISS, ".eQTLOVLPpeaks.hg38.bed") # gwas.info =read.table(file.in.gwas.list, sep="\t", row.names = 2, header = T, stringsAsFactors = F) # gwas2sampN=gwas.info$Sample_Size # names(gwas2sampN) = rownames(gwas.info) load(file.in.hQTLPK.RData) pks.slct = hQTLPeaks.list[[haQTL.CUTOFF]]#rownames(emp.pvals.filtNA)[emp.pvals.filtNA[,"emp.P.model"] <= haQTL.EMP.PVAL.CUTOFF] write.table(cbind(gsub(":|-", "\t", pks.slct), pks.slct), file= file.tmp.haQTLPeak.bed, sep="\t", row.names = F, col.names = F, quote = F) for(gwas.nm in rownames(gwas.info)) { source=gwas.info[gwas.nm,"source"] #JOB.I.inFN = paste(c(rep(0, nchar(JOBs.N)-nchar(JOB.I)), JOB.I), collapse="") file.tmp.coloc.RData = paste0(dir.tmp, gwas.nm, ".bycoloc.RData") print(file.tmp.coloc.RData) if(MOD == "rerun" && file.exists(file.tmp.coloc.RData)) { tryCatch( expr = { load(file.tmp.coloc.RData) }, error=function(e) { print("RData broken, use RData.tmp instead") system(paste0("mv ", file.tmp.coloc.RData, ".tmp ", file.tmp.coloc.RData)) } ) load(file.tmp.coloc.RData) print(c(length(hQTLPeaks.ovlpGWAS), length(gwasvshaQTL.coloc))) if(length(hQTLPeaks.ovlpGWAS) == length(gwasvshaQTL.coloc)) { next } } f.o.sh = paste0("a.", COND, ".", gwas.nm, ".coloc.sh") write(sh.heads[MOD], f.o.sh) write(paste("Rscript", SCRIPTs[source], MOD, COND, gwas.nm, hQTL.WIND, GWAS.PVAL.CUTOFF, file.tmp.coloc.RData, sep=" "), f.o.sh, append = T) #a.linkGenePeak_perGene_PRS.R write("echo jobs done", f.o.sh, append = T) system(paste0("qsub ", f.o.sh)) }