#include three different kinds of annotation set #include only limited number of GWAS #ARE grp only coming from 3 active marks #remove the filter snp step, since I rerun the cacluation of l2 for baseline model, and now snps set matching problem should be solved #use GWAS atlas data, some redundant summary statistics #for the baseline they get different number of SNPs as I got. #need to use the overlapping snps, ignore possible change to .M, .M_5_50 #should guarantee all the new annotations share the same set of SNPs CHRs = as.character(1:22) sh.head = paste("#!/bin/bash", "#$ -S /bin/bash", #"#$ -P compbio_lab", "#$ -binding linear:1", "##$ -V", "#$ -cwd", "#$ -e ./error.$JOB_NAME.$JOB_ID", "#$ -o ./outpt.$JOB_NAME.$JOB_ID", "#$ -l h_vmem=20g", "#$ -l h_rt=10:00:00", "##$ -pe smp 5", "source ~/.bashrc", "source ~/.my.bashrc", "export PATH=$HOME/lhou.compbio/software/anaconda2/bin:$PATH", "source activate ldsc", sep="\n") # dir.in.plink = "~/lhou.compbio/data/LDSC/1000G_EUR_Phase3_plink/" # dir.in.snp = "~/lhou.compbio/data/LDSC/hapmap3_snps/" file.in.baseline.pref = "~/lhou.compbio/data/LDSC/1000G_Phase3_baselineLD_v2.1_ldscores/baselineLD." file.in.weight.pref = "~/lhou.compbio/data/LDSC/weights_hm3_no_hla/weights." file.in.freq.pref = "~/lhou.compbio/data/LDSC/1000G_Phase3_frq/1000G.EUR.QC." files.in.GWAS=c() files.in.GWAS["PGC_SCZ"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_pgc.scz2.bed" files.in.GWAS["RA_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1464_self_reported_rheumatoid_arthritis.bed" files.in.GWAS["IBD_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1461_self_reported_inflammatory_bowel_disease.bed" files.in.GWAS["MS_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1261_self_reported_multiple_sclerosis.bed" files.in.GWAS["PD_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1262_self_reported_parkinsons_disease.bed" files.in.GWAS["T1D_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1222_self_reported_type_1_diabetes.bed" files.in.GWAS["T2D_UKBS"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20002_1223_self_reported_type_2_diabetes.bed" files.in.GWAS["Neurot_UKB"]= "~/lhou.compbio/data/GWAS/GTEx.v8.imputedGWAS.withRSID/imputed_UKB_20127_Neuroticism_score.bed" files.in.GWAS["PGC_BP"]="~/lhou.compbio/data/GWAS/pgc_bipolar/daner_PGC_BIP32b_mds7a_0416a.hg38.sumstats.gz" files.in.GWAS["AD_NG2022"]="~/lhou.compbio/data/GWAS/AD/AD_Bellenguez_NG2022/GCST90027158_buildGRCh38.sumstats.gz" files.in.GWAS["HighBloodPressure"]="~/lhou.compbio/data/GWAS/GWASAtlas_ukb2_NG_2019/highBloodPressure_6150_4_logistic.EUR.sumstats.gz" files.in.GWAS["SmokeInit"]="~/lhou.compbio/data/GWAS/Alcohol_tobacco_NG_2019/SmokingInitiation.txt.hg38.sumstats.gz" files.in.GWAS["DrinkPerWeek"]="~/lhou.compbio/data/GWAS/Alcohol_tobacco_NG_2019/DrinksPerWeek.txt.hg38.sumstats.gz" files.in.GWAS["COVID19.HG"] = "~/lhou.compbio/data/GWAS/COVID19_hg/COVID19_HGI_ANA_C2_V2.sumstats.gz" # files.in.gwasSummary["FEV1"] = "~/lhou.compbio/data/GWAS/Lung.function_fromXushen/Shrine_30804560_FEV1_meta-analysis.parse.txt.gz" # files.in.gwasSummary["FVC"] = "~/lhou.compbio/data/GWAS/Lung.function_fromXushen/Shrine_30804560_FVC_meta-analysis.parse.txt.gz" # files.in.gwasSummary["PEF"] = "~/lhou.compbio/data/GWAS/Lung.function_fromXushen/Shrine_30804560_PEF_meta-analysis.parse.txt.gz" dirs.in.annot = c("a_annotations_bulkDP/limma_V1.4.1/", "a_annotations_gARE_emp.p.fdr.cutoff0.05/", "a_annotations_3actHM_AREGrp/") # dir.tmp.annot = "~/hptmp/eGTEx-H3K27ac/ldsc_a_2_haQTL_V1.2/" #annot_ovlpWithBaseline # if(!dir.exists(dir.tmp.annot)) # { # dir.create(dir.tmp.annot) # } # dir.tmp.annot.baseline = paste0(dir.tmp.annot, "baseline/") # if(!dir.exists(dir.tmp.annot.baseline)) # { # dir.create(dir.tmp.annot.baseline) # } dir.ou = paste0("a_2_heritPartition_byLDSC_AREGrp.bulkDP.gCRE_slctGWAS/") dir.create(dir.ou, showWarnings = F, recursive = T) #annotation of interest by regular expression for(d.i in dirs.in.annot) { annot.nms = levels(factor(sapply(dir(d.i, paste0("annot.gz"), full.names=F), #"haQTL.*\\.1.annot.gz" "diffPeak.*\\.1.annot.gz"" FUN=function(f.i) { return(gsub(".\\d+.annot.gz", "", f.i, perl=T)) }))) for(annot.nm in annot.nms)# #[1:8] [25:29] [9:16]#[17:24] { print(annot.nm) #LDSC --h2 # f.o.sh = paste0("a_2_LDSCPartit_", annot.nm, ".", TISSUE, ".sh") # write(sh.head, f.o.sh) # f.o.sh = paste0("a_2_LDSCPartit_", annot.nm, ".sh") cmds="" for(pheno in names(files.in.GWAS)) { #d.tmp.annot.nm = paste0(dir.tmp.annot, annot.nm, "/") f.o.res = paste0(dir.ou, pheno,"_", annot.nm) f.o.log = paste0(f.o.res, ".log") if(file.exists(f.o.log) && length(system(paste0("grep \"Analysis finished\" ", f.o.log), intern = T))==1 ) { next } cmd = paste0("ldsc.py", " --h2 ", files.in.GWAS[pheno], #" --ref-ld-chr ", dir.tmp.annot.baseline, "baselineLD.,", d.tmp.annot.nm, annot.nm, ".", " --ref-ld-chr ", file.in.baseline.pref, ",", d.i, annot.nm, ".", " --w-ld-chr ", file.in.weight.pref, " --overlap-annot", " --frqfile-chr ", file.in.freq.pref, " --out ", f.o.res ) cmds = paste(cmds, cmd, sep = "\n") } if(cmds!="") { write(sh.head, f.o.sh) write(cmds, f.o.sh, append = T) write("echo done!", f.o.sh, append = T) system(paste("qsub " , f.o.sh, sep="")) } } }