# gtex_v8_cis_qtls_mashr: See the MashR preprint for a description of the method: https://www.biorxiv.org/content/early/2018/09/21/096552 # read in mashR output se = read.table('./gtex_v8_cis_qtls_mashr/stephenslab-gtexresults-961b969/output/standard.error.txt',head=T) mean.list = readRDS('./gtex_v8_cis_qtls_mashr/stephenslab-gtexresults-961b969/output/MatrixEQTLSumStats.Portable.Z.coved.K3.P3.lite.single.expanded.V1.posterior.rds') mean = mean.list$posterior.means lfsr = mean.list$lfsr all(rownames(se) == rownames(mean)) colnames(mean) = colnames(se) colnames(lfsr) = colnames(se) se = data.frame(se[,c('Brain_Frontal_Cortex_BA9','Heart_Left_Ventricle','Muscle_Skeletal','Lung')]) mean = data.frame(mean[,c('Brain_Frontal_Cortex_BA9','Heart_Left_Ventricle','Muscle_Skeletal','Lung')]) lfsr = data.frame(lfsr[,c('Brain_Frontal_Cortex_BA9','Heart_Left_Ventricle','Muscle_Skeletal','Lung')]) colnames(se) = c('Brain','Heart','Muscle','Lung') colnames(mean) = c('Brain','Heart','Muscle','Lung') colnames(lfsr) = c('Brain','Heart','Muscle','Lung') mean.df = mean tissues = c('Brain','Heart','Muscle','Lung') for(tis in tissues){ se.other = se[,(tis != colnames(se))] mean.other = mean[,(tis != colnames(mean))] tis.other = colnames(se.other) mean.other$max = apply(mean.other[,tis.other], 1, FUN = max) mean.other$min = apply(mean.other[,tis.other], 1, FUN = min) mean.other[,'If.strongest'] = 0 mean.other$If.strongest[mean[,tis] > mean.other$max & mean[,tis]>0] = 1 mean.other$If.strongest[mean[,tis] < mean.other$min & mean[,tis]<0] = -1 mean.other[,'If.eQTL'] = 0 mean.other$If.eQTL[lfsr[,tis]<0.05] = 1 mean.df[,paste0('If.eQTL.',tis)] = mean.other$If.eQTL mean.df[,paste0('If.strongest.',tis)] = mean.other$If.strongest for(tis2 in tis.other){ mean.other[,paste0('Z.vs_',tis2)] = (mean[,tis] - mean[,tis2])/(se[,tis]^2 + se[,tis2]^2)^(1/2) } z.df = mean.other[,paste0('Z.vs_',tis.other)] mean.df[,paste0('min.abs_z_diff.',tis)] = apply(abs(z.df), 1, FUN = min) } saveRDS(mean.df,'eQTL.sharing.mashR.rds') saveRDS(mean,'eQTL.mean.mashR.rds') saveRDS(se,'eQTL.se.mashR.rds') # define tissue specific eQTL for(tis in tissues){ mean.df.tis = mean.df[mean.df[,paste0('If.strongest.',tis)] != 0 & mean.df[,paste0('If.eQTL.',tis)] ==1,] # print(summary(mean.df.tis[,paste0('min.abs_z_diff.',tis)])) } # read in tissue-specific haQTL # read in mashR output tissues = c('Brain','Heart','Muscle','Lung') mean = read.table('./gtex_v8_cis_qtls_mashr/gtex_v8.eqtls.all_top.z_beta.txt.gz',head=T,sep='\t') lfsr = read.table('./gtex_v8_cis_qtls_mashr/gtex_v8.eqtls.all_top.z_lfsr.txt.gz',head=T,sep='\t') all(colnames(lfsr) == colnames(mean)) mean = data.frame(mean[,c('gene','variant','Brain_Frontal_Cortex_BA9','Heart_Left_Ventricle','Muscle_Skeletal','Lung')]) lfsr = data.frame(lfsr[,c('gene','variant','Brain_Frontal_Cortex_BA9','Heart_Left_Ventricle','Muscle_Skeletal','Lung')]) colnames(mean) = c('gene','variant','Brain','Heart','Muscle','Lung') colnames(lfsr) = c('gene','variant','Brain','Heart','Muscle','Lung') mean$pair = paste0(mean$gene,'_',mean$variant) lfsr$pair = paste0(lfsr$gene,'_',lfsr$variant) all(lfsr$pair == mean$pair) lfsr$min = apply(lfsr[,tissues], 1, FUN = min) mean = mean[lfsr$min < 0.05,] lfsr = lfsr[lfsr$min < 0.05,] p.list = list() #p.tis1 = list() #p.tis2 = list() p.list.norm1 = list() p.list.norm2 = list() library(ggplot2) k = 1 library(stringr) for(tis1 in tissues){ df = read.table(paste0('./haQTL_hg38.diff_cutoff.100k.re/haQTL.tissue.compare/',tis1,'.emp.p.cutoff0.005.haQTLPeak.cutoffs.hg38.integratePeak.leadSNP.compare2otherTissue.parsed.txt'),head=T) for(tis2 in tissues){ if(tis1 == tis2){next} print(tis1) print(tis2) p.tis = list() df.comp = df[df$Rep.Tiss==tis2,] # haQTL df.comp$if.shared = 0 df.comp$if.shared[df.comp$Rep.nominalP < 0.02 & df.comp$Rep.beta * df.comp$Discover.beta>0] = 1 mean.comp = mean[lfsr[,tis1] < 0.05,] # eQTL beta mean.comp$abs.diff.beta = abs(mean.comp[,tis1] - mean.comp[,tis2]) mean.comp$abs.diff.beta.norm.tis1 = abs(mean.comp$abs.diff.beta/mean.comp[,tis1]) mean.comp$abs.diff.beta.norm.tis2 = abs(mean.comp$abs.diff.beta/mean.comp[,tis2]) merge.df = merge(df.comp,mean.comp,by.x='Top.haQTL',by.y='variant') p.list[[k]] = ggplot(merge.df,aes(x=as.factor(if.shared),y=abs.diff.beta)) + geom_boxplot() + theme_classic() + ggtitle(paste0(tis1,' vs ',tis2,'\nN0=',nrow(merge.df[merge.df$if.shared==0,]),' N1=',nrow(merge.df[merge.df$if.shared==1,]))) p.tis[[1]] = ggplot(merge.df,aes(x=as.factor(if.shared),y=abs(merge.df[,tis1]))) + geom_boxplot() + theme_classic() + ylab('abs eQTL z in tis1')+ ggtitle(paste0(tis1,' vs ',tis2,'\nN0=',nrow(merge.df[merge.df$if.shared==0,]),' N1=',nrow(merge.df[merge.df$if.shared==1,]))) # print(p) p.tis[[2]] = ggplot(merge.df,aes(x=as.factor(if.shared),y=abs(merge.df[,tis2]))) + geom_boxplot() + theme_classic() + ylab('abs eQTL z in tis2')+ ggtitle(paste0(tis1,' vs ',tis2,'\nN0=',nrow(merge.df[merge.df$if.shared==0,]),' N1=',nrow(merge.df[merge.df$if.shared==1,]))) # print(p) p.list.norm1[[k]] = ggplot(merge.df,aes(x=as.factor(if.shared),y=abs.diff.beta.norm.tis1)) + geom_boxplot() + theme_classic() + ggtitle(paste0(tis1,' vs ',tis2,'\nN0=',nrow(merge.df[merge.df$if.shared==0,]),' N1=',nrow(merge.df[merge.df$if.shared==1,]))) p.list.norm2[[k]] = ggplot(merge.df,aes(x=as.factor(if.shared),y=abs.diff.beta.norm.tis2)) + geom_boxplot() + theme_classic() + ggtitle(paste0(tis1,' vs ',tis2,'\nN0=',nrow(merge.df[merge.df$if.shared==0,]),' N1=',nrow(merge.df[merge.df$if.shared==1,]))) k = k+1 do.call("grid.arrange", c(p.tis, nrow=1)) } } library(gridExtra) pdf('haQTL.sharing.vs.eQTL.beta_difference.pdf',width=10,height=16,useDingbats=F) do.call("grid.arrange", c(p.list, nrow=4)) do.call("grid.arrange", c(p.list.norm1, nrow=4)) do.call("grid.arrange", c(p.list.norm2, nrow=4)) #do.call("grid.arrange", c(p.tis1, nrow=4)) #do.call("grid.arrange", c(p.tis2, nrow=4)) dev.off() # read in mashR output tissues = c('Brain','Heart','Muscle','Lung') mean = read.table('./gtex_v8_cis_qtls_mashr/gtex_v8.eqtls.all_top.z_beta.txt.gz',head=T,sep='\t') lfsr = read.table('./gtex_v8_cis_qtls_mashr/gtex_v8.eqtls.all_top.z_lfsr.txt.gz',head=T,sep='\t') all(colnames(lfsr) == colnames(mean)) mean = data.frame(mean[,c('gene','variant','Brain_Frontal_Cortex_BA9','Heart_Left_Ventricle','Muscle_Skeletal','Lung')]) lfsr = data.frame(lfsr[,c('gene','variant','Brain_Frontal_Cortex_BA9','Heart_Left_Ventricle','Muscle_Skeletal','Lung')]) colnames(mean) = c('gene','variant','Brain','Heart','Muscle','Lung') colnames(lfsr) = c('gene','variant','Brain','Heart','Muscle','Lung') mean$pair = paste0(mean$gene,'_',mean$variant) lfsr$pair = paste0(lfsr$gene,'_',lfsr$variant) all(lfsr$pair == mean$pair) #lfsr$min = apply(lfsr[,tissues], 1, FUN = min) #mean = mean[lfsr$min < 0.05,] #lfsr = lfsr[lfsr$min < 0.05,] # define tissue specific eQTL library(plyr) p.list = list() p.list2 = list() k = 1 merge.df3 = data.frame() for(tis in tissues){ mean.tis = mean[!is.na(lfsr[,tis]) & lfsr[,tis] < 0.05,] # eQTL beta lfsr.tis = lfsr[!is.na(lfsr[,tis]) & lfsr[,tis] < 0.05,] merge.df = merge(mean.tis,lfsr.tis,by='pair',suffixes=c('.beta','.lfsr')) df = read.table(paste0('./haQTL_hg38.diff_cutoff.100k.re/',tis,'.emp.p.cutoff0.005.gARE.Types.txt'),head=T,sep='\t') for(tis2 in tissues){ if(tis2 == tis){next} print(paste0(tis,' ',tis2)) merge.df$If.eQTL_shared = 0 merge.df$If.eQTL_shared[ !is.na(merge.df[,paste0(tis2,'.lfsr')]) & merge.df[,paste0(tis2,'.lfsr')]<0.05 & merge.df[,paste0(tis,'.beta')] * merge.df[,paste0(tis2,'.beta')] >0] = 1 df.tis2 = df df.tis2$If.gARE_shared = 0 df.tis2$If.gARE_shared[grepl(tis2,df.tis2$gARE.sharing_tissue)] = 1 intersect.df = merge(merge.df,df.tis2,by.x='variant.beta',by.y='lead_haQTL') sum = data.frame(table(intersect.df$If.eQTL_shared,intersect.df$If.gARE_shared)) colnames(sum) = c('If.eQTL_shared','If.gARE_shared','Freq') sum = ddply(sum,'If.eQTL_shared',transform,perc=Freq/sum(Freq)*100) counts = table(intersect.df$If.gARE_shared) p.list[[k]] = ggplot(sum,aes(x=If.eQTL_shared,y=perc,fill=If.gARE_shared)) + theme_classic() + theme(aspect.ratio=1.5,legend.position="top") + geom_bar(stat="identity", colour="black") + guides(fill=guide_legend(reverse=TRUE)) + ggtitle(paste0(tis,' vs ',tis2,'\nN gARE0,1 = ',counts[['0']],' ',counts[['1']])) p.list2[[k]] = ggplot(sum,aes(x=If.eQTL_shared,y=perc,fill=If.gARE_shared)) + theme_classic() + theme(aspect.ratio=1.5,legend.position="top") + geom_bar(stat="identity", colour="black",width=0.75, position="dodge") + guides(fill=guide_legend(reverse=TRUE)) + ggtitle(paste0(tis,' vs ',tis2,'\nN gARE0,1 = ',counts[['0']],' ',counts[['1']])) k = k+1 } } pdf('Tissue_specific_eQTL.vs.tiss_specific_gARE.pair.pdf',useDingbats=T,width=12,height=10) do.call("grid.arrange", c(p.list, nrow=4)) do.call("grid.arrange", c(p.list2, nrow=4)) dev.off()