#V1.1 #include time difference between sample taken and lab test taken #compare resuls from different marks #including same cell type across different individuals #and same individual across different cell types #split into control and case library("RColorBrewer") GRP2REFs.SLCT=c("CD4T", "CD8T", "BCell", "NKCell", "Monocyte", "Neutrophil", "Lymphocyte") GRP2COL = brewer.pal(n = length(GRP2REFs.SLCT), name = "Set2") names(GRP2COL) = GRP2REFs.SLCT indivNum.Bin = 50 file.in.labTest.csv = "~/lhou.compbio/data/Mayo_Bipolar/labs_w_dT_DawnOfTimesToSample_forSharing.csv" file.in.meta = "~/lhou.compbio/data/Mayo_Bipolar/Batch_Feb.2019/broad_wgs_metadata.csv" file.in.meta2 = "~/lhou.compbio/data/Mayo_Bipolar/Bipolar_ptDemographics.csv" files.in.fractionEst.RData = c() files.in.fractionEst.RData["H3K27ac_nnPois"] = "./b_real_estFract_V1.4.3_nnPoisR_peakNorm_colinearity_6cellType/H3K27ac/estFraction.results.nnPoisR.FiltQ0.01.RData" files.in.fractionEst.RData["H3K4me1_nnPois"] = "./b_real_estFract_V1.4.3_nnPoisR_peakNorm_colinearity_6cellType/H3K4me1/estFraction.results.nnPoisR.FiltQ0.01.RData" files.in.fractionEst.RData["H3K36me3_nnPois"] = "./b_real_estFract_V1.4.3_nnPoisR_peakNorm_colinearity_6cellType/H3K36me3/estFraction.results.nnPoisR.FiltQ0.1.RData" files.in.fractionEst.RData["H3K27ac_nnPois_noIntcpt"] = "./b_real_estFract_V1.4.4_nnPoisR_noIntcpt_peakNorm_6cellType/H3K27ac/estFraction.results.nnPoisR.FiltQ0.01.RData" files.in.fractionEst.RData["H3K4me1_nnPois_noIntcpt"] = "./b_real_estFract_V1.4.4_nnPoisR_noIntcpt_peakNorm_6cellType/H3K4me1/estFraction.results.nnPoisR.FiltQ0.01.RData" files.in.fractionEst.RData["H3K36me3_nnPois_noIntcpt"] = "./b_real_estFract_V1.4.4_nnPoisR_noIntcpt_peakNorm_6cellType/H3K36me3/estFraction.results.nnPoisR.FiltQ0.1.RData" files.in.fractionEst.RData["H3K27ac_nnPois_noIntcpt_autoFeature"] = "./b_real_estFract_V1.4.4.1_nnPoisR_noIntcpt_peakNorm_6cellType_autoSig/H3K27ac/estFraction.results.nnPoisR.FiltQ0.01.RData" files.in.fractionEst.RData["H3K4me1_nnPois_noIntcpt_autoFeature"] = "./b_real_estFract_V1.4.4.1_nnPoisR_noIntcpt_peakNorm_6cellType_autoSig/H3K4me1/estFraction.results.nnPoisR.FiltQ0.01.RData" files.in.fractionEst.RData["H3K36me3_nnPois_noIntcpt_autoFeature"] = "./b_real_estFract_V1.4.4.1_nnPoisR_noIntcpt_peakNorm_6cellType_autoSig/H3K36me3/estFraction.results.nnPoisR.FiltQ0.1.RData" files.in.fractionEst.RData["H3K27ac_bNMF_R.A10"] = "./b_real_estFract_V1.5.1.3_colinearitySigPeaks_peakNorm_6CellType/H3K27ac_R.A10/fractEst.FiltQ0.01.RData" files.in.fractionEst.RData["H3K4me1_bNMF_R.A10"] = "./b_real_estFract_V1.5.1.3_colinearitySigPeaks_peakNorm_6CellType/H3K4me1_R.A10/fractEst.FiltQ0.01.RData" files.in.fractionEst.RData["H3K36me3_bNMF_R.A10"] = "./b_real_estFract_V1.5.1.3_colinearitySigPeaks_peakNorm_6CellType/H3K36me3_R.A10/fractEst.FiltQ0.1.RData" files.in.fractionEst.RData["H3K27ac_bNMF_R.A100"] = "./b_real_estFract_V1.5.1.3_colinearitySigPeaks_peakNorm_6CellType/H3K27ac_R.A100/fractEst.FiltQ0.01.RData" files.in.fractionEst.RData["H3K4me1_bNMF_R.A100"] = "./b_real_estFract_V1.5.1.3_colinearitySigPeaks_peakNorm_6CellType/H3K4me1_R.A100/fractEst.FiltQ0.01.RData" files.in.fractionEst.RData["H3K36me3_bNMF_R.A100"] = "./b_real_estFract_V1.5.1.3_colinearitySigPeaks_peakNorm_6CellType/H3K36me3_R.A100/fractEst.FiltQ0.1.RData" files.in.fractionEst.RData["H3K27ac_bNMF_R.A1000"] = "./b_real_estFract_V1.5.1.3_colinearitySigPeaks_peakNorm_6CellType/H3K27ac_R.A1000/fractEst.FiltQ0.01.RData" files.in.fractionEst.RData["H3K4me1_bNMF_R.A1000"] = "./b_real_estFract_V1.5.1.3_colinearitySigPeaks_peakNorm_6CellType/H3K4me1_R.A1000/fractEst.FiltQ0.01.RData" files.in.fractionEst.RData["H3K36me3_bNMF_R.A1000"] = "./b_real_estFract_V1.5.1.3_colinearitySigPeaks_peakNorm_6CellType/H3K36me3_R.A1000/fractEst.FiltQ0.1.RData" # files.in.fractionEst.RData["H3K27ac_bNMF"] = "b_real_estFract_V1.5.1_bNMF/H3K27ac/fractEst.RData" # files.in.fractionEst.RData["H3K4me1_bNMF"] = "b_real_estFract_V1.5.1_bNMF/H3K4me1/fractEst.RData" # files.in.fractionEst.RData["H3K27ac"] = "archive/a_2_estFractReal_V1.4.1_nnPoisR/H3K27ac/estFraction.results.nnPoisR.FiltQ0.01.RData" # files.in.fractionEst.RData["H3K4me1"] = "archive/a_2_estFractReal_V1.4.1_nnPoisR/H3K4me1/estFraction.results.nnPoisR.FiltQ0.01.RData" # files.in.fractionEst.RData["H3K36me3"] = "archive/a_2_estFractReal_V1.4.1_nnPoisR/H3K36me3/estFraction.results.nnPoisR.FiltQ0.01.RData" # files.in.fractionEst.RData["H3K4me1_NeutrophFromBP"] = "archive/a_2_estFractReal_V1.4.1_nnPoisR/H3K4me1_NeutrFromBP/estFraction.results.nnPoisR.FiltQ0.01.RData" # files.in.fractionEst.RData["H3K27ac_depthNormWithBP"] = "archive/a_2_estFractReal_V1.4.1_nnPoisR/H3K27ac_depthNormWithBP/estFraction.results.nnPoisR.FiltQ0.01.RData" # # file.in.fractionEst.CDseq.RData = "b_real_estFract_CDseq/H3K27ac.CDSeqTestRun.mcmc1k.df1.gsz1k.bn20.quasi.unsupervised.RData" dir.ou="c_cmpCellFractsWithEHR_6cellType_V1.1_deltaTime/" dir.create(dir.ou, showWarnings = F, recursive = T) file.ou.unNorm.pdf=paste0(dir.ou, "cmpCellFract.unNorm.pdf") file.ou.norm.pdf=paste0(dir.ou, "cmpCellFract.norm.pdf") file.ou.unNorm.dTime.pdf=paste0(dir.ou, "cmpCellFract.unNorm.dTime.pdf") file.ou.norm.dTime.pdf=paste0(dir.ou, "cmpCellFract.norm.dTime.pdf") # meta = read.table(file.in.meta, sep=",", header=T, row.names = 1) dis2samps = split(paste0("id_", rownames(meta)), meta[,"group"]) buf = read.table(file.in.meta2, sep=",", header = T, row.names = 1) samp.age = buf[,"age_atsamp"] names(samp.age) = paste0("id_", rownames(buf)) # cellCounts.info = read.table(file.in.labTest.csv, sep=",", header=T, row.names = NULL, stringsAsFactors = F, comment.char = "", quote="\"") cellCounts.inds = paste0("id_", levels(factor(cellCounts.info[,1]))) cellCounts.celltypeAndInd = matrix(NA, ncol=6, nrow=length(cellCounts.inds)) rownames(cellCounts.celltypeAndInd) = cellCounts.inds colnames(cellCounts.celltypeAndInd) =c("Lymphocyte", "Lymphocyte_dTime", "Monocyte", "Monocyte_dTime", "Neutrophil", "Neutrophil_dTime") # cellCounts.lymph = cellCounts.info[cellCounts.info$TestDesc=="Lymphocytes..", "Resultn"] names(cellCounts.lymph) = paste0("id_", cellCounts.info[cellCounts.info$TestDesc=="Lymphocytes..", 1]) cellCounts.lymph.dTime = cellCounts.info[cellCounts.info$TestDesc=="Lymphocytes..", "diff_labdate_sampdt_days"] names(cellCounts.lymph.dTime) = paste0("id_", cellCounts.info[cellCounts.info$TestDesc=="Lymphocytes..", 1]) cellCounts.celltypeAndInd[names(cellCounts.lymph), "Lymphocyte"]=cellCounts.lymph cellCounts.celltypeAndInd[names(cellCounts.lymph.dTime), "Lymphocyte_dTime"]=cellCounts.lymph.dTime # cellCounts.Mono = cellCounts.info[cellCounts.info$TestDesc=="Monocytes..", "Resultn"] names(cellCounts.Mono) = paste0("id_", cellCounts.info[cellCounts.info$TestDesc=="Monocytes..", 1]) cellCounts.Mono.dTime = cellCounts.info[cellCounts.info$TestDesc=="Monocytes..", "diff_labdate_sampdt_days"] names(cellCounts.Mono.dTime) = paste0("id_", cellCounts.info[cellCounts.info$TestDesc=="Monocytes..", 1]) cellCounts.celltypeAndInd[names(cellCounts.Mono), "Monocyte"]=cellCounts.Mono cellCounts.celltypeAndInd[names(cellCounts.Mono.dTime), "Monocyte_dTime"]=cellCounts.Mono.dTime # cellCounts.Neutr = cellCounts.info[cellCounts.info$TestDesc=="Neutrophils...", "Resultn"] names(cellCounts.Neutr) = paste0("id_", cellCounts.info[cellCounts.info$TestDesc=="Neutrophils...", 1]) cellCounts.Neutr.dTime = cellCounts.info[cellCounts.info$TestDesc=="Neutrophils...", "diff_labdate_sampdt_days"] names(cellCounts.Neutr.dTime) = paste0("id_", cellCounts.info[cellCounts.info$TestDesc=="Neutrophils...", 1]) cellCounts.celltypeAndInd[names(cellCounts.Neutr), "Neutrophil"]=cellCounts.Neutr cellCounts.celltypeAndInd[names(cellCounts.Neutr.dTime), "Neutrophil_dTime"]=cellCounts.Neutr.dTime cellCounts.celltypeAndInd.filt= cellCounts.celltypeAndInd[apply(cellCounts.celltypeAndInd, 1, FUN=function(x){all(!is.na(x))}), ] cellCounts.celltypeAndInd.filt.mean= t(apply(cellCounts.celltypeAndInd.filt[,c("Lymphocyte", "Monocyte", "Neutrophil")], 1, FUN=function(x){x/sum(x)})) if(all(apply(cellCounts.celltypeAndInd.filt[,c("Lymphocyte_dTime", "Monocyte_dTime", "Neutrophil_dTime")], 1, FUN=function(x){all(x==x[1])}))) { print("same time for different cell types for each individual") } # ind2cellFrac.unNorm.mean = lapply(files.in.fractionEst.RData, FUN=function(f.i.RData) { load(f.i.RData) Grp.slct.props.unNorm.mean = sapply(colnames(Grp.slct.props.sampling.unNorm[[1]]), FUN=function(ref) { ref.props= sapply(Grp.slct.props.sampling.unNorm, FUN=function(x) x[,ref]) return(apply(ref.props, 1, mean)) }) #colnames(Grp.slct.props.unNorm.mean) = REF2CELLTYPE[colnames(Grp.slct.props.unNorm.mean)] return(Grp.slct.props.unNorm.mean) }) ind2cellFrac.mean = lapply(files.in.fractionEst.RData, FUN=function(f.i.RData) { load(f.i.RData) Grp.slct.props.mean = sapply(colnames(Grp.slct.props.sampling[[1]]), FUN=function(ref) { ref.props= sapply(Grp.slct.props.sampling, FUN=function(x) x[,ref]) return(apply(ref.props, 1, mean)) }) #colnames(Grp.slct.props.mean) = REF2CELLTYPE[colnames(Grp.slct.props.mean)] return(Grp.slct.props.mean) }) # ###################################################### # #compare with CDseq results # load(file.in.fractionEst.CDseq.RData) # # ind2cellFrac.unNorm.mean=c(ind2cellFrac.unNorm.mean,CDseq=list(t(result_histone_RR$estProp))) # colnames(ind2cellFrac.unNorm.mean[["CDseq"]]) = REF2CELLTYPE[colnames(ind2cellFrac.unNorm.mean[["CDseq"]])] # # ind2cellFrac.mean=c(ind2cellFrac.mean,CDseq=list(t(result_histone_RR$estProp))) # colnames(ind2cellFrac.mean[["CDseq"]]) = REF2CELLTYPE[colnames(ind2cellFrac.mean[["CDseq"]])] # ##################################################### # pdf(file.ou.unNorm.pdf, width=20, height=12) # layout(matrix(1:18, ncol=6)) # # for(i in 1:(length(ind2cellFrac.unNorm.mean))) # { # inds.ovlp = intersect(rownames(ind2cellFrac.unNorm.mean[[i]]), # rownames(cellCounts.celltypeAndInd.filt)) # case.ovlp = intersect(dis2samps[["case"]], inds.ovlp) # ctrl.ovlp = intersect(dis2samps[["control"]], inds.ovlp) # # for(cellType in colnames(cellCounts.celltypeAndInd.filt)) # { # if(cellType %in% colnames(ind2cellFrac.unNorm.mean[[i]])) # { # fractEst = ind2cellFrac.unNorm.mean[[i]][inds.ovlp, cellType] # }else # { # fractEst = apply(ind2cellFrac.unNorm.mean[[i]][inds.ovlp, c("CD4T", "CD8T", "BCell", "NKCell")], 1, sum) # } # # pcc = cor(fractEst, # cellCounts.celltypeAndInd.filt[inds.ovlp, cellType] # ) # pcc.case = signif(cor(fractEst[case.ovlp], # cellCounts.celltypeAndInd.filt.mean[case.ovlp, cellType] # ),3) # pcc.ctrl = signif(cor(fractEst[ctrl.ovlp], # cellCounts.celltypeAndInd.filt.mean[ctrl.ovlp, cellType] # ),3) # # plot(cellCounts.celltypeAndInd.filt[inds.ovlp, cellType], # fractEst, # xlab="lab test", # ylab=paste0("estimate from ", names(ind2cellFrac.mean)[i]), # main=paste0("faction estimated for ", cellType, "\nPCC",signif(pcc, 3)), # col= c("blue", "red")[(inds.ovlp %in% case.ovlp) +1], # pch=c(19, 4)[(inds.ovlp %in% case.ovlp) +1]) # legend("topleft", # legend = paste0(c("contrl PCC: ", "case PCC: "), c(pcc.ctrl, pcc.case)) , # col = c("blue", "red"), # pch=c(19, 4) # ) # #abline(a=0, b=1, lty=2, lwd=2) # } # # } # # dev.off() pdf(file.ou.norm.pdf, width=15, height=15) layout(matrix(1:9, ncol=3)) par(mar=c(4,7,5,5)) for(i in 1:(length(ind2cellFrac.mean))) { inds.ovlp = intersect(rownames(ind2cellFrac.mean[[i]]), rownames(cellCounts.celltypeAndInd.filt)) case.ovlp = intersect(dis2samps[["case"]], inds.ovlp) ctrl.ovlp = intersect(dis2samps[["control"]], inds.ovlp) for(cellType in c("Lymphocyte", "Monocyte", "Neutrophil")) { if(cellType %in% colnames(ind2cellFrac.mean[[i]])) { fractEst = ind2cellFrac.mean[[i]][inds.ovlp, cellType] }else { fractEst = apply(ind2cellFrac.mean[[i]][inds.ovlp, c("CD4T", "CD8T", "BCell", "NKCell")], 1, sum) } pcc = cor(fractEst, cellCounts.celltypeAndInd.filt.mean[inds.ovlp, cellType] ) pcc.case = signif(cor(fractEst[case.ovlp], cellCounts.celltypeAndInd.filt.mean[case.ovlp, cellType] ),3) pcc.ctrl = signif(cor(fractEst[ctrl.ovlp], cellCounts.celltypeAndInd.filt.mean[ctrl.ovlp, cellType] ),3) plot(cellCounts.celltypeAndInd.filt.mean[inds.ovlp, cellType], fractEst, xlab="lab test", ylab=paste0("estimate from ", names(ind2cellFrac.mean)[i]), main=paste0("faction estimated for ", cellType, "\nPCC",signif(pcc, 3)), col= c("blue", "red")[(inds.ovlp %in% case.ovlp) +1], pch=c(19, 4)[(inds.ovlp %in% case.ovlp) +1]) legend("topleft", legend = paste0(c("contrl PCC: ", "case PCC: "), c(pcc.ctrl, pcc.case)) , col = c("blue", "red"), pch=c(19, 4) ) abline(lm(fractEst[case.ovlp]~cellCounts.celltypeAndInd.filt.mean[case.ovlp, cellType]), lty=2, lwd=2, col="red") abline(lm(fractEst[ctrl.ovlp]~cellCounts.celltypeAndInd.filt.mean[ctrl.ovlp, cellType]), lty=2, lwd=2, col="blue") } } dev.off() #sort according to time delta pdf(file.ou.norm.dTime.pdf, width=15, height=20) plot(samp.age[rownames(cellCounts.celltypeAndInd.filt)], cellCounts.celltypeAndInd.filt[,"Lymphocyte_dTime"], xlab="age", ylab="deltaTime") layout(matrix(1:15, ncol=3)) par(mar=c(4,7,5,5)) inds.sortedByDtime = rownames(cellCounts.celltypeAndInd.filt)[order(cellCounts.celltypeAndInd.filt[,"Lymphocyte_dTime"], decreasing = T)] cellCounts.celltypeAndInd.filt.totalCounts = apply(cellCounts.celltypeAndInd.filt[,c("Lymphocyte", "Monocyte", "Neutrophil")], 1, sum) cellCounts.celltypeAndInd.filt.normalized = round(sweep(cellCounts.celltypeAndInd.filt[,c("Lymphocyte", "Monocyte", "Neutrophil")], 1, mean(cellCounts.celltypeAndInd.filt.totalCounts)/cellCounts.celltypeAndInd.filt.totalCounts*10, "*")) for(i in 1:(length(ind2cellFrac.mean))) { inds.ovlp = intersect(inds.sortedByDtime, rownames(ind2cellFrac.mean[[i]])) inds.ovlp.sortedBins = lapply(1:(length(inds.ovlp)-indivNum.Bin+1), FUN=function(i) { inds.ovlp[i:(i+indivNum.Bin-1)] }) bin.dTime.median = sapply(inds.ovlp.sortedBins, FUN=function(inds) { median(cellCounts.celltypeAndInd.filt[inds,"Lymphocyte_dTime"]) }) fractEsts = sapply(c("Lymphocyte", "Monocyte", "Neutrophil"), FUN=function(cellType) { if(cellType %in% colnames(ind2cellFrac.mean[[i]])) { return(ind2cellFrac.mean[[i]][inds.ovlp, cellType]) }else { return(apply(ind2cellFrac.mean[[i]][inds.ovlp, c("CD4T", "CD8T", "BCell", "NKCell")], 1, sum)) } }) colnames(fractEsts) = c("Lymphocyte", "Monocyte", "Neutrophil") rownames(fractEsts) = inds.ovlp for(cellType in c("Lymphocyte", "Monocyte", "Neutrophil")) { pccs = sapply(inds.ovlp.sortedBins, FUN=function(inds) { cor(fractEsts[inds, cellType], cellCounts.celltypeAndInd.filt.mean[inds, cellType]) }) plot(bin.dTime.median, pccs, xlab="median delta time (days) for each individuals bin", ylab=paste0("correlation between lab test counts and\n", names(ind2cellFrac.mean)[i]), main=paste0("fraction estimated for ", cellType)) # legend("topleft", # legend = paste0(c("contrl PCC: ", "case PCC: "), c(pcc.ctrl, pcc.case)) , # col = c("blue", "red"), # pch=c(19, 4) # ) # abline(lm(fractEst[case.ovlp]~cellCounts.celltypeAndInd.filt.mean[case.ovlp, cellType]), lty=2, lwd=2, col="red") # abline(lm(fractEst[ctrl.ovlp]~cellCounts.celltypeAndInd.filt.mean[ctrl.ovlp, cellType]), lty=2, lwd=2, col="blue") } #likelihood based on multinomial distribution probs = sapply(inds.ovlp, FUN=function(ind) { prob = fractEsts[ind, c("Lymphocyte", "Monocyte", "Neutrophil")] counts = cellCounts.celltypeAndInd.filt.normalized[ind,c("Lymphocyte", "Monocyte", "Neutrophil")] dmultinom(x=counts, prob=prob, log = T) }) plot(cellCounts.celltypeAndInd.filt[inds.ovlp,"Lymphocyte_dTime"], probs, xlab="delta time (days) for each individuals", ylab=paste0("log probability of cell counts based on \n", names(ind2cellFrac.mean)[i]), main=paste0("fraction estimated for ", cellType)) probs.bins = sapply(inds.ovlp.sortedBins, FUN=function(inds) { mean(probs[inds]) }) plot(bin.dTime.median, probs.bins, xlab="median delta time (days) for each individuals bin", ylab=paste0("mean log probability of cell counts based on \n", names(ind2cellFrac.mean)[i]), main=paste0("fraction estimated for ", cellType)) } dev.off()