#V1.1 #all sample cluster #try hcluster on the correlation matrix too #use PCs to cluster patient samples to understand their heterogeneity #first find K nearest neighbor and then cluster #and compare the membership of patients across different marks #use normalized mutual informaiton to R.LIBS.PATH = .libPaths() .libPaths(c("~/lhou.compbio/libs/R/x86_64-pc-linux-gnu-library/3.6/", R.LIBS.PATH)) # library(ggplot2) # library(ggrepel) library(MOFA) library(igraph) library(pheatmap) library(ggplot2) library(Rtsne) library(entropy) library(RColorBrewer) library(ComplexHeatmap) COR.CUTOFF=0.4 NEAREST_K=10 colors = brewer.pal(10, "Set3") #args = commandArgs(trailingOnly=TRUE) #TYPE="HiC.DPs" #file.in.factor.RData = paste0("b_factorAnalysis_across5HMs_MOFA/HiC.DPs/ind.MOFA.topPeak10000.varProp0.01.RData") file.in.factor.RData = paste0("b_factorAnalysis_across5HMs_MOFA_V1.2_pvalCutoff/HiC.DPs/ind.MOFA.DP-pval0.01.varProp0.005.RData")# file.in.meta.csv = "~/lhou.compbio/data/Mayo_Bipolar/Batch_Feb.2019/broad_wgs_metadata.csv" #dir.ou=paste0("b_factorAnalysis_acrossHMs_MOFA/", TYPE, "/") #dir.create(dir.ou, showWarnings = F, recursive = T) file.ou.RData = gsub(".RData", ".sampClu_V1.1.RData", file.in.factor.RData) file.ou.pdf = gsub(".RData", ".sampClu_V1.1.pdf", file.in.factor.RData) file.ou.tsv = gsub(".RData", ".sampClu_V1.1.tsv", file.in.factor.RData) # # file.ou.RData = paste0(dir.ou, "patientclu.RData") # file.ou.clus.pdf = paste0(dir.ou, "patientHeter.pdf") # file.ou.cluCmp.pdf = paste0(dir.ou, "patientHeter.cmpAcrossHMs.pdf") # # # meta = read.table(file.in.meta.csv, sep=",", header=T, row.names=1) rownames(meta)=paste("id", rownames(meta), sep="_") samps.case = rownames(meta)[meta$group=="case"] #cluster #from correlation to cluster # getClusterFromCors_kNN_Louvain=function(cors, cor.cutoff, nearest_K, weighted=T) { adj.matrix = cors adj.matrix[cors<=cor.cutoff] =0 diag(adj.matrix)=0 print(paste0("average #nearst neighbor: ", sum(adj.matrix!=0)/nrow(adj.matrix))) adj.matrix.isKNN = apply(adj.matrix, 2, FUN=function(x) { w=rep(1, length(x)) x.k = sort(x, decreasing = T)[nearest_K] w[x