Rscripts/clustering/SAFE_2.1_Linux/SAFE_modified.R

# NOTE! This scripts has been modified from the one downloaded from
# https://yunliweb.its.unc.edu//safe/download.php, in order to set the number of
# cores to use for the clustering. All modifications are indicated in the code.
# 
# !/usr/bin/env Rscript
# 11/10/2017

# @title Single-cell Aggregated clustering From Ensemble (SAFE)
#
# @description SAFE (Single-cell Aggregated clustering From Ensemble): Cluster ensemble for single-cell RNA-seq data
# 
# @param cluster_results a J*N matrix with J individual clustering methods and N cells
# @param k_min defines the minimum number of clusters used for ensembel clustering. Default is 2.
# @param k_max defines the maximum number of clusters used for ensembel clustering. Default is 
# the maximum cluster number estimated by all the single solutions.
# @param MCLA a boolean parameter that defines whether to use MCLA algorithm for ensemble clustering. 
# Default is "TRUE".
# @param HGPA a boolean parameter that defines whether to use HGPA algorithm for ensemble clustering. 
# Default is "FALSE".
# @param CSPA a boolean parameter that defines whether to use CSPA algorithm for ensemble clustering. 
# Default is "FALSE".
# @param cspc_cell_max defines the maximum number of cells above which CSPA is not run, when \code{CSPA = TRUE}.
# @param SEED sets the seed of the random number generator. Setting the seed to a fixed value can produce 
# reproducible clustering results for MCLA and CSPA algorithms. 
#
# @return a list containing the overall optimal ensemble clustering and cluster number, 
# as well as the optimal resutls for each algorithm.
#
# @author Yuchen Yang <yyuchen@email.unc.edu>, Ruth Huh <rhuh@live.unc.edu>, 
# Houston Culpepper <hculpepp@live.unc.edu>, Yun Li <yunli@med.unc.edu>
# @citation Yuchen Yang, Ruth Huh, Houston Culpepper, Yun Li. SAFE (Single-cell Aggregated clustering From Ensemble): Cluster ensemble for single-cell RNA-seq data. 2017
SAFE <- function(cluster_results, k_min = 2, k_max = NULL, MCLA = TRUE, HGPA = FALSE, CSPA = FALSE, cspc_cell_max = NULL, SEED = 1){
  ### Example for Input data
  ### cluster_results = matrix(data = c(1,1,1,2,2,3,3,2,2,2,3,3,1,1,1,1,2,2,3,3,3),nrow = 3, byrow = T)
  
  ## Modification: Only set seed to SEED if not NULL, otherwise generate a random seed
  if (is.null(SEED)) {
    SEED <- round(1e6*runif(1))
  }
  set.seed(SEED)
  
  ### Transforming missing data into NA
  for (i in 1:nrow(cluster_results)){
    cluster_results[i,which(is.na(cluster_results[i,]))] <- 0
  }

  ###remapping cluster labels where needed
  for (r in 1:nrow(cluster_results)){
    labels <- cluster_results[r,]
    if (max(labels) != length(levels(as.factor(labels)))){
      labels_new <- cbind(labels, t(t(c(1:length(labels)))))
      labels_inorder <- labels_new[order(labels_new[,1]),]
      last <- NULL
      curind <- 0
      for (i in 1:nrow(labels_inorder)){
        if (last != labels_inorder[i,1] || is.null(last)){
          last <- labels_inorder[i,1]
          curind <- curind + 1
        }
        labels_inorder[i,1] <- curind
      }
      labels_inorder <- labels_inorder[order(labels_inorder[,2]),]
      labels_new <- t(labels_inorder[,1])
      cluster_results[r,] <- labels_new
    }
  }
  
  if(is.null(k_max)){
    k_max <- max(cluster_results)
  }
  

  ### Function hypergraph_generation performs transformation from clustering results to hypergraph
  # object contains clustering results generated by all the single-cell clustering methods. nrow equals to No. of methods, and ncol is No. of cells
  hypergraph_generation <- function(object){
    hypergraph <- matrix(data = 0, nrow = length(object), ncol = max(object))
    for (i in 1:length(hypergraph[1,])){
      hypergraph[,i] <- as.numeric(object == i)
    }
    return(hypergraph)
  }
  
  
  ### Define function ints
  ints <- function(s, tafter = 1000000000){
    tbefore <- sum(s)
    while (tafter/tbefore <= 0.5){
      tafter = tafter * 10
    }
    if (tbefore != 0){
      s = round(s * (tafter/tbefore))
    }
    #p <- 1 - sum(as.numeric(s>0))/(nrow(s)*ncol(s))
    return(s)
  }
  
  
  ### Function wgraph
  # object is the hypergraph generated before
  wgraph <- function(object, w = NULL, method = NULL, dataname = NULL) {
    library("bit64")
    options(scipen = 100)
    if (is.null(method)){
      method <- 0
    }
    if (is.null(dataname)){
      dataname <- "graph"
    }
    dataname <- paste(dataname, method, sep = "")
    
    if (method == 0 || method == 1){
      object <- object - diag(diag(object))
    }
    
    object <- ints(object)
    
    if (method == 1){
      w <- ints(w)
    }
    
    while (as.numeric(file.exists(dataname))){
      dataname <- paste(dataname, method, sep = "")
    }
    
    print(paste("wgraph: writing ", dataname, sep = ""))
    
    file_name = paste(dataname, sep = "")
    if (method == 0){
      object_sum <- sum(as.numeric(object > 0))/2
      if (object_sum < ceiling(object_sum)){
        x <- as.numeric(c(nrow(object),format(object_sum, scientific = T), 1))
      } else{
        x <- c(nrow(object), object_sum, 1)
      }
      x <- as.integer64(x)
      write.table(x, file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = " ")
      write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
    } else if (method == 1){
      object_sum <- sum(as.numeric(object > 0))/2
      if (object_sum < ceiling(object_sum)){
        x <- as.numeric(c(nrow(object),format(object_sum, scientific = T), 11))
      } else{
        x <- c(nrow(object), object_sum, 11)
      }
      x <- as.integer64(x)
      write.table(x, file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = " ")
      write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
    } else {
      validcolumns <- which(colSums(object) > 0)
      if (length(validcolumns) != ncol(object)){
        print ("wgraph: removing empty feature columns")
        object <- object[, validcolumns]
      }
      x <- c(ncol(object), nrow(object), 1)
      x <- as.integer64(x)
      write.table(x, file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = " ")
      write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
    }
    
    if (method == 0 || method == 1){
      for (i in 1:nrow(object)){
        if (sum(object[i,]) == 0){
          write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
        } else{
          edges <- which(object[i,] > 0)
          weights <- object[i,edges]
          if (method == 0){
            interlaced <- vector(mode = "numeric", length = 2 * length(edges))
            interlaced[seq(1, 2 * length(edges) -1, 2)] <- edges
            interlaced[seq(2, 2 * length(edges), 2)] <- weights
          } else {
            interlaced <- vector(mode = "numeric", length = 2 * length(edges) + 1)
            interlaced[1] <- w[i]
            interlaced[seq(2, 2 * length(edges), 2)] <- edges
            interlaced[seq(3, 2 * length(edges) + 1, 2)] <- weights
          }
          interlaced <- as.integer64(interlaced)
          write.table(interlaced, file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = " ", append = TRUE)
          write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
        }
      }
    } else {
      print (paste('wgraph: ', nrow(object), ' vertices and ', ncol(object), ' non-zero hyperedges', sep = ""))
      for (i in 1:ncol(object)){
        if (sum(object[,i]) == 0){
          write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
        } else {
          edges <- which(object[,i] > 0)
          if (method == 2){
            weight <- sum(object[,i])
          } else {
            weight <- w[i]
          }
          interlaced <- c(weight, edges)
          interlaced <- as.integer64(interlaced)
          write.table(interlaced, file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = " ", append = TRUE)
          write.table("\n", file = file_name, row.names = FALSE, col.names = FALSE, quote = FALSE, eol = "", append = TRUE)
        }
      }
    }
    return(dataname)
  }
  
  
  ### Function sgraph performs cluster ensembles using pmeits or shmetis.
  ### Be sure that both gpmetis and shmetis are located at the work directory and are executable.
  # k is the No. of clusters.
  # dataname is filename returned by function wgraph.
  # Moreover, this function requires to install package "stringr"
  sgraph <- function(k, dataname){
    library("stringr")

    if (is.null(dataname)){
      dataname <- "graph0"
    }
    result_name <- paste(dataname, '.part.', k, sep = '')
    
    last_char <- as.numeric(substr(dataname, str_length(dataname), str_length(dataname)))
    
    if (is.null(last_char)){
      print ("sgraph: file dose not comply to name convention")
      last_char <- 0
    }
    
    command <- NULL
    if (last_char < 2){
      command <- paste('./gpmetis -seed=', SEED, ' ', dataname, ' ', k, sep = '')
      employed_method <- "gpmetis"
      print (paste('cluster ensembles: ', command, sep = ''))
    } else {
      ubfactor <- 5
      command <- paste('./shmetis ', dataname, ' ', k, ' ', ubfactor, sep = '')
      employed_method <- "shmetis"
      print (paste('cluster ensembles: ', command, sep = ''))
    }
    
    system(command)
    
    fid <- read.table(result_name)
    if (is.null(fid)){
      print ('sgraph: partitioning not successful due to external error')
      fid <- read.table(dataname)
      if (is.null(fid)){
        print ('sgraph: graph file is not accessible')
      } else{
        if (last_char >= 2){
          junk <- read.table("result_name", head = FALSE)
        }
        labels <- matrix(1, ncol = length(fid[,1]))
        if (is.null(labels)){
          print ('sgraph: empty label vetor - suspecting file system full')
        }
      }
    } else{
      print (paste('sgraph: ', employed_method, ' completely loads ', result_name, sep = ''))
      labels <- t(as.matrix(fid + 1))
    }
    return(labels)
  }
  
  
  ### Function evolmutual is for transforming mutual information between the lables of ensemble cluster and clustering.
  # labels are the ensemble cluster labels generated by function sgraph.
  # object is the cluster labels of individual single-cell clustering method.
  evalmutual <- function(labels, object){
    remappedecl <- matrix(0, ncol = length(object))
    A = matrix(0, nrow = max(object), ncol = 2 + max(labels))
    for (i in 1:max(object)){
      activepoints <- which(object == i)
      composition <- hist(labels[activepoints], breaks = c(0:max(labels)), plot = FALSE)
      j <- which(composition$counts == max(composition$counts))
      j <- j[1]
      A[i,] <- c(j, i, composition$counts)
    }
    A <- A[order(A[,1]),]
    A <- A[,c(-1, -2)]
    
    pdf <- A/sum(A)
    pdf_trans <- pdf
    pdf_trans[which(pdf_trans == 0)] = 1
    px <- rowSums(pdf)
    px_trans <- px
    px_trans[which(px_trans == 0)] = 1
    py <- colSums(pdf)
    py_trans <- py
    py_trans[which(py_trans == 0)] = 1
    
    if (length(px) > 1){
      hxbk <- -sum(px * log2(px_trans))
    } else {
      hxbk <- 0
    }
    if (length(py) > 1){
      hybg <- -sum(py * log2(py_trans))
    } else {
      hybg <- 0
    }
    
    if (length(px) * length(py) == 1 || hxbk ==0 || hybg == 0){
      p <- 0
    } else {
      p <- sum(pdf * log2(pdf_trans/(px%*%t(py))))/sqrt(hxbk * hybg)
    }
    return(p)
  }
  
  
  ### Function check can validate and fix problems in cluster labels
  # labels are the ensemble cluster lables generated by function sgraph.
  check <- function(labels){
    labels_new <- cbind(t(labels), t(t(c(1:length(labels)))))
    labels_inorder <- labels_new[order(labels_new[,1]),]
    
    last <- NULL
    curind <- 0
    for (i in 1:nrow(labels_inorder)){
      if (last != labels_inorder[i,1] || is.null(last)){
        last <- labels_inorder[i,1]
        curind <- curind + 1
      }
      labels_inorder[i,1] <- curind
    }
    labels_inorder <- labels_inorder[order(labels_inorder[,2]),]
    labels_new <- t(labels_inorder[,1])
    
    if (max(labels) != max(labels_new)){
      print ('Result checking: not a dense integer mapping')
      labels <- NULL
      labels <- labels_new
      print (paste('Remapped to 1 to ', max(labels), ' clusters', sep = ''))
    }
    return(labels)
  }
  
  
  ### Function ANMI_calculation can calculate ANMI for each cluster ensemble result.
  # object is the clustering labels of individual single-cell clustering methods
  # labels are the enseble cluster labels generated by function sgraph.
  ANMI_calculation <- function(object, labels){
    
    m <- 0
    total_inds <- 0
    q <- numeric()
    object <- as.matrix(object)
    
    for (i in 1:nrow(object)){
      inds <- which(is.finite(object[i,]))
      q[i] <- evalmutual(labels = check(labels = t(as.matrix(labels[inds]))), object = check(labels = t(as.matrix(object[i,inds]))))
      m <- q[i] * length(inds) + m
      total_inds <- total_inds + length(inds)
    }
    
    m <- m/total_inds
    return(m)
  }
  
  
  ### Function hgpa performs cluster ensemble by HyperGraph-Paritioning Algorithm (HGPA)
  # object is the clustering results generated by all single-cell clustering methods
  # k is the maximum No. of clusters
  hgpa <- function(object, k = k){
    timestart<-Sys.time()

    hypergraph <- NULL
    
    print('Cluster ensembles using HGPA')
    # Generating hypergraph
    for (i in 1:nrow(object)) {
      hypergraph_each <- hypergraph_generation(object[i,])
      hypergraph = cbind(hypergraph, hypergraph_each)
    }
    
    filename <- wgraph(object = hypergraph, method = 2)
    clusters <- sgraph(k = k, dataname = filename)
    row.names(clusters) <- NULL
    
    system('rm graph2')
    rm_file <- paste('rm graph2.part.', k, sep = '')
    system(rm_file)
    
    timeend<-Sys.time()
    runningtime<-timeend-timestart
    print(timestart)
    print(timeend)
    hgpa_time = paste('Runing time of HGPA is ', runningtime, sep = "")
    print(hgpa_time)
    
    return(clusters)
  }
  
  
  ### Function mcla performs cluster ensemble by Meta-CLustering Algorithm (MCLA)
  # object is the clustering results generated by all single-cell clustering methods
  # k is the maximum No. of clusters
  mcla <- function(object, k = k){
    timestart<-Sys.time()
    
    hypergraph <- NULL
    
    print('Cluster ensembles using MCLA')
    for (i in 1:nrow(object)) {
      hypergraph_each <- hypergraph_generation(object[i,])
      hypergraph = cbind(hypergraph, hypergraph_each)
    }
    
    ### Function simxjac computes extended Jaccard similarity between row objects in matrices a and b
    # matrixA is the hypergraph generated by function hypergraph_generation
    simxjac <- function(matrixA){
      A_matrix_square <- matrixA %*% t(matrixA)
      A_square <- as.matrix(rowSums(matrixA ^2))
      sim_jac <- A_matrix_square/((A_square %*% matrix(1,ncol = nrow(matrixA))) + matrix(1, nrow = nrow(matrixA)) %*% t(A_square) - A_matrix_square)
      return(sim_jac)
    }
    
    sim_jac <- simxjac(matrixA = t(hypergraph))
    
    
    ### Function cmetis provides cluster labels 1 to k from edge weighted value balanced graph partitioning
    # object is the similarity matrix sim_jac generated by function simjac
    # k is the maximum No. of clusters
    cmetis <- function(sim_matrix, w, k){
      filename <- wgraph(object = sim_matrix, w = w, method = 1)
      labels <- sgraph(k = k, dataname = filename)
      return(labels)
    }
    
    labels <- cmetis(sim_matrix = sim_jac, w = rowSums(t(hypergraph)), k = k)
    
    hypergraph_cum <- NULL
    for (i in 1:max(labels)){
      matched_clusters <- which(labels == i)
      if (length(matched_clusters) > 1){
        hypergraph_cum <- rbind(hypergraph_cum, rowSums(hypergraph[,matched_clusters])/ncol(hypergraph[,matched_clusters]))
      } else if (length(matched_clusters) == 1){
        hypergraph_cum <- rbind(hypergraph_cum, hypergraph[,matched_clusters])
      }
    }

    
    
    ### Function hypergraphTOcluster generate the final cluster ensemble results using normalized hypergraph
    hypergraphTOcluster <- function(hypergraph_cum){
      randbreakties <- 1
      allzeroclumns <-NULL
      allzeroclumns <- which(colSums(hypergraph_cum) == 0)
      
      if (length(allzeroclumns) != 0){
        print (paste('hypergraphTOcluster: ', length(allzeroclumns), ' objects (', round(100*length(allzeroclumns)/ncol(hypergraph)), '%) with all zero associations', sep = ''))
        hypergraph_cum[,allzeroclumns] <- matrix(runif(nrow(hypergraph_cum) * length(allzeroclumns)), nrow = nrow(hypergraph_cum), ncol = length(allzeroclumns))
      }
      hypergraph_cum <- hypergraph_cum + matrix(runif(nrow(hypergraph_cum) * ncol(hypergraph_cum)), nrow = nrow(hypergraph_cum), ncol = ncol(hypergraph_cum))/10000
      
      ### Normalization
      library("Matrix")
      hypergraph_cum <- t(as(diag(1/rowSums(abs(t(hypergraph_cum)))), "sparseMatrix") %*% t(hypergraph_cum))
      
      m <- apply(hypergraph_cum, 2, max)
      clusters <- matrix(0, ncol = ncol(hypergraph_cum))
      max_postp <- matrix(0, ncol = ncol(hypergraph_cum))
      for (i in nrow(hypergraph_cum):1){
        max_position <- which(hypergraph_cum[i,] == m)
        if (length(max_position) != 0){
          clusters[max_position] <- i * matrix(1, ncol = length(max_position))
          max_postp[max_position] <- hypergraph_cum[i, max_position]
        }
      }
      print(paste('Delivering ', max(clusters), ' clusters', sep = ""))
      print(paste('Average posterior probablity is ', mean(max_postp), sep = ""))
      return(clusters)
    }
    
    clusters <- hypergraphTOcluster(hypergraph_cum = hypergraph_cum)
    
    system('rm graph1')
    rm_file <- paste('rm graph1.part.', k, sep = '')
    system(rm_file)
    
    timeend<-Sys.time()
    runningtime<-timeend-timestart
    print(timestart)
    print(timeend)
    mcla_time = paste('Runing time of MCLA is ', runningtime, sep = "")
    print(mcla_time)
    
    return(clusters)
  }
  
  
  cspa <- function(object, k = k){
    timestart<-Sys.time()

    hypergraph <- NULL
    
    print('Cluster ensembles using CSPA')
    for (i in 1:nrow(object)) {
      hypergraph_each <- hypergraph_generation(object[i,])
      hypergraph = cbind(hypergraph, hypergraph_each)
    }
    
    ### Function checks
    checks <- function(object){
      if (nrow(object) == ncol(object)){
        if (sum(diag(object) != 1) > 0){
          print ('self-similarity object[i,i] is not always 1')
          for (i in 1:nrow(object)){
            object[i,i] <- 1;
          }
          print ('diagonal is set to 1')
        }
      }
      return(object)
    }
      
    similarity_matrix <- (hypergraph %*% t(hypergraph))/nrow(object)
    
    filename <- wgraph(object = checks(similarity_matrix), method = 0)
    clusters <- sgraph(k = k, dataname = filename)
    row.names(clusters) <- NULL
    
    system('rm graph0')
    rm_file <- paste('rm graph0.part.', k, sep = '')
    system(rm_file)
    
    timeend<-Sys.time()
    runningtime<-timeend-timestart
    print(timestart)
    print(timeend)
    cspa_time = paste('Runing time of CSPA is ', runningtime, sep = "")
    print(cspa_time)
    
    return(clusters)
  }

  cluster_ensembles <- list()
  
  if (HGPA == TRUE){
    cluster_ensembles$HGPA_ANMI <- 0
    hgpa_ANMI_each <- numeric()
    hgpa_k_each <- numeric()
    
    for (K in k_min:k_max){
      hgpa_all_result <- matrix(0, nrow = 10, ncol = ncol(cluster_results))
      hgpa_all_ANMI = numeric()
      for (i in 1:10){
        hgpa_all_result[i,] <- check(labels = hgpa(object = cluster_results, k = K))
        hgpa_all_ANMI[i] <- ANMI_calculation(object = cluster_results, labels = hgpa_all_result[i,])
      }
      
      hgpa_result <- hgpa_all_result[order(hgpa_all_ANMI)[6],]
      hgpa_ANMI <- hgpa_all_ANMI[order(hgpa_all_ANMI)[6]]
      #hgpa_result <- check(labels = hgpa(object = cluster_results, k = K))
      #hgpa_ANMI <- ANMI_calculation(object = cluster_results, labels = hgpa_result)
      
      hgpa_ANMI_each[K-1] <- hgpa_ANMI
      hgpa_k_each[K-1] <- max(hgpa_result)
      
      if (hgpa_ANMI >= cluster_ensembles$HGPA_ANMI){
        cluster_ensembles$HGPA <- c(hgpa_result)
        cluster_ensembles$HGPA_optimal_k <- max(hgpa_result)
        cluster_ensembles$HGPA_ANMI <- hgpa_ANMI
      }
    }
  }
  
  if (MCLA == TRUE){
    cluster_ensembles$MCLA_ANMI <- 0
    mcla_ANMI_each <- numeric()
    mcla_k_each <- numeric()
    
    for (K in k_min:k_max){
      mcla_result <- check(mcla(object = cluster_results, k = K))
      mcla_ANMI <- ANMI_calculation(object = cluster_results, labels = mcla_result)
      
      mcla_ANMI_each[K-1] <- mcla_ANMI
      mcla_k_each[K-1] <- max(mcla_result)
      
      if (mcla_ANMI >= cluster_ensembles$MCLA_ANMI){
        cluster_ensembles$MCLA <- c(mcla_result)
        cluster_ensembles$MCLA_optimal_k <- max(mcla_result)
        cluster_ensembles$MCLA_ANMI <- mcla_ANMI
      }
    }
  }

  if (CSPA == TRUE & ((is.null(cspc_cell_max)) || 
      ((!is.null(cspc_cell_max)) & (ncol(cluster_results) <= cspc_cell_max)))){
    cluster_ensembles$CSPA_ANMI <- 0
    cspa_ANMI_each <- numeric()
    cspa_k_each <- numeric()
      
    for (K in k_min:k_max){
      cspa_result <- check(cspa(object = cluster_results, k = K))
      cspa_ANMI <- ANMI_calculation(object = cluster_results, labels = cspa_result)
        
      cspa_ANMI_each[K-1] <- cspa_ANMI
      cspa_k_each[K-1] <- max(cspa_result)
        
      if (cspa_ANMI >= cluster_ensembles$CSPA_ANMI){
        cluster_ensembles$CSPA <- c(cspa_result)
        cluster_ensembles$CSPA_optimal_k <- max(cspa_result)
        cluster_ensembles$CSPA_ANMI <- cspa_ANMI
      }
    }
  }
  
  for (K in k_min:k_max){
    if (HGPA == TRUE){
      print(paste('HGPA partitioning at K = ', K, ': ', hgpa_k_each[K-1], ' clusters at ANMI = ', hgpa_ANMI_each[K-1], sep = ""))
    }
  }
  for (K in k_min:k_max){
    if (MCLA == TRUE){
      print(paste('MCLA partitioning at K = ', K, ': ', mcla_k_each[K-1], ' clusters at ANMI = ', mcla_ANMI_each[K-1], sep = ""))
    }
  }
  for (K in k_min:k_max){
    if (CSPA == TRUE & ((is.null(cspc_cell_max)) || 
                        ((!is.null(cspc_cell_max)) & (ncol(cluster_results) <= cspc_cell_max)))){
      print(paste('CSPA partitioning at K = ', K, ': ', cspa_k_each[K-1], ' clusters at ANMI = ', cspa_ANMI_each[K-1], sep = ""))
    }
  }
  
  if ((HGPA == TRUE) + (MCLA == TRUE) + (CSPA == TRUE) > 1){
    labels_all <- logical()
    ANMI <- logical()
    if (HGPA == TRUE){
      labels_all = rbind(labels_all, cluster_ensembles$HGPA)
      ANMI = cbind(ANMI, cluster_ensembles$HGPA_ANMI)
    }
    if (MCLA == TRUE){
      labels_all = rbind(labels_all, cluster_ensembles$MCLA)
      ANMI = cbind(ANMI, cluster_ensembles$MCLA_ANMI)
    }
    if (CSPA == TRUE){
      labels_all = rbind(labels_all, cluster_ensembles$CSPA)
      ANMI = cbind(ANMI, cluster_ensembles$CSPA_ANMI)
    }
    
    cluster_ensembles$optimal_k <- max(labels_all[which(ANMI == max(ANMI))[1],])
    cluster_ensembles$optimal_clustering <- labels_all[which(ANMI == max(ANMI))[1],]
    cluster_ensembles$Summary = paste('Optimal number of clusters is ', cluster_ensembles$optimal_k, ' with ANMI = ', max(ANMI), sep = "")
    print(cluster_ensembles$Summary)
  } else {
    if (HGPA == TRUE){
      cluster_ensembles$optimal_clustering = cluster_ensembles$HGPA
      cluster_ensembles$optimal_k = cluster_ensembles$HGPA_optimal_k
      
      cluster_ensembles$Summary = paste('Optimal number of clusters is ', cluster_ensembles$optimal_k, ' with ANMI = ', cluster_ensembles$HGPA_ANMI, sep = "")
      print(cluster_ensembles$Summary)
    }
    if (MCLA == TRUE){
      cluster_ensembles$optimal_clustering = cluster_ensembles$MCLA
      cluster_ensembles$optimal_k = cluster_ensembles$MCLA_optimal_k
      
      cluster_ensembles$Summary = paste('Optimal number of clusters is ', cluster_ensembles$optimal_k, ' with ANMI = ', cluster_ensembles$MCLA_ANMI, sep = "")
      print(cluster_ensembles$Summary)
    }
    if (CSPA == TRUE){
      cluster_ensembles$optimal_clustering = cluster_ensembles$CSPA
      cluster_ensembles$optimal_k = cluster_ensembles$CSPA_optimal_k
      
      cluster_ensembles$Summary = paste('Optimal number of clusters is ', cluster_ensembles$optimal_k, ' with ANMI = ', cluster_ensembles$CSPA_ANMI, sep = "")
      print(cluster_ensembles$Summary)
    }
  }
  
  return(cluster_ensembles)
}