R/optimal_clustering.R
ca930686
 #' Clustering with the optimal parameters estimated by these tools
 #'
 #' @param data A dataframe, where columns are features and rows are data points
 #' @param clusters Number of clusters to be generated by this clustering
 #' @param algorithm The clustering algorithm to be used: "spectral",
 #'   "hierarchical", or any other value to use k-means
 #'
65905451
 #' @return An object of class "optimalClustering" containing a dataframe with
 #' the memberships of the samples found in the input data, the optimal
 #' stability score and parameter used
ca930686
 #'
 #' @export
 #'
 #' @examples
 #' optimalClustering(toy_genes, 2,"kmeans")
f028ae4f
 #'
 #' @importFrom fpc speccCBI hclustCBI kmeansCBI
ca930686
 
 optimalClustering <- function(data, clusters, algorithm) {
   # Purpose: for the chosen clustering algorithm, run fpc::clusterboot once
   # per candidate setting (25 bootstrap resamples, fixed seed) and keep the
   # setting whose mean of the returned `bootmean` component is highest.
   #
   # Arguments:
   #   data      - passed unchanged to fpc::clusterboot; its row names become
   #               the `id` column of the returned memberships.
   #   clusters  - number of clusters, forwarded to the clustering as `k`.
   #   algorithm - "spectral" or "hierarchical"; any other single value
   #               selects k-means (kept for backward compatibility).
   #
   # Returns: a list of class "optimalClustering" with elements
   #   optimal.memberships     - data frame with columns `id`, `memberships`
   #   optimal.stability.score - best mean stability found (0 if none > 0)
   #   optimal.parameter.used  - winning candidate (NA if none scored > 0)
   #
   # Raises: an error if `algorithm` is not a single, non-missing value.

   # Validate before touching fpc so a bad call fails with a clear message
   # instead of an obscure "condition has length > 1" / "argument is of
   # length zero" error from `if`.
   algorithm <- as.character(algorithm)
   if (length(algorithm) != 1L || is.na(algorithm)) {
     stop("`algorithm` must be a single, non-missing string", call. = FALSE)
   }

   # Shared bootstrap call; `...` carries the one algorithm-specific tuning
   # argument (kernel / method / algorithm) through to the CBI function.
   run.boot <- function(cbi, ...) {
     fpc::clusterboot(data,
                      B = 25,
                      bootmethod = "boot",
                      clustermethod = cbi,
                      k = clusters,
                      ...,
                      seed = 28588,
                      showplots = FALSE,
                      count = FALSE)
   }

   # Pick the candidate settings and how each one is handed to clusterboot.
   if (algorithm == "spectral") {
     candidates <- c("rbfdot", "polydot", "vanilladot", "tanhdot",
                     "laplacedot", "anovadot", "splinedot")
     fit <- function(par) run.boot(fpc::speccCBI, kernel = par)
   } else if (algorithm == "hierarchical") {
     candidates <- c("average", "ward.D", "ward.D2", "single",
                     "complete", "mcquitty", "median", "centroid")
     fit <- function(par) run.boot(fpc::hclustCBI, method = par)
   } else {
     candidates <- c("Hartigan-Wong", "Lloyd", "Forgy",
                     "MacQueen")
     fit <- function(par) run.boot(fpc::kmeansCBI, algorithm = par)
   }

   # Defaults returned when no candidate achieves a stability above zero.
   # The parameter starts as NA so the result can always be built.
   optimal.stability <- 0
   optimal.memberships <- rep(0, nrow(data))
   optimal.parameter <- NA_character_

   for (par in candidates) {
     sc.boot <- fit(par)
     stability <- mean(sc.boot$bootmean)

     # Skip NA/NaN scores rather than aborting the whole search.
     if (!is.na(stability) && stability > optimal.stability) {
       optimal.stability <- stability
       optimal.memberships <- sc.boot$partition
       optimal.parameter <- par
     }
   }

   memberships <- data.frame(id = rownames(data),
                             memberships = optimal.memberships)

   structure(list(optimal.memberships = memberships,
                  optimal.stability.score = optimal.stability,
                  optimal.parameter.used  = optimal.parameter),
             class = "optimalClustering")
 }