#' Clustering with the optimal parameters estimated by these tools
#'
#' For the requested clustering algorithm, every candidate tuning parameter
#' (kernel, linkage method or k-means variant) is evaluated with
#' \code{fpc::clusterboot} (25 bootstrap resamples, fixed seed). The candidate
#' with the highest mean of the clusterwise \code{bootmean} stability values
#' is kept; on ties the first candidate evaluated wins.
#'
#' @param data A dataframe, where columns are features and rows are data points
#' @param clusters Number of clusters to be generated by this clustering
#' @param algorithm The clustering algorithm to be used: \code{"spectral"} or
#'   \code{"hierarchical"}; any other single value selects k-means
#'
#' @return An object of class "optimalClustering": a list with
#'   \code{optimal.memberships} (a dataframe with the row names of
#'   \code{data} in \code{id} and the cluster labels in \code{memberships}),
#'   \code{optimal.stability.score} (the best mean stability found) and
#'   \code{optimal.parameter.used} (the candidate that produced it). If no
#'   candidate reaches a stability above 0, a warning is raised, the
#'   memberships are all 0, the score is 0 and the parameter is \code{NA}.
#'
#' @export
#'
#' @examples
#' optimalClustering(toy_genes, 2,"kmeans")
#'
#' @importFrom fpc speccCBI hclustCBI kmeansCBI
optimalClustering <- function(data, clusters, algorithm) {
  # Fail early with a readable message instead of an opaque `if` condition
  # error further down.
  if (length(algorithm) != 1L || is.na(algorithm)) {
    stop("`algorithm` must be a single non-missing value", call. = FALSE)
  }

  # One bootstrap stability run. The algorithm-specific tuning argument is
  # passed through `...` and forwarded by clusterboot to the CBI function.
  run_boot <- function(cbi, ...) {
    fpc::clusterboot(data, B = 25, bootmethod = "boot", clustermethod = cbi,
                     k = clusters, ..., seed = 28588, showplots = FALSE,
                     count = FALSE)
  }

  # Pick the candidate grid and the matching evaluator. Anything that is not
  # "spectral" or "hierarchical" falls back to k-means, as before.
  if (algorithm == "spectral") {
    candidates <- c("rbfdot", "polydot", "vanilladot", "tanhdot",
                    "laplacedot", "anovadot", "splinedot")
    evaluate <- function(par) run_boot(fpc::speccCBI, kernel = par)
  } else if (algorithm == "hierarchical") {
    candidates <- c("average", "ward.D", "ward.D2", "single", "complete",
                    "mcquitty", "median", "centroid")
    evaluate <- function(par) run_boot(fpc::hclustCBI, method = par)
  } else {
    candidates <- c("Hartigan-Wong", "Lloyd", "Forgy", "MacQueen")
    evaluate <- function(par) run_boot(fpc::kmeansCBI, algorithm = par)
  }

  # Defaults used when no candidate improves on a stability of 0. The
  # parameter is initialised so the return value can always be built.
  best_score <- 0
  best_partition <- numeric(NROW(data))
  best_parameter <- NA_character_

  for (par in candidates) {
    boot <- evaluate(par)
    score <- mean(boot$bootmean)
    # isTRUE() skips candidates whose stability is NA/NaN instead of
    # aborting the whole search; strict `>` keeps the first of any ties.
    if (isTRUE(score > best_score)) {
      best_score <- score
      best_partition <- boot$partition
      best_parameter <- par
    }
  }

  if (is.na(best_parameter)) {
    warning("no candidate parameter reached a stability above 0",
            call. = FALSE)
  }

  memberships <- data.frame(id = rownames(data), memberships = best_partition)

  structure(
    list(optimal.memberships = memberships,
         optimal.stability.score = best_score,
         optimal.parameter.used = best_parameter),
    class = "optimalClustering"
  )
}