Bioconductor Code: omada

Browse code

Fixing some dep bugs

Sokratis Kariotis authored on 13/06/2022 06:26:26
Showing 17 changed files

DESCRIPTION index ddf471a..fb98dd4 100644
NAMESPACE index 01c66a6..e404cb3 100644
R/.Rhistory index 0000000..e69de29
R/cluster_voting.R index 5722fb3..d904d70 100644
R/clustering_method_selection.R index 7d0f14f..cb75ca3 100644
R/feasibility_analysis_based_on_data.R index 00ca367..83924f7 100644
R/feasibilty_analysis.R index 565ca55..6c70d73 100644
R/feature_selection.R index f48d5f5..28ba238 100644
R/gene_singatures.R index 77c1a70..226c682 100644
R/optimal_clustering.R index 624e3a8..2280dfe 100644
R/partition_agreement.R index 4280d80..77c610b 100644
man/clusterVoting.Rd index 71bb1f1..3c01d1b 100644
man/clusteringMethodSelection.Rd index 27221dd..e96710b 100644
man/feasibilityAnalysis.Rd index 8b14ede..cf4e7c7 100644
man/feasibilityAnalysisDataBased.Rd index 3141d4d..62e8af9 100644
man/geneSignatures.Rd index e322c5e..e2e46a7 100644
man/optimalClustering.Rd index 8150c8c..2396977 100644

History View file @ f028ae4

@@ -32,7 +32,8 @@ Depends:
                          clusterCrit (>= 1.2.8),
                          clValid (>= 0.7),
                          glmnet (>= 4.1.3),
                     -    dplyr(>= 1.0.7)
                     +    dplyr(>= 1.0.7),
                     +    stats (>= 4.1.2)
                      Suggests:
                          rmarkdown,
                          knitr,

NAMESPACE

History View file @ f028ae4

@@ -76,3 +76,15 @@ export(plot_feature_selection)
                      export(plot_partition_agreement)
                      export(plot_signature_feature)
                      export(plot_vote_frequencies)
                     +import(ggplot2)
                     +importFrom(clValid,clusters)
                     +importFrom(diceR,prepare_data)
                     +importFrom(dplyr,"%>%")
                     +importFrom(dplyr,across)
                     +importFrom(dplyr,filter)
                     +importFrom(dplyr,left_join)
                     +importFrom(fpc,hclustCBI)
                     +importFrom(fpc,kmeansCBI)
                     +importFrom(fpc,speccCBI)
                     +importFrom(pdfCluster,adj.rand.index)
                     +importFrom(stats,cutree)

R/.Rhistory

History View file @ f028ae4

new file mode 100644

R/cluster_voting.R

History View file @ f028ae4

@@ -7,9 +7,10 @@
                      #' @param algorithm The clustering algorithm to use for the multiple clustering
                      #' runs to be measured
                      #'
                     -#' @return An object of class "clusterVoting" containing a matrix with metric scores for every k and
                     -#' internal index, cluster memberships for every k, a dataframe with the k votes
                     -#'  for every index, k vote frequencies and the frequency barplot of the k votes
                     +#' @return An object of class "clusterVoting" containing a matrix with metric
                     +#'  scores for every k and internal index, cluster memberships for every k, a
                     +#'  dataframe with the k votes for every index, k vote frequencies and the
                     +#'  frequency barplot of the k votes
                      #'
                      #' @export
                      #'
@@ -17,6 +18,9 @@
                      #' clusterVoting(toy_genes, 4,14,"sc")
                      #' clusterVoting(toy_genes, 2,7,"hc")
                      #' clusterVoting(toy_genes, 2,4,"km")
                     +#'
                     +#' @importFrom diceR prepare_data
                     +#' @import ggplot2
                      clusterVoting <- function(data ,min.k ,max.k, algorithm) {
@@ -45,24 +49,25 @@ clusterVoting <- function(data ,min.k ,max.k, algorithm) {
                        for(current_k in min.k:max.k) {
                          if(algorithm == "sc") {
                     -      cl <- specc(data, centers=current_k, kernel = "rbfdot")
                     +      cl <- kernlab::specc(data, centers=current_k, kernel = "rbfdot")
                            cls <- cl@.Data
                          } else if(algorithm == "hc") {
                     -      dist_mat <- dist(data, method = "euclidean")
                     -      cl <- hclust(dist_mat, method = "average")
                     -      cls <- cutree(cl, k = current_k)
                     +      dist_mat <- stats::dist(data, method = "euclidean")
                     +      cl <- stats::hclust(dist_mat, method = "average")
                     +      cls <- stats::cutree(cl, k = current_k)
                          } else if(algorithm == "km") {
                     -      cl <- kmeans(data, current_k, algorithm = "Hartigan-Wong")
                     +      cl <- stats::kmeans(data, current_k, algorithm = "Hartigan-Wong")
                            cls <- cl$cluster
+                         }
                     -    criteria <- intCriteria(data,cls,c("calinski_harabasz","dunn","pbm","tau",
                     -                                       "gamma", "c_index","davies_bouldin",
                     +    criteria <- clusterCrit::intCriteria(data,cls,c("calinski_harabasz","dunn",
                     +                                                    "pbm","tau", "gamma",
                     +                                                    "c_index","davies_bouldin",
                                                             "mcclain_rao","sd_dis", "ray_turi",
                                                             "g_plus","silhouette","s_dbw"))
                          con <- clValid::connectivity(clusters = cls, Data = data)
                     -    comp <- compactness(data, cls)
                     +    comp <- diceR::compactness(data, cls)
                          criteria <- c(criteria, connectivity=con, compactness=comp)
                          criteria <- array(as.numeric(unlist(criteria)))
                          scores[, counter] <- criteria
@@ -104,7 +109,8 @@ clusterVoting <- function(data ,min.k ,max.k, algorithm) {
                        colnames(ensemble.results) <- c("k", "Frequency")
                        ensemble.results$Frequency <- as.numeric(ensemble.results$Frequency)
                     -  ensemble.plot <- ggplot(ensemble.results, aes(k, Frequency, fill = k)) +
                     +  ensemble.plot <- ggplot2::ggplot(ensemble.results, aes(k, Frequency,
                     +                                                         fill = k)) +
                          geom_col() +
                          scale_fill_brewer(palette="Dark2")

R/clustering_method_selection.R

History View file @ f028ae4

@@ -16,6 +16,9 @@
                      #' number.of.comparisons = 4)
                      #' clusteringMethodSelection(toy_genes, method.upper.k = 2,
                      #' number.of.comparisons = 2)
                     +#'
                     +#' @import ggplot2
                     +#' @importFrom clValid clusters
                      clusteringMethodSelection <- function(data, method.upper.k = 5,
                                                            number.of.comparisons = 3) {
@@ -114,10 +117,10 @@ clusteringMethodSelection <- function(data, method.upper.k = 5,
                        s.mean <- mean(df.final$spectral)
                        k.mean <- mean(df.final$kmeans)
                     -  df.plot <- melt(df.final, id=c("clusters"))
                     +  df.plot <- reshape::melt(df.final, id=c("clusters"))
                        colnames(df.plot) <- c("clusters", "methods", "value")
                     -  agreements.plot <- ggplot(df.plot, aes(x = clusters, y = value)) +
                     +  agreements.plot <- ggplot2::ggplot(df.plot, aes(x = clusters, y = value)) +
                          geom_line(aes(color = methods)) +
                          geom_hline(aes(yintercept=h.mean), linetype="dashed") +
                          geom_hline(aes(yintercept=s.mean), linetype="dashed") +

R/feasibility_analysis_based_on_data.R

History View file @ f028ae4

@@ -15,6 +15,8 @@
                      #' @examples
                      #' feasibilityAnalysisDataBased(data = toy_genes, classes = 3)
                      #' feasibilityAnalysisDataBased(data = toy_genes, classes = 2)
                     +#'
                     +#' @importFrom fpc speccCBI
                      feasibilityAnalysisDataBased <- function(data, classes = 3) {
                        samples = dim(data)[1]
@@ -39,7 +41,7 @@ feasibilityAnalysisDataBased <- function(data, classes = 3) {
                        cl.index <- 1
                        feature.index <- 1
                        for (i in 1:features) {
                     -    temp <- rnorm(n = samples,
                     +    temp <- stats::rnorm(n = samples,
                                        mean = c(class.means),
                                        sd = class.sd)
                          dataset[, ncol(dataset) + 1] <- temp # Append temp column
@@ -68,7 +70,7 @@ feasibilityAnalysisDataBased <- function(data, classes = 3) {
+                       }
                        for (rep in c.min:c.max) {
                     -    sc.boot <- clusterboot(
                     +    sc.boot <- fpc::clusterboot(
                            stability.dataset,
                            B = 25,
                            bootmethod = "boot",

R/feasibilty_analysis.R

History View file @ f028ae4

@@ -16,6 +16,8 @@
                      #' @examples
                      #' feasibilityAnalysis(classes = 3, samples = 320, features = 400)
                      #' feasibilityAnalysis(classes = 4, samples = 400, features = 120)
                     +#'
                     +#' @importFrom fpc speccCBI
                      feasibilityAnalysis <- function(classes = 3, samples = 320, features = 400) {
@@ -29,7 +31,7 @@ feasibilityAnalysis <- function(classes = 3, samples = 320, features = 400) {
                        cl.index <- 1
                        feature.index <- 1
                        for(i in 1:features) {
                     -    temp <- rnorm(n = samples, mean = c(class.means), sd = class.sd)
                     +    temp <- stats::rnorm(n = samples, mean = c(class.means), sd = class.sd)
                          dataset[ , ncol(dataset) + 1] <- temp # Append temp column
                          colnames(dataset)[ncol(dataset)] <- paste0("feature_", feature.index)
                          feature.index <- feature.index + 1
@@ -56,7 +58,7 @@ feasibilityAnalysis <- function(classes = 3, samples = 320, features = 400) {
                        for(rep in c.min:c.max) {
                     -    sc.boot <- clusterboot(stability.dataset,
                     +    sc.boot <- fpc::clusterboot(stability.dataset,
                                                 B = 25,
                                                 bootmethod = "boot",
                                                 clustermethod = speccCBI,

R/feature_selection.R

History View file @ f028ae4

@@ -16,6 +16,10 @@
                      #' featureSelection(toy_genes, min.k = 3, max.k = 9, step = 3)
                      #' featureSelection(toy_genes, min.k = 2, max.k = 4, step = 4)
                      #'
                     +#' @importFrom fpc speccCBI
                     +#' @import ggplot2
+                    +
+                    +
                      featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) {
                        print("Selecting feature subset...")
@@ -27,7 +31,7 @@ featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) {
                        averages.of.all.k <- list()
                        # Sorted features based on variance across data points
                     -  features.variance <- data.frame(apply(data, 2, var))
                     +  features.variance <- data.frame(apply(data, 2, stats::var))
                        colnames(features.variance) <- "variance"
                        sorted.features.variance <-
                          features.variance[order(features.variance$variance,decreasing = TRUE), ,
@@ -47,7 +51,7 @@ featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) {
                            cur <- sorted.features.variance$names[1:fs]
                     -      sc.boot <- clusterboot(data[,cur],
                     +      sc.boot <- fpc::clusterboot(data[,cur],
                                                   B = 25,
                                                   bootmethod = "boot",
                                                   clustermethod = speccCBI,
@@ -85,7 +89,7 @@ featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) {
                        all.feature.k.stabilities$featureSet <-
                          as.integer(as.character(all.feature.k.stabilities$featureSet))
                     -  stabilities.plot <- ggplot(data = all.feature.k.stabilities,
                     +  stabilities.plot <- ggplot2::ggplot(data = all.feature.k.stabilities,
                                                   aes(x=featureSet, y=means)) +
                          geom_line(color='firebrick',group = 1, size = 0.5) +
                          geom_point(color='firebrick', group = 1) +

R/gene_singatures.R

History View file @ f028ae4

@@ -14,9 +14,15 @@
                      #'
                      #' @examples
                      #' geneSignatures(toy_genes, toy_gene_memberships)
                     +#'
                     +#' @import ggplot2
                     +#' @importFrom dplyr across filter %>% left_join
+                    +
                      geneSignatures <- function(data, memberships) {
                     +    # utils::globalVariables("where", add=FALSE)
+                    +
                          data <- as.data.frame(data)
                          rnames <- row.names(data)
                          data$id <- rnames
@@ -39,19 +45,19 @@ geneSignatures <- function(data, memberships) {
                          # Running cross-validation Lasso to find optimal lambda value
                          data.matrix <- as.matrix(data[,2:dim(data)[2]])
                     -    cv_model <- cv.glmnet(data.matrix(data.matrix), data$membership, family = "multinomial",
                     -                          alpha = 1)
                     +    cv_model <- glmnet::cv.glmnet(data.matrix(data.matrix), data$membership,
                     +                                  family = "multinomial", alpha = 1)
                          # Optimal lambda value (minimizing test MSE)
                          optimal_lambda <- cv_model$lambda.min
                          # Running optimal lasso model
                     -    optimal_lasso <- glmnet(data.matrix(data.matrix), data$membership,
                     +    optimal_lasso <- glmnet::glmnet(data.matrix(data.matrix), data$membership,
                                                  family = "multinomial",
                                                  alpha = 1, lambda = optimal_lambda)
                          # Extract coefficients for minimized test MSE)
                     -    Coefficients <- coef(optimal_lasso, s = "min")
                     +    Coefficients <- stats::coef(optimal_lasso, s = "min")
                          # Formatting coefficient dataframe per cluster
                          ns <- names(Coefficients)
@@ -78,9 +84,10 @@ geneSignatures <- function(data, memberships) {
                          # retain top 30%
                          coef.dataset <- coef.dataset[1:round(dim(coef.dataset)[1]*0.3, digits = 0),]
                     -    coef.data.melt <- melt(coef.dataset)
                     +    coef.data.melt <- reshape::melt(coef.dataset)
                     -    coef.30perc <- ggplot(data = coef.data.melt, aes(x = features, y = value,
                     +    coef.30perc <- ggplot2::ggplot(data = coef.data.melt,
                     +                                   aes(x = features, y = value,
                                                                           fill = variable)) +
                              geom_bar(stat = "identity") +
                              theme(axis.title.x=element_blank(),

R/optimal_clustering.R

History View file @ f028ae4

@@ -13,6 +13,8 @@
                      #' @examples
                      #' optimalClustering(toy_genes, 4,"spectral")
                      #' optimalClustering(toy_genes, 2,"kmeans")
                     +#'
                     +#' @importFrom fpc speccCBI hclustCBI kmeansCBI
                      optimalClustering <- function(data, clusters, algorithm) {
@@ -28,7 +30,7 @@ optimalClustering <- function(data, clusters, algorithm) {
                          for (par in spectral.kernels) {
                     -      sc.boot <- clusterboot(data,
                     +      sc.boot <- fpc::clusterboot(data,
                                                   B = 25,
                                                   bootmethod = "boot",
                                                   clustermethod = speccCBI,
@@ -55,7 +57,7 @@ optimalClustering <- function(data, clusters, algorithm) {
                          for (par in hierarchical.methods) {
                     -      sc.boot <- clusterboot(data,
                     +      sc.boot <- fpc::clusterboot(data,
                                                   B = 25,
                                                   bootmethod = "boot",
                                                   clustermethod = hclustCBI,
@@ -81,7 +83,7 @@ optimalClustering <- function(data, clusters, algorithm) {
                          for (par in kmeans.kernels) {
                     -      sc.boot <- clusterboot(data,
                     +      sc.boot <- fpc::clusterboot(data,
                                                   B = 25,
                                                   bootmethod = "boot",
                                                   clustermethod = kmeansCBI,

R/partition_agreement.R

History View file @ f028ae4

@@ -45,6 +45,9 @@
                      #' partitionAgreement(toy_genes, algorithm.1 = "spectral", measure.1 = "rbfdot",
                      #' algorithm.2 = "kmeans",measure.2 = "Lloyd", number.of.clusters = 5)
                      #' @export
                     +#'
                     +#' @importFrom stats cutree
                     +#' @importFrom pdfCluster adj.rand.index
                      partitionAgreement <- function(data, algorithm.1 = "hierarchical",
                                                     measure.1 = "canberra",
@@ -62,22 +65,22 @@ partitionAgreement <- function(data, algorithm.1 = "hierarchical",
                        for(i in 2:number.of.clusters) {
                          #Spectral clustering
                          if(algorithm.1 == "spectral") {
                     -      cl <- specc(dataset, centers=i, kernel = measure.1)
                     +      cl <- kernlab::specc(dataset, centers=i, kernel = measure.1)
                            temp <- data.frame(cl@.Data)
                            cr1 <- cbind(cr1, temp)
+                         }
                          #Hierarchical
                          else if(algorithm.1 == "hierarchical") {
                     -      dist_mat <- dist(dataset, method = measure.1)
                     -      cl <- hclust(dist_mat, method = hier.agglo.algorithm.1)
                     +      dist_mat <- stats::dist(dataset, method = measure.1)
                     +      cl <- stats::hclust(dist_mat, method = hier.agglo.algorithm.1)
                            temp <- data.frame(cutree(cl, k = i))
                            cr1 <- cbind(cr1, temp)
+                         }
                          # #k-means
                          else if(algorithm.1 == "kmeans") {
                     -      cl <- kmeans(dataset, i, algorithm = measure.1)
                     +      cl <- stats::kmeans(dataset, i, algorithm = measure.1)
                            temp <- data.frame(cl$cluster)
                            cr1 <- cbind(cr1, temp)
+                         }
@@ -89,22 +92,22 @@ partitionAgreement <- function(data, algorithm.1 = "hierarchical",
                        for(i in 2:number.of.clusters) {
                          #Spectral clustering
                          if(algorithm.2 == "spectral") {
                     -      cl <- specc(dataset, centers=i, kernel = measure.2)
                     +      cl <- kernlab::specc(dataset, centers=i, kernel = measure.2)
                            temp <- data.frame(cl@.Data)
                            cr2 <- cbind(cr2, temp)
+                         }
                          #Hierarchical
                          else if(algorithm.2 == "hierarchical") {
                     -      dist_mat <- dist(dataset, method = measure.2)
                     -      cl <- hclust(dist_mat, method = hier.agglo.algorithm.2)
                     +      dist_mat <- stats::dist(dataset, method = measure.2)
                     +      cl <- stats::hclust(dist_mat, method = hier.agglo.algorithm.2)
                            temp <- data.frame(cutree(cl, k = i))
                            cr2 <- cbind(cr2, temp)
+                         }
                          #k-means
                          else if(algorithm.2 == "kmeans") {
                     -      cl <- kmeans(dataset, i, algorithm = measure.2)
                     +      cl <- stats::kmeans(dataset, i, algorithm = measure.2)
                            temp <- data.frame(cl$cluster)
                            cr2 <- cbind(cr2, temp)
+                         }

man/clusterVoting.Rd

History View file @ f028ae4

@@ -18,9 +18,10 @@ clusterVoting(data, min.k, max.k, algorithm)
                      runs to be measured}
+                     }
                      \value{
                     -An object of class "clusterVoting" containing a matrix with metric scores for every k and
                     -internal index, cluster memberships for every k, a dataframe with the k votes
                     - for every index, k vote frequencies and the frequency barplot of the k votes
                     +An object of class "clusterVoting" containing a matrix with metric
                     + scores for every k and internal index, cluster memberships for every k, a
                     + dataframe with the k votes for every index, k vote frequencies and the
                     + frequency barplot of the k votes
+                     }
                      \description{
                      Estimating number of clusters through internal exhaustive ensemble majority
@@ -30,4 +31,5 @@ voting
                      clusterVoting(toy_genes, 4,14,"sc")
                      clusterVoting(toy_genes, 2,7,"hc")
                      clusterVoting(toy_genes, 2,4,"km")
+                    +
+                     }

man/clusteringMethodSelection.Rd

History View file @ f028ae4

@@ -27,4 +27,5 @@ clusteringMethodSelection(toy_genes, method.upper.k = 3,
                      number.of.comparisons = 4)
                      clusteringMethodSelection(toy_genes, method.upper.k = 2,
                      number.of.comparisons = 2)
+                    +
+                     }

man/feasibilityAnalysis.Rd

History View file @ f028ae4

@@ -28,4 +28,5 @@ clusters
                      \examples{
                      feasibilityAnalysis(classes = 3, samples = 320, features = 400)
                      feasibilityAnalysis(classes = 4, samples = 400, features = 120)
+                    +
+                     }

man/feasibilityAnalysisDataBased.Rd

History View file @ f028ae4

@@ -27,4 +27,5 @@ deviation
                      \examples{
                      feasibilityAnalysisDataBased(data = toy_genes, classes = 3)
                      feasibilityAnalysisDataBased(data = toy_genes, classes = 2)
+                    +
+                     }

man/geneSignatures.Rd

History View file @ f028ae4

...	...	@@ -23,4 +23,5 @@ Generating the feature/gene signature per cluster
23	23	}
24	24	\examples{
25	25	geneSignatures(toy_genes, toy_gene_memberships)
	26	+
26	27	}

man/optimalClustering.Rd

History View file @ f028ae4

@@ -24,4 +24,5 @@ Clustering with the optimal parameters estimated by these tools
                      \examples{
                      optimalClustering(toy_genes, 4,"spectral")
                      optimalClustering(toy_genes, 2,"kmeans")
+                    +
+                     }