Fixing some dep bugs

Sokratis Kariotis authored on 13/06/2022 06:26:26
Showing 17 changed files
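
The commit applies one pattern throughout: declare each dependency in DESCRIPTION, tag the function with a roxygen2 @importFrom (or @import) line, and fully qualify the call as pkg::fun(). A minimal sketch of that pattern, using a hypothetical my_cluster() function rather than code from this package:

    #' Cluster rows of a matrix (hypothetical example, not from this package)
    #'
    #' @importFrom stats kmeans
    my_cluster <- function(data, k) {
      # Fully qualified call; resolves even if stats is never attached
      stats::kmeans(data, centers = k)$cluster
    }

Running devtools::document() (or roxygen2::roxygenise()) then regenerates NAMESPACE from these tags, which is how the block of importFrom() lines in the NAMESPACE hunk below was produced.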

... ...
@@ -32,7 +32,8 @@ Depends:
     clusterCrit (>= 1.2.8),
     clValid (>= 0.7),
     glmnet (>= 4.1.3),
-    dplyr(>= 1.0.7)
+    dplyr(>= 1.0.7),
+    stats (>= 4.1.2)
 Suggests:
     rmarkdown,
     knitr,
... ...
@@ -76,3 +76,15 @@ export(plot_feature_selection)
 export(plot_partition_agreement)
 export(plot_signature_feature)
 export(plot_vote_frequencies)
+import(ggplot2)
+importFrom(clValid,clusters)
+importFrom(diceR,prepare_data)
+importFrom(dplyr,"%>%")
+importFrom(dplyr,across)
+importFrom(dplyr,filter)
+importFrom(dplyr,left_join)
+importFrom(fpc,hclustCBI)
+importFrom(fpc,kmeansCBI)
+importFrom(fpc,speccCBI)
+importFrom(pdfCluster,adj.rand.index)
+importFrom(stats,cutree)
new file mode 100644
... ...
@@ -7,9 +7,10 @@
 #' @param algorithm The clustering algorithm to use for the multiple clustering
 #' runs to be measured
 #'
-#' @return An object of class "clusterVoting" containing a matrix with metric scores for every k and
-#' internal index, cluster memberships for every k, a dataframe with the k votes
-#'  for every index, k vote frequencies and the frequency barplot of the k votes
+#' @return An object of class "clusterVoting" containing a matrix with metric
+#'  scores for every k and internal index, cluster memberships for every k, a
+#'  dataframe with the k votes for every index, k vote frequencies and the
+#'  frequency barplot of the k votes
 #'
 #' @export
 #'
... ...
@@ -17,6 +18,9 @@
 #' clusterVoting(toy_genes, 4,14,"sc")
 #' clusterVoting(toy_genes, 2,7,"hc")
 #' clusterVoting(toy_genes, 2,4,"km")
+#'
+#' @importFrom diceR prepare_data
+#' @import ggplot2
 
 clusterVoting <- function(data ,min.k ,max.k, algorithm) {
 
... ...
@@ -45,24 +49,25 @@ clusterVoting <- function(data ,min.k ,max.k, algorithm) {
   for(current_k in min.k:max.k) {
 
     if(algorithm == "sc") {
-      cl <- specc(data, centers=current_k, kernel = "rbfdot")
+      cl <- kernlab::specc(data, centers=current_k, kernel = "rbfdot")
       cls <- cl@.Data
     } else if(algorithm == "hc") {
-      dist_mat <- dist(data, method = "euclidean")
-      cl <- hclust(dist_mat, method = "average")
-      cls <- cutree(cl, k = current_k)
+      dist_mat <- stats::dist(data, method = "euclidean")
+      cl <- stats::hclust(dist_mat, method = "average")
+      cls <- stats::cutree(cl, k = current_k)
     } else if(algorithm == "km") {
-      cl <- kmeans(data, current_k, algorithm = "Hartigan-Wong")
+      cl <- stats::kmeans(data, current_k, algorithm = "Hartigan-Wong")
       cls <- cl$cluster
     }
 
-    criteria <- intCriteria(data,cls,c("calinski_harabasz","dunn","pbm","tau",
-                                       "gamma", "c_index","davies_bouldin",
+    criteria <- clusterCrit::intCriteria(data,cls,c("calinski_harabasz","dunn",
+                                                    "pbm","tau", "gamma",
+                                                    "c_index","davies_bouldin",
                                        "mcclain_rao","sd_dis", "ray_turi",
                                        "g_plus","silhouette","s_dbw"))
 
     con <- clValid::connectivity(clusters = cls, Data = data)
-    comp <- compactness(data, cls)
+    comp <- diceR::compactness(data, cls)
     criteria <- c(criteria, connectivity=con, compactness=comp)
     criteria <- array(as.numeric(unlist(criteria)))
     scores[, counter] <- criteria
... ...
@@ -104,7 +109,8 @@ clusterVoting <- function(data ,min.k ,max.k, algorithm) {
   colnames(ensemble.results) <- c("k", "Frequency")
   ensemble.results$Frequency <- as.numeric(ensemble.results$Frequency)
 
-  ensemble.plot <- ggplot(ensemble.results, aes(k, Frequency, fill = k)) +
+  ensemble.plot <- ggplot2::ggplot(ensemble.results, aes(k, Frequency,
+                                                         fill = k)) +
     geom_col() +
     scale_fill_brewer(palette="Dark2")
 
... ...
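The clusterVoting() hunks above qualify the internal-index call as clusterCrit::intCriteria(). A minimal, self-contained sketch of that call on toy data (the matrix, k, and index subset are assumptions, not values from the package):

    library(clusterCrit)

    set.seed(1)
    m <- matrix(stats::rnorm(200), ncol = 4)        # toy data matrix
    cls <- stats::kmeans(m, centers = 3)$cluster    # any integer partition works

    # intCriteria() expects a double matrix and an integer membership vector
    crit <- clusterCrit::intCriteria(m, as.integer(cls),
                                     c("calinski_harabasz", "dunn", "silhouette"))
    crit$silhouette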
@@ -16,6 +16,9 @@
 #' number.of.comparisons = 4)
 #' clusteringMethodSelection(toy_genes, method.upper.k = 2,
 #' number.of.comparisons = 2)
+#'
+#' @import ggplot2
+#' @importFrom clValid clusters
 
 clusteringMethodSelection <- function(data, method.upper.k = 5,
                                       number.of.comparisons = 3) {
... ...
@@ -114,10 +117,10 @@ clusteringMethodSelection <- function(data, method.upper.k = 5,
   s.mean <- mean(df.final$spectral)
   k.mean <- mean(df.final$kmeans)
 
-  df.plot <- melt(df.final, id=c("clusters"))
+  df.plot <- reshape::melt(df.final, id=c("clusters"))
   colnames(df.plot) <- c("clusters", "methods", "value")
 
-  agreements.plot <- ggplot(df.plot, aes(x = clusters, y = value)) +
+  agreements.plot <- ggplot2::ggplot(df.plot, aes(x = clusters, y = value)) +
     geom_line(aes(color = methods)) +
     geom_hline(aes(yintercept=h.mean), linetype="dashed") +
     geom_hline(aes(yintercept=s.mean), linetype="dashed") +
... ...
@@ -15,6 +15,8 @@
 #' @examples
 #' feasibilityAnalysisDataBased(data = toy_genes, classes = 3)
 #' feasibilityAnalysisDataBased(data = toy_genes, classes = 2)
+#'
+#' @importFrom fpc speccCBI
 
 feasibilityAnalysisDataBased <- function(data, classes = 3) {
   samples = dim(data)[1]
... ...
@@ -39,7 +41,7 @@ feasibilityAnalysisDataBased <- function(data, classes = 3) {
   cl.index <- 1
   feature.index <- 1
   for (i in 1:features) {
-    temp <- rnorm(n = samples,
+    temp <- stats::rnorm(n = samples,
                   mean = c(class.means),
                   sd = class.sd)
     dataset[, ncol(dataset) + 1] <- temp # Append temp column
... ...
@@ -68,7 +70,7 @@ feasibilityAnalysisDataBased <- function(data, classes = 3) {
   }
 
   for (rep in c.min:c.max) {
-    sc.boot <- clusterboot(
+    sc.boot <- fpc::clusterboot(
       stability.dataset,
       B = 25,
       bootmethod = "boot",
... ...
@@ -16,6 +16,8 @@
 #' @examples
 #' feasibilityAnalysis(classes = 3, samples = 320, features = 400)
 #' feasibilityAnalysis(classes = 4, samples = 400, features = 120)
+#'
+#' @importFrom fpc speccCBI
 
 feasibilityAnalysis <- function(classes = 3, samples = 320, features = 400) {
 
... ...
@@ -29,7 +31,7 @@ feasibilityAnalysis <- function(classes = 3, samples = 320, features = 400) {
   cl.index <- 1
   feature.index <- 1
   for(i in 1:features) {
-    temp <- rnorm(n = samples, mean = c(class.means), sd = class.sd)
+    temp <- stats::rnorm(n = samples, mean = c(class.means), sd = class.sd)
     dataset[ , ncol(dataset) + 1] <- temp # Append temp column
     colnames(dataset)[ncol(dataset)] <- paste0("feature_", feature.index)
     feature.index <- feature.index + 1
... ...
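Both feasibility functions build each simulated feature with a single stats::rnorm() call, as in the hunk above. Because rnorm() recycles its mean argument, consecutive samples cycle through the per-class means; a minimal sketch with assumed sizes:

    samples <- 9
    class.means <- c(0, 5, 10)   # one mean per simulated class
    class.sd <- 1

    # Means recycle across samples: draws 1,4,7 centre on 0; 2,5,8 on 5; 3,6,9 on 10
    temp <- stats::rnorm(n = samples, mean = c(class.means), sd = class.sd)
    round(temp, 1)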
@@ -56,7 +58,7 @@ feasibilityAnalysis <- function(classes = 3, samples = 320, features = 400) {
 
   for(rep in c.min:c.max) {
 
-    sc.boot <- clusterboot(stability.dataset,
+    sc.boot <- fpc::clusterboot(stability.dataset,
                            B = 25,
                            bootmethod = "boot",
                            clustermethod = speccCBI,
... ...
@@ -16,6 +16,10 @@
 #' featureSelection(toy_genes, min.k = 3, max.k = 9, step = 3)
 #' featureSelection(toy_genes, min.k = 2, max.k = 4, step = 4)
 #'
+#' @importFrom fpc speccCBI
+#' @import ggplot2
+
+
 featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) {
 
   print("Selecting feature subset...")
... ...
@@ -27,7 +31,7 @@ featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) {
   averages.of.all.k <- list()
 
   # Sorted features based on variance across data points
-  features.variance <- data.frame(apply(data, 2, var))
+  features.variance <- data.frame(apply(data, 2, stats::var))
   colnames(features.variance) <- "variance"
   sorted.features.variance <-
     features.variance[order(features.variance$variance,decreasing = TRUE), ,
... ...
@@ -47,7 +51,7 @@ featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) {
 
       cur <- sorted.features.variance$names[1:fs]
 
-      sc.boot <- clusterboot(data[,cur],
+      sc.boot <- fpc::clusterboot(data[,cur],
                              B = 25,
                              bootmethod = "boot",
                              clustermethod = speccCBI,
... ...
@@ -85,7 +89,7 @@ featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) {
   all.feature.k.stabilities$featureSet <-
     as.integer(as.character(all.feature.k.stabilities$featureSet))
 
-  stabilities.plot <- ggplot(data = all.feature.k.stabilities,
+  stabilities.plot <- ggplot2::ggplot(data = all.feature.k.stabilities,
                              aes(x=featureSet, y=means)) +
     geom_line(color='firebrick',group = 1, size = 0.5) +
     geom_point(color='firebrick', group = 1) +
... ...
@@ -14,9 +14,15 @@
 #'
 #' @examples
 #' geneSignatures(toy_genes, toy_gene_memberships)
+#'
+#' @import ggplot2
+#' @importFrom dplyr across filter %>% left_join
+
 
 geneSignatures <- function(data, memberships) {
 
+    # utils::globalVariables("where", add=FALSE)
+
     data <- as.data.frame(data)
     rnames <- row.names(data)
     data$id <- rnames
... ...
@@ -39,19 +45,19 @@ geneSignatures <- function(data, memberships) {
 
     # Running cross-validation Lasso to find optimal lambda value
    data.matrix <- as.matrix(data[,2:dim(data)[2]])
-    cv_model <- cv.glmnet(data.matrix(data.matrix), data$membership, family = "multinomial",
-                          alpha = 1)
+    cv_model <- glmnet::cv.glmnet(data.matrix(data.matrix), data$membership,
+                                  family = "multinomial", alpha = 1)
 
     # Optimal lambda value (minimizing test MSE)
     optimal_lambda <- cv_model$lambda.min
 
     # Running optimal lasso model
-    optimal_lasso <- glmnet(data.matrix(data.matrix), data$membership,
+    optimal_lasso <- glmnet::glmnet(data.matrix(data.matrix), data$membership,
                             family = "multinomial",
                             alpha = 1, lambda = optimal_lambda)
 
     # Extract coefficients for minimized test MSE)
-    Coefficients <- coef(optimal_lasso, s = "min")
+    Coefficients <- stats::coef(optimal_lasso, s = "min")
 
     # Formatting coefficient dataframe per cluster
     ns <- names(Coefficients)
... ...
@@ -78,9 +84,10 @@ geneSignatures <- function(data, memberships) {
 
     # retain top 30%
     coef.dataset <- coef.dataset[1:round(dim(coef.dataset)[1]*0.3, digits = 0),]
-    coef.data.melt <- melt(coef.dataset)
+    coef.data.melt <- reshape::melt(coef.dataset)
 
-    coef.30perc <- ggplot(data = coef.data.melt, aes(x = features, y = value,
+    coef.30perc <- ggplot2::ggplot(data = coef.data.melt,
+                                   aes(x = features, y = value,
                                                      fill = variable)) +
         geom_bar(stat = "identity") +
         theme(axis.title.x=element_blank(),
... ...
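The geneSignatures() hunks above qualify the lasso calls as glmnet::cv.glmnet() and glmnet::glmnet(). A minimal sketch of that cross-validate-then-refit sequence on toy data (x, y, and the numeric s are assumptions; the diff itself passes s = "min"):

    library(glmnet)

    set.seed(1)
    x <- matrix(stats::rnorm(300), ncol = 3)
    y <- factor(sample(1:3, 100, replace = TRUE))   # toy cluster memberships

    # Cross-validation picks the lambda minimising multinomial deviance
    cv_model <- glmnet::cv.glmnet(x, y, family = "multinomial", alpha = 1)

    # Refit at the selected lambda and extract per-class coefficients
    optimal_lasso <- glmnet::glmnet(x, y, family = "multinomial",
                                    alpha = 1, lambda = cv_model$lambda.min)
    coefs <- stats::coef(optimal_lasso, s = cv_model$lambda.min)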
@@ -13,6 +13,8 @@
 #' @examples
 #' optimalClustering(toy_genes, 4,"spectral")
 #' optimalClustering(toy_genes, 2,"kmeans")
+#'
+#' @importFrom fpc speccCBI hclustCBI kmeansCBI
 
 optimalClustering <- function(data, clusters, algorithm) {
 
... ...
@@ -28,7 +30,7 @@ optimalClustering <- function(data, clusters, algorithm) {
 
     for (par in spectral.kernels) {
 
-      sc.boot <- clusterboot(data,
+      sc.boot <- fpc::clusterboot(data,
                              B = 25,
                              bootmethod = "boot",
                              clustermethod = speccCBI,
... ...
@@ -55,7 +57,7 @@ optimalClustering <- function(data, clusters, algorithm) {
 
     for (par in hierarchical.methods) {
 
-      sc.boot <- clusterboot(data,
+      sc.boot <- fpc::clusterboot(data,
                              B = 25,
                              bootmethod = "boot",
                              clustermethod = hclustCBI,
... ...
@@ -81,7 +83,7 @@ optimalClustering <- function(data, clusters, algorithm) {
 
     for (par in kmeans.kernels) {
 
-      sc.boot <- clusterboot(data,
+      sc.boot <- fpc::clusterboot(data,
                              B = 25,
                              bootmethod = "boot",
                              clustermethod = kmeansCBI,
... ...
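Every stability check in the commit routes through the same call, now qualified as fpc::clusterboot(). A minimal runnable sketch of one such bootstrap run (the toy matrix and k are assumptions; B = 25 and bootmethod = "boot" mirror the diff):

    library(fpc)

    set.seed(1)
    m <- matrix(stats::rnorm(400), ncol = 4)

    sc.boot <- fpc::clusterboot(m,
                                B = 25,
                                bootmethod = "boot",
                                clustermethod = fpc::speccCBI,
                                k = 3,            # passed through to speccCBI
                                showplots = FALSE,
                                count = FALSE)

    # Mean Jaccard similarity per cluster; values near 1 indicate stability
    sc.boot$bootmean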
@@ -45,6 +45,9 @@
 #' partitionAgreement(toy_genes, algorithm.1 = "spectral", measure.1 = "rbfdot",
 #' algorithm.2 = "kmeans",measure.2 = "Lloyd", number.of.clusters = 5)
 #' @export
+#'
+#' @importFrom stats cutree
+#' @importFrom pdfCluster adj.rand.index
 
 partitionAgreement <- function(data, algorithm.1 = "hierarchical",
                                measure.1 = "canberra",
... ...
@@ -62,22 +65,22 @@ partitionAgreement <- function(data, algorithm.1 = "hierarchical",
   for(i in 2:number.of.clusters) {
     #Spectral clustering
     if(algorithm.1 == "spectral") {
-      cl <- specc(dataset, centers=i, kernel = measure.1)
+      cl <- kernlab::specc(dataset, centers=i, kernel = measure.1)
       temp <- data.frame(cl@.Data)
       cr1 <- cbind(cr1, temp)
     }
 
     #Hierarchical
     else if(algorithm.1 == "hierarchical") {
-      dist_mat <- dist(dataset, method = measure.1)
-      cl <- hclust(dist_mat, method = hier.agglo.algorithm.1)
+      dist_mat <- stats::dist(dataset, method = measure.1)
+      cl <- stats::hclust(dist_mat, method = hier.agglo.algorithm.1)
       temp <- data.frame(cutree(cl, k = i))
       cr1 <- cbind(cr1, temp)
     }
 
     # #k-means
     else if(algorithm.1 == "kmeans") {
-      cl <- kmeans(dataset, i, algorithm = measure.1)
+      cl <- stats::kmeans(dataset, i, algorithm = measure.1)
       temp <- data.frame(cl$cluster)
       cr1 <- cbind(cr1, temp)
     }
... ...
@@ -89,22 +92,22 @@ partitionAgreement <- function(data, algorithm.1 = "hierarchical",
   for(i in 2:number.of.clusters) {
     #Spectral clustering
     if(algorithm.2 == "spectral") {
-      cl <- specc(dataset, centers=i, kernel = measure.2)
+      cl <- kernlab::specc(dataset, centers=i, kernel = measure.2)
       temp <- data.frame(cl@.Data)
       cr2 <- cbind(cr2, temp)
     }
 
     #Hierarchical
     else if(algorithm.2 == "hierarchical") {
-      dist_mat <- dist(dataset, method = measure.2)
-      cl <- hclust(dist_mat, method = hier.agglo.algorithm.2)
+      dist_mat <- stats::dist(dataset, method = measure.2)
+      cl <- stats::hclust(dist_mat, method = hier.agglo.algorithm.2)
       temp <- data.frame(cutree(cl, k = i))
       cr2 <- cbind(cr2, temp)
     }
 
     #k-means
     else if(algorithm.2 == "kmeans") {
-      cl <- kmeans(dataset, i, algorithm = measure.2)
+      cl <- stats::kmeans(dataset, i, algorithm = measure.2)
       temp <- data.frame(cl$cluster)
       cr2 <- cbind(cr2, temp)
     }
... ...
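partitionAgreement() ends by comparing the two sets of memberships; the commit now imports pdfCluster's adjusted Rand index for that step. A minimal sketch comparing a hierarchical and a k-means partition (toy data; the canberra distance mirrors the function's default measure, while the average linkage is an assumption):

    library(pdfCluster)

    set.seed(1)
    m <- matrix(stats::rnorm(200), ncol = 2)

    cl.hc <- stats::cutree(stats::hclust(stats::dist(m, method = "canberra"),
                                         method = "average"), k = 3)
    cl.km <- stats::kmeans(m, 3, algorithm = "Hartigan-Wong")$cluster

    # 1 = identical partitions; values near 0 = agreement expected by chance
    pdfCluster::adj.rand.index(cl.hc, cl.km)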
@@ -18,9 +18,10 @@ clusterVoting(data, min.k, max.k, algorithm)
 runs to be measured}
 }
 \value{
-An object of class "clusterVoting" containing a matrix with metric scores for every k and
-internal index, cluster memberships for every k, a dataframe with the k votes
- for every index, k vote frequencies and the frequency barplot of the k votes
+An object of class "clusterVoting" containing a matrix with metric
+ scores for every k and internal index, cluster memberships for every k, a
+ dataframe with the k votes for every index, k vote frequencies and the
+ frequency barplot of the k votes
 }
 \description{
 Estimating number of clusters through internal exhaustive ensemble majority
... ...
@@ -30,4 +31,5 @@ voting
 clusterVoting(toy_genes, 4,14,"sc")
 clusterVoting(toy_genes, 2,7,"hc")
 clusterVoting(toy_genes, 2,4,"km")
+
 }
... ...
@@ -27,4 +27,5 @@ clusteringMethodSelection(toy_genes, method.upper.k = 3,
 number.of.comparisons = 4)
 clusteringMethodSelection(toy_genes, method.upper.k = 2,
 number.of.comparisons = 2)
+
 }
... ...
@@ -28,4 +28,5 @@ clusters
 \examples{
 feasibilityAnalysis(classes = 3, samples = 320, features = 400)
 feasibilityAnalysis(classes = 4, samples = 400, features = 120)
+
 }
... ...
@@ -27,4 +27,5 @@ deviation
 \examples{
 feasibilityAnalysisDataBased(data = toy_genes, classes = 3)
 feasibilityAnalysisDataBased(data = toy_genes, classes = 2)
+
 }
... ...
@@ -23,4 +23,5 @@ Generating the feature/gene signature per cluster
 }
 \examples{
 geneSignatures(toy_genes, toy_gene_memberships)
+
 }
... ...
@@ -24,4 +24,5 @@ Clustering with the optimal parameters estimated by these tools
 \examples{
 optimalClustering(toy_genes, 4,"spectral")
 optimalClustering(toy_genes, 2,"kmeans")
+
 }