... | ... |
@@ -76,3 +76,15 @@ export(plot_feature_selection) |
76 | 76 |
export(plot_partition_agreement) |
77 | 77 |
export(plot_signature_feature) |
78 | 78 |
export(plot_vote_frequencies) |
79 |
+import(ggplot2) |
|
80 |
+importFrom(clValid,clusters) |
|
81 |
+importFrom(diceR,prepare_data) |
|
82 |
+importFrom(dplyr,"%>%") |
|
83 |
+importFrom(dplyr,across) |
|
84 |
+importFrom(dplyr,filter) |
|
85 |
+importFrom(dplyr,left_join) |
|
86 |
+importFrom(fpc,hclustCBI) |
|
87 |
+importFrom(fpc,kmeansCBI) |
|
88 |
+importFrom(fpc,speccCBI) |
|
89 |
+importFrom(pdfCluster,adj.rand.index) |
|
90 |
+importFrom(stats,cutree) |
... | ... |
@@ -7,9 +7,10 @@ |
7 | 7 |
#' @param algorithm The clustering algorithm to use for the multiple clustering |
8 | 8 |
#' runs to be measured |
9 | 9 |
#' |
10 |
-#' @return An object of class "clusterVoting" containing a matrix with metric scores for every k and |
|
11 |
-#' internal index, cluster memberships for every k, a dataframe with the k votes |
|
12 |
-#' for every index, k vote frequencies and the frequency barplot of the k votes |
|
10 |
+#' @return An object of class "clusterVoting" containing a matrix with metric |
|
11 |
+#' scores for every k and internal index, cluster memberships for every k, a |
|
12 |
+#' dataframe with the k votes for every index, k vote frequencies and the |
|
13 |
+#' frequency barplot of the k votes |
|
13 | 14 |
#' |
14 | 15 |
#' @export |
15 | 16 |
#' |
... | ... |
@@ -17,6 +18,9 @@ |
17 | 18 |
#' clusterVoting(toy_genes, 4,14,"sc") |
18 | 19 |
#' clusterVoting(toy_genes, 2,7,"hc") |
19 | 20 |
#' clusterVoting(toy_genes, 2,4,"km") |
21 |
+#' |
|
22 |
+#' @importFrom diceR prepare_data |
|
23 |
+#' @import ggplot2 |
|
20 | 24 |
|
21 | 25 |
clusterVoting <- function(data ,min.k ,max.k, algorithm) { |
22 | 26 |
|
... | ... |
@@ -45,24 +49,25 @@ clusterVoting <- function(data ,min.k ,max.k, algorithm) { |
45 | 49 |
for(current_k in min.k:max.k) { |
46 | 50 |
|
47 | 51 |
if(algorithm == "sc") { |
48 |
- cl <- specc(data, centers=current_k, kernel = "rbfdot") |
|
52 |
+ cl <- kernlab::specc(data, centers=current_k, kernel = "rbfdot") |
|
49 | 53 |
cls <- [email protected] |
50 | 54 |
} else if(algorithm == "hc") { |
51 |
- dist_mat <- dist(data, method = "euclidean") |
|
52 |
- cl <- hclust(dist_mat, method = "average") |
|
53 |
- cls <- cutree(cl, k = current_k) |
|
55 |
+ dist_mat <- stats::dist(data, method = "euclidean") |
|
56 |
+ cl <- stats::hclust(dist_mat, method = "average") |
|
57 |
+ cls <- stats::cutree(cl, k = current_k) |
|
54 | 58 |
} else if(algorithm == "km") { |
55 |
- cl <- kmeans(data, current_k, algorithm = "Hartigan-Wong") |
|
59 |
+ cl <- stats::kmeans(data, current_k, algorithm = "Hartigan-Wong") |
|
56 | 60 |
cls <- cl$cluster |
57 | 61 |
} |
58 | 62 |
|
59 |
- criteria <- intCriteria(data,cls,c("calinski_harabasz","dunn","pbm","tau", |
|
60 |
- "gamma", "c_index","davies_bouldin", |
|
63 |
+ criteria <- clusterCrit::intCriteria(data,cls,c("calinski_harabasz","dunn", |
|
64 |
+ "pbm","tau", "gamma", |
|
65 |
+ "c_index","davies_bouldin", |
|
61 | 66 |
"mcclain_rao","sd_dis", "ray_turi", |
62 | 67 |
"g_plus","silhouette","s_dbw")) |
63 | 68 |
|
64 | 69 |
con <- clValid::connectivity(clusters = cls, Data = data) |
65 |
- comp <- compactness(data, cls) |
|
70 |
+ comp <- diceR::compactness(data, cls) |
|
66 | 71 |
criteria <- c(criteria, connectivity=con, compactness=comp) |
67 | 72 |
criteria <- array(as.numeric(unlist(criteria))) |
68 | 73 |
scores[, counter] <- criteria |
... | ... |
@@ -104,7 +109,8 @@ clusterVoting <- function(data ,min.k ,max.k, algorithm) { |
104 | 109 |
colnames(ensemble.results) <- c("k", "Frequency") |
105 | 110 |
ensemble.results$Frequency <- as.numeric(ensemble.results$Frequency) |
106 | 111 |
|
107 |
- ensemble.plot <- ggplot(ensemble.results, aes(k, Frequency, fill = k)) + |
|
112 |
+ ensemble.plot <- ggplot2::ggplot(ensemble.results, aes(k, Frequency, |
|
113 |
+ fill = k)) + |
|
108 | 114 |
geom_col() + |
109 | 115 |
scale_fill_brewer(palette="Dark2") |
110 | 116 |
|
... | ... |
@@ -16,6 +16,9 @@ |
16 | 16 |
#' number.of.comparisons = 4) |
17 | 17 |
#' clusteringMethodSelection(toy_genes, method.upper.k = 2, |
18 | 18 |
#' number.of.comparisons = 2) |
19 |
+#' |
|
20 |
+#' @import ggplot2 |
|
21 |
+#' @importFrom clValid clusters |
|
19 | 22 |
|
20 | 23 |
clusteringMethodSelection <- function(data, method.upper.k = 5, |
21 | 24 |
number.of.comparisons = 3) { |
... | ... |
@@ -114,10 +117,10 @@ clusteringMethodSelection <- function(data, method.upper.k = 5, |
114 | 117 |
s.mean <- mean(df.final$spectral) |
115 | 118 |
k.mean <- mean(df.final$kmeans) |
116 | 119 |
|
117 |
- df.plot <- melt(df.final, id=c("clusters")) |
|
120 |
+ df.plot <- reshape::melt(df.final, id=c("clusters")) |
|
118 | 121 |
colnames(df.plot) <- c("clusters", "methods", "value") |
119 | 122 |
|
120 |
- agreements.plot <- ggplot(df.plot, aes(x = clusters, y = value)) + |
|
123 |
+ agreements.plot <- ggplot2::ggplot(df.plot, aes(x = clusters, y = value)) + |
|
121 | 124 |
geom_line(aes(color = methods)) + |
122 | 125 |
geom_hline(aes(yintercept=h.mean), linetype="dashed") + |
123 | 126 |
geom_hline(aes(yintercept=s.mean), linetype="dashed") + |
... | ... |
@@ -15,6 +15,8 @@ |
15 | 15 |
#' @examples |
16 | 16 |
#' feasibilityAnalysisDataBased(data = toy_genes, classes = 3) |
17 | 17 |
#' feasibilityAnalysisDataBased(data = toy_genes, classes = 2) |
18 |
+#' |
|
19 |
+#' @importFrom fpc speccCBI |
|
18 | 20 |
|
19 | 21 |
feasibilityAnalysisDataBased <- function(data, classes = 3) { |
20 | 22 |
samples = dim(data)[1] |
... | ... |
@@ -39,7 +41,7 @@ feasibilityAnalysisDataBased <- function(data, classes = 3) { |
39 | 41 |
cl.index <- 1 |
40 | 42 |
feature.index <- 1 |
41 | 43 |
for (i in 1:features) { |
42 |
- temp <- rnorm(n = samples, |
|
44 |
+ temp <- stats::rnorm(n = samples, |
|
43 | 45 |
mean = c(class.means), |
44 | 46 |
sd = class.sd) |
45 | 47 |
dataset[, ncol(dataset) + 1] <- temp # Append temp column |
... | ... |
@@ -68,7 +70,7 @@ feasibilityAnalysisDataBased <- function(data, classes = 3) { |
68 | 70 |
} |
69 | 71 |
|
70 | 72 |
for (rep in c.min:c.max) { |
71 |
- sc.boot <- clusterboot( |
|
73 |
+ sc.boot <- fpc::clusterboot( |
|
72 | 74 |
stability.dataset, |
73 | 75 |
B = 25, |
74 | 76 |
bootmethod = "boot", |
... | ... |
@@ -16,6 +16,8 @@ |
16 | 16 |
#' @examples |
17 | 17 |
#' feasibilityAnalysis(classes = 3, samples = 320, features = 400) |
18 | 18 |
#' feasibilityAnalysis(classes = 4, samples = 400, features = 120) |
19 |
+#' |
|
20 |
+#' @importFrom fpc speccCBI |
|
19 | 21 |
|
20 | 22 |
feasibilityAnalysis <- function(classes = 3, samples = 320, features = 400) { |
21 | 23 |
|
... | ... |
@@ -29,7 +31,7 @@ feasibilityAnalysis <- function(classes = 3, samples = 320, features = 400) { |
29 | 31 |
cl.index <- 1 |
30 | 32 |
feature.index <- 1 |
31 | 33 |
for(i in 1:features) { |
32 |
- temp <- rnorm(n = samples, mean = c(class.means), sd = class.sd) |
|
34 |
+ temp <- stats::rnorm(n = samples, mean = c(class.means), sd = class.sd) |
|
33 | 35 |
dataset[ , ncol(dataset) + 1] <- temp # Append temp column |
34 | 36 |
colnames(dataset)[ncol(dataset)] <- paste0("feature_", feature.index) |
35 | 37 |
feature.index <- feature.index + 1 |
... | ... |
@@ -56,7 +58,7 @@ feasibilityAnalysis <- function(classes = 3, samples = 320, features = 400) { |
56 | 58 |
|
57 | 59 |
for(rep in c.min:c.max) { |
58 | 60 |
|
59 |
- sc.boot <- clusterboot(stability.dataset, |
|
61 |
+ sc.boot <- fpc::clusterboot(stability.dataset, |
|
60 | 62 |
B = 25, |
61 | 63 |
bootmethod = "boot", |
62 | 64 |
clustermethod = speccCBI, |
... | ... |
@@ -16,6 +16,10 @@ |
16 | 16 |
#' featureSelection(toy_genes, min.k = 3, max.k = 9, step = 3) |
17 | 17 |
#' featureSelection(toy_genes, min.k = 2, max.k = 4, step = 4) |
18 | 18 |
#' |
19 |
+#' @importFrom fpc speccCBI |
|
20 |
+#' @import ggplot2 |
|
21 |
+ |
|
22 |
+ |
|
19 | 23 |
featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) { |
20 | 24 |
|
21 | 25 |
print("Selecting feature subset...") |
... | ... |
@@ -27,7 +31,7 @@ featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) { |
27 | 31 |
averages.of.all.k <- list() |
28 | 32 |
|
29 | 33 |
# Sorted features based on variance across data points |
30 |
- features.variance <- data.frame(apply(data, 2, var)) |
|
34 |
+ features.variance <- data.frame(apply(data, 2, stats::var)) |
|
31 | 35 |
colnames(features.variance) <- "variance" |
32 | 36 |
sorted.features.variance <- |
33 | 37 |
features.variance[order(features.variance$variance,decreasing = TRUE), , |
... | ... |
@@ -47,7 +51,7 @@ featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) { |
47 | 51 |
|
48 | 52 |
cur <- sorted.features.variance$names[1:fs] |
49 | 53 |
|
50 |
- sc.boot <- clusterboot(data[,cur], |
|
54 |
+ sc.boot <- fpc::clusterboot(data[,cur], |
|
51 | 55 |
B = 25, |
52 | 56 |
bootmethod = "boot", |
53 | 57 |
clustermethod = speccCBI, |
... | ... |
@@ -85,7 +89,7 @@ featureSelection <- function(data, min.k = 2, max.k = 4, step = 5) { |
85 | 89 |
all.feature.k.stabilities$featureSet <- |
86 | 90 |
as.integer(as.character(all.feature.k.stabilities$featureSet)) |
87 | 91 |
|
88 |
- stabilities.plot <- ggplot(data = all.feature.k.stabilities, |
|
92 |
+ stabilities.plot <- ggplot2::ggplot(data = all.feature.k.stabilities, |
|
89 | 93 |
aes(x=featureSet, y=means)) + |
90 | 94 |
geom_line(color='firebrick',group = 1, size = 0.5) + |
91 | 95 |
geom_point(color='firebrick', group = 1) + |
... | ... |
@@ -14,9 +14,15 @@ |
14 | 14 |
#' |
15 | 15 |
#' @examples |
16 | 16 |
#' geneSignatures(toy_genes, toy_gene_memberships) |
17 |
+#' |
|
18 |
+#' @import ggplot2 |
|
19 |
+#' @importFrom dplyr across filter %>% left_join |
|
20 |
+ |
|
17 | 21 |
|
18 | 22 |
geneSignatures <- function(data, memberships) { |
19 | 23 |
|
24 |
+ # utils::globalVariables("where", add=FALSE) |
|
25 |
+ |
|
20 | 26 |
data <- as.data.frame(data) |
21 | 27 |
rnames <- row.names(data) |
22 | 28 |
data$id <- rnames |
... | ... |
@@ -39,19 +45,19 @@ geneSignatures <- function(data, memberships) { |
39 | 45 |
|
40 | 46 |
# Running cross-validation Lasso to find optimal lambda value |
41 | 47 |
data.matrix <- as.matrix(data[,2:dim(data)[2]]) |
42 |
- cv_model <- cv.glmnet(data.matrix(data.matrix), data$membership, family = "multinomial", |
|
43 |
- alpha = 1) |
|
48 |
+ cv_model <- glmnet::cv.glmnet(data.matrix(data.matrix), data$membership, |
|
49 |
+ family = "multinomial", alpha = 1) |
|
44 | 50 |
|
45 | 51 |
# Optimal lambda value (minimizing test MSE) |
46 | 52 |
optimal_lambda <- cv_model$lambda.min |
47 | 53 |
|
48 | 54 |
# Running optimal lasso model |
49 |
- optimal_lasso <- glmnet(data.matrix(data.matrix), data$membership, |
|
55 |
+ optimal_lasso <- glmnet::glmnet(data.matrix(data.matrix), data$membership, |
|
50 | 56 |
family = "multinomial", |
51 | 57 |
alpha = 1, lambda = optimal_lambda) |
52 | 58 |
|
53 | 59 |
# Extract coefficients for minimized test MSE) |
54 |
- Coefficients <- coef(optimal_lasso, s = "min") |
|
60 |
+ Coefficients <- stats::coef(optimal_lasso, s = "min") |
|
55 | 61 |
|
56 | 62 |
# Formatting coefficient dataframe per cluster |
57 | 63 |
ns <- names(Coefficients) |
... | ... |
@@ -78,9 +84,10 @@ geneSignatures <- function(data, memberships) { |
78 | 84 |
|
79 | 85 |
# retain top 30% |
80 | 86 |
coef.dataset <- coef.dataset[1:round(dim(coef.dataset)[1]*0.3, digits = 0),] |
81 |
- coef.data.melt <- melt(coef.dataset) |
|
87 |
+ coef.data.melt <- reshape::melt(coef.dataset) |
|
82 | 88 |
|
83 |
- coef.30perc <- ggplot(data = coef.data.melt, aes(x = features, y = value, |
|
89 |
+ coef.30perc <- ggplot2::ggplot(data = coef.data.melt, |
|
90 |
+ aes(x = features, y = value, |
|
84 | 91 |
fill = variable)) + |
85 | 92 |
geom_bar(stat = "identity") + |
86 | 93 |
theme(axis.title.x=element_blank(), |
... | ... |
@@ -13,6 +13,8 @@ |
13 | 13 |
#' @examples |
14 | 14 |
#' optimalClustering(toy_genes, 4,"spectral") |
15 | 15 |
#' optimalClustering(toy_genes, 2,"kmeans") |
16 |
+#' |
|
17 |
+#' @importFrom fpc speccCBI hclustCBI kmeansCBI |
|
16 | 18 |
|
17 | 19 |
optimalClustering <- function(data, clusters, algorithm) { |
18 | 20 |
|
... | ... |
@@ -28,7 +30,7 @@ optimalClustering <- function(data, clusters, algorithm) { |
28 | 30 |
|
29 | 31 |
for (par in spectral.kernels) { |
30 | 32 |
|
31 |
- sc.boot <- clusterboot(data, |
|
33 |
+ sc.boot <- fpc::clusterboot(data, |
|
32 | 34 |
B = 25, |
33 | 35 |
bootmethod = "boot", |
34 | 36 |
clustermethod = speccCBI, |
... | ... |
@@ -55,7 +57,7 @@ optimalClustering <- function(data, clusters, algorithm) { |
55 | 57 |
|
56 | 58 |
for (par in hierarchical.methods) { |
57 | 59 |
|
58 |
- sc.boot <- clusterboot(data, |
|
60 |
+ sc.boot <- fpc::clusterboot(data, |
|
59 | 61 |
B = 25, |
60 | 62 |
bootmethod = "boot", |
61 | 63 |
clustermethod = hclustCBI, |
... | ... |
@@ -81,7 +83,7 @@ optimalClustering <- function(data, clusters, algorithm) { |
81 | 83 |
|
82 | 84 |
for (par in kmeans.kernels) { |
83 | 85 |
|
84 |
- sc.boot <- clusterboot(data, |
|
86 |
+ sc.boot <- fpc::clusterboot(data, |
|
85 | 87 |
B = 25, |
86 | 88 |
bootmethod = "boot", |
87 | 89 |
clustermethod = kmeansCBI, |
... | ... |
@@ -45,6 +45,9 @@ |
45 | 45 |
#' partitionAgreement(toy_genes, algorithm.1 = "spectral", measure.1 = "rbfdot", |
46 | 46 |
#' algorithm.2 = "kmeans",measure.2 = "Lloyd", number.of.clusters = 5) |
47 | 47 |
#' @export |
48 |
+#' |
|
49 |
+#' @importFrom stats cutree |
|
50 |
+#' @importFrom pdfCluster adj.rand.index |
|
48 | 51 |
|
49 | 52 |
partitionAgreement <- function(data, algorithm.1 = "hierarchical", |
50 | 53 |
measure.1 = "canberra", |
... | ... |
@@ -62,22 +65,22 @@ partitionAgreement <- function(data, algorithm.1 = "hierarchical", |
62 | 65 |
for(i in 2:number.of.clusters) { |
63 | 66 |
#Spectral clustering |
64 | 67 |
if(algorithm.1 == "spectral") { |
65 |
- cl <- specc(dataset, centers=i, kernel = measure.1) |
|
68 |
+ cl <- kernlab::specc(dataset, centers=i, kernel = measure.1) |
|
66 | 69 |
temp <- data.frame([email protected]) |
67 | 70 |
cr1 <- cbind(cr1, temp) |
68 | 71 |
} |
69 | 72 |
|
70 | 73 |
#Hierarchical |
71 | 74 |
else if(algorithm.1 == "hierarchical") { |
72 |
- dist_mat <- dist(dataset, method = measure.1) |
|
73 |
- cl <- hclust(dist_mat, method = hier.agglo.algorithm.1) |
|
75 |
+ dist_mat <- stats::dist(dataset, method = measure.1) |
|
76 |
+ cl <- stats::hclust(dist_mat, method = hier.agglo.algorithm.1) |
|
74 | 77 |
temp <- data.frame(cutree(cl, k = i)) |
75 | 78 |
cr1 <- cbind(cr1, temp) |
76 | 79 |
} |
77 | 80 |
|
78 | 81 |
# #k-means |
79 | 82 |
else if(algorithm.1 == "kmeans") { |
80 |
- cl <- kmeans(dataset, i, algorithm = measure.1) |
|
83 |
+ cl <- stats::kmeans(dataset, i, algorithm = measure.1) |
|
81 | 84 |
temp <- data.frame(cl$cluster) |
82 | 85 |
cr1 <- cbind(cr1, temp) |
83 | 86 |
} |
... | ... |
@@ -89,22 +92,22 @@ partitionAgreement <- function(data, algorithm.1 = "hierarchical", |
89 | 92 |
for(i in 2:number.of.clusters) { |
90 | 93 |
#Spectral clustering |
91 | 94 |
if(algorithm.2 == "spectral") { |
92 |
- cl <- specc(dataset, centers=i, kernel = measure.2) |
|
95 |
+ cl <- kernlab::specc(dataset, centers=i, kernel = measure.2) |
|
93 | 96 |
temp <- data.frame([email protected]) |
94 | 97 |
cr2 <- cbind(cr2, temp) |
95 | 98 |
} |
96 | 99 |
|
97 | 100 |
#Hierarchical |
98 | 101 |
else if(algorithm.2 == "hierarchical") { |
99 |
- dist_mat <- dist(dataset, method = measure.2) |
|
100 |
- cl <- hclust(dist_mat, method = hier.agglo.algorithm.2) |
|
102 |
+ dist_mat <- stats::dist(dataset, method = measure.2) |
|
103 |
+ cl <- stats::hclust(dist_mat, method = hier.agglo.algorithm.2) |
|
101 | 104 |
temp <- data.frame(cutree(cl, k = i)) |
102 | 105 |
cr2 <- cbind(cr2, temp) |
103 | 106 |
} |
104 | 107 |
|
105 | 108 |
#k-means |
106 | 109 |
else if(algorithm.2 == "kmeans") { |
107 |
- cl <- kmeans(dataset, i, algorithm = measure.2) |
|
110 |
+ cl <- stats::kmeans(dataset, i, algorithm = measure.2) |
|
108 | 111 |
temp <- data.frame(cl$cluster) |
109 | 112 |
cr2 <- cbind(cr2, temp) |
110 | 113 |
} |
... | ... |
@@ -18,9 +18,10 @@ clusterVoting(data, min.k, max.k, algorithm) |
18 | 18 |
runs to be measured} |
19 | 19 |
} |
20 | 20 |
\value{ |
21 |
-An object of class "clusterVoting" containing a matrix with metric scores for every k and |
|
22 |
-internal index, cluster memberships for every k, a dataframe with the k votes |
|
23 |
- for every index, k vote frequencies and the frequency barplot of the k votes |
|
21 |
+An object of class "clusterVoting" containing a matrix with metric |
|
22 |
+ scores for every k and internal index, cluster memberships for every k, a |
|
23 |
+ dataframe with the k votes for every index, k vote frequencies and the |
|
24 |
+ frequency barplot of the k votes |
|
24 | 25 |
} |
25 | 26 |
\description{ |
26 | 27 |
Estimating number of clusters through internal exhaustive ensemble majority |
... | ... |
@@ -30,4 +31,5 @@ voting |
30 | 31 |
clusterVoting(toy_genes, 4,14,"sc") |
31 | 32 |
clusterVoting(toy_genes, 2,7,"hc") |
32 | 33 |
clusterVoting(toy_genes, 2,4,"km") |
34 |
+ |
|
33 | 35 |
} |