... | ... |
@@ -2,7 +2,7 @@ Package: omada |
2 | 2 |
Type: Package |
3 | 3 |
Title: Machine learning tools for automated transcriptome |
4 | 4 |
clustering analysis |
5 |
-Version: 1.5.0 |
|
5 |
+Version: 1.5.1 |
|
6 | 6 |
Authors@R: person("Sokratis", "Kariotis", role = c("aut", "cre"), |
7 | 7 |
email = "[email protected]", |
8 | 8 |
comment = c(ORCID = "0000-0001-9993-6017")) |
... | ... |
@@ -42,7 +42,7 @@ Suggests: |
42 | 42 |
testthat |
43 | 43 |
License: GPL-3 |
44 | 44 |
Encoding: UTF-8 |
45 |
-RoxygenNote: 7.2.0 |
|
45 |
+RoxygenNote: 7.2.1 |
|
46 | 46 |
VignetteBuilder: knitr |
47 | 47 |
biocViews: Software, Clustering, RNASeq, GeneExpression |
48 | 48 |
LazyData: true |
... | ... |
@@ -26,6 +26,7 @@ S3method(get_partition_agreement_scores,clusterAnalysis) |
26 | 26 |
S3method(get_partition_agreement_scores,methodSelection) |
27 | 27 |
S3method(get_sample_memberships,clusterAnalysis) |
28 | 28 |
S3method(get_signature_feature_coefs,clusterAnalysis) |
29 |
+S3method(get_top_coefficients,geneSignature) |
|
29 | 30 |
S3method(get_vote_frequencies_k,clusterVoting) |
30 | 31 |
S3method(plot_average_stabilities,featureSelection) |
31 | 32 |
S3method(plot_cluster_voting,clusterAnalysis) |
... | ... |
@@ -33,7 +34,7 @@ S3method(plot_feature_selection,clusterAnalysis) |
33 | 34 |
S3method(plot_partition_agreement,clusterAnalysis) |
34 | 35 |
S3method(plot_partition_agreement,methodSelection) |
35 | 36 |
S3method(plot_signature_feature,clusterAnalysis) |
36 |
-S3method(plot_top30percent_coefficients,geneSignature) |
|
37 |
+S3method(plot_top_coefficients,geneSignature) |
|
37 | 38 |
S3method(plot_vote_frequencies,clusterVoting) |
38 | 39 |
export(clusterVoting) |
39 | 40 |
export(clusteringMethodSelection) |
... | ... |
@@ -66,6 +67,7 @@ export(get_optimal_stability_score) |
66 | 67 |
export(get_partition_agreement_scores) |
67 | 68 |
export(get_sample_memberships) |
68 | 69 |
export(get_signature_feature_coefs) |
70 |
+export(get_top_coefficients) |
|
69 | 71 |
export(get_vote_frequencies_k) |
70 | 72 |
export(omada) |
71 | 73 |
export(optimalClustering) |
... | ... |
@@ -75,7 +77,7 @@ export(plot_cluster_voting) |
75 | 77 |
export(plot_feature_selection) |
76 | 78 |
export(plot_partition_agreement) |
77 | 79 |
export(plot_signature_feature) |
78 |
-export(plot_top30percent_coefficients) |
|
80 |
+export(plot_top_coefficients) |
|
79 | 81 |
export(plot_vote_frequencies) |
80 | 82 |
import(ggplot2) |
81 | 83 |
importFrom(clValid,clusters) |
82 | 84 |
deleted file mode 100644 |
... | ... |
@@ -1,118 +0,0 @@ |
1 |
-#' Generating the feature/gene signature per cluster |
|
2 |
-#' |
|
3 |
-#' @param data A dataframe, where columns are features and rows are data points. |
|
4 |
-#' |
|
5 |
-#' @param memberships A dataframe with column "id" (same samples ids as above) |
|
6 |
-#' and column "membership" containing the cluster membership of each sample. |
|
7 |
-#' The memberships must be strings |
|
8 |
-#' |
|
9 |
-#' @return An object of class "geneSignature" containing a list of LASSO |
|
10 |
-#' (regression analysis) coefficients of each gene and a plot of the highest |
|
11 |
-#' 30% of coefficients per cluster. |
|
12 |
-#' |
|
13 |
-#' @export |
|
14 |
-#' |
|
15 |
-#' @examples |
|
16 |
-#' geneSignatures(toy_genes, toy_gene_memberships) |
|
17 |
-#' |
|
18 |
-#' @import ggplot2 |
|
19 |
-#' @importFrom dplyr across filter %>% left_join |
|
20 |
- |
|
21 |
- |
|
22 |
-geneSignatures <- function(data, memberships) { |
|
23 |
- |
|
24 |
- # utils::globalVariables("where", add=FALSE) |
|
25 |
- |
|
26 |
- data <- as.data.frame(data) |
|
27 |
- rnames <- row.names(data) |
|
28 |
- data$id <- rnames |
|
29 |
- |
|
30 |
- # if(!("id" %in% colnames(data))) |
|
31 |
- # { |
|
32 |
- # id <- paste0("s", 1:dim(data)[1]) |
|
33 |
- # data <- cbind(id,data) |
|
34 |
- # } |
|
35 |
- |
|
36 |
- # Composite data |
|
37 |
- data <- left_join(memberships, data) |
|
38 |
- data$id <- NULL |
|
39 |
- row.names(data) <- rnames |
|
40 |
- |
|
41 |
- # Cluster names |
|
42 |
- clusters <- unique(memberships$membership) |
|
43 |
- |
|
44 |
- # Running cross-validation Lasso to find optimal lambda value |
|
45 |
- data.matrix <- as.matrix(data[,2:dim(data)[2]]) |
|
46 |
- cv_model <- glmnet::cv.glmnet(data.matrix(data.matrix), data$membership, |
|
47 |
- family = "multinomial", alpha = 1) |
|
48 |
- |
|
49 |
- # Optimal lambda value (minimizing test MSE) |
|
50 |
- optimal_lambda <- cv_model$lambda.min |
|
51 |
- |
|
52 |
- # Running optimal lasso model |
|
53 |
- optimal_lasso <- glmnet::glmnet(data.matrix(data.matrix), data$membership, |
|
54 |
- family = "multinomial", |
|
55 |
- alpha = 1, lambda = optimal_lambda) |
|
56 |
- |
|
57 |
- # Extract coefficients for minimized test MSE) |
|
58 |
- Coefficients <- stats::coef(optimal_lasso, s = "min") |
|
59 |
- |
|
60 |
- # Formatting coefficient dataframe per cluster |
|
61 |
- ns <- names(Coefficients) |
|
62 |
- ni <- 1 |
|
63 |
- coef.dataset <- data.frame(matrix(ncol= 0, nrow=dim(data)[2]-1)) |
|
64 |
- |
|
65 |
- for(i in Coefficients) { |
|
66 |
- temp <- as.data.frame(as.matrix(i)) %>% `colnames<-`(ns[ni]) |
|
67 |
- temp$Cluster <- ns[ni] |
|
68 |
- temp <- temp[-1,] |
|
69 |
- temp$Cluster <- NULL |
|
70 |
- coef.dataset <- cbind(coef.dataset, temp) |
|
71 |
- ni <- ni + 1 |
|
72 |
- } |
|
73 |
- |
|
74 |
- # Calculating mean coefficient per feature across clusters |
|
75 |
- coef.dataset <- filter(coef.dataset, |
|
76 |
- rowSums(abs(across(where(is.numeric))))!=0) |
|
77 |
- coef.dataset$means <- rowMeans(coef.dataset) |
|
78 |
- coef.dataset <- coef.dataset[with(coef.dataset, order(abs(means), |
|
79 |
- decreasing = TRUE)),] |
|
80 |
- coef.dataset$features <- rownames(coef.dataset) |
|
81 |
- coef.dataset$means <- NULL #addition |
|
82 |
- |
|
83 |
- # retain top 30% |
|
84 |
- coef.dataset <- coef.dataset[seq_len(round(dim(coef.dataset)[1]*0.3, |
|
85 |
- digits = 0)),] |
|
86 |
- coef.data.melt <- reshape::melt(coef.dataset) |
|
87 |
- |
|
88 |
- coef.30perc <- ggplot2::ggplot(data = coef.data.melt, |
|
89 |
- aes(x = features, y = value, |
|
90 |
- fill = variable)) + |
|
91 |
- geom_bar(stat = "identity") + |
|
92 |
- theme(axis.title.x=element_blank(), |
|
93 |
- axis.text.x = element_text(angle=45, vjust = 1, hjust = 1, |
|
94 |
- size = 12), |
|
95 |
- plot.title = element_text(hjust = 0.5), |
|
96 |
- axis.title.y = element_text(size = 15), |
|
97 |
- legend.position = "none") + |
|
98 |
- geom_hline(yintercept=0, linetype="dashed", color = "red") + |
|
99 |
- labs(title = "Coefficients") + |
|
100 |
- facet_grid(variable~.) |
|
101 |
- |
|
102 |
- geneSignature <- |
|
103 |
- function(coefficient.dataset = coef.dataset, |
|
104 |
- top30percent.coefficients = coef.30perc){ |
|
105 |
- |
|
106 |
- gs <- list(coefficient.dataset = coefficient.dataset, |
|
107 |
- top30percent.coefficients = top30percent.coefficients) |
|
108 |
- |
|
109 |
- ## Set the name for the class |
|
110 |
- class(gs) <- "geneSignature" |
|
111 |
- |
|
112 |
- return(gs) |
|
113 |
- } |
|
114 |
- |
|
115 |
- gene.signature <- geneSignature() |
|
116 |
- |
|
117 |
- return(gene.signature) |
|
118 |
-} |
119 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,12 +0,0 @@ |
1 |
-#' Plot of the highest 30 percentage of coefficients per cluster |
|
2 |
-#' |
|
3 |
-#' @param object An object of class "geneSignature" |
|
4 |
-#' @return A plot of the highest 30 percentage of coefficients per cluster |
|
5 |
-#' @export |
|
6 |
-get_top30percent_coefficients <- function(object) { |
|
7 |
- UseMethod("get_top30percent_coefficients") |
|
8 |
-} |
|
9 |
-#' @export |
|
10 |
-get_top30percent_coefficients.geneSignature <- function(object) { |
|
11 |
- object$top30percent.coefficients |
|
12 |
-} |
... | ... |
@@ -86,7 +86,7 @@ omada <- function(data, method.upper.k = 5) { |
86 | 86 |
# data$id <- rownames(data) |
87 | 87 |
gene.signature.results <- geneSignatures(data, memberships) |
88 | 88 |
gs.matrix <- get_coefficient_dataset(gene.signature.results) |
89 |
- gs.plot <- plot_top30percent_coefficients(gene.signature.results) |
|
89 |
+ gs.plot <- plot_top_coefficients(gene.signature.results) |
|
90 | 90 |
|
91 | 91 |
clusterAnalysis <- function(partition.agreement.scores=pa.df, |
92 | 92 |
partition.agreement.plot=pa.plot, |
93 | 93 |
deleted file mode 100644 |
... | ... |
@@ -1,16 +0,0 @@ |
1 |
-#' Plot of the highest 30 percentage of coefficients per cluster |
|
2 |
-#' |
|
3 |
-#' @param object An object of class "geneSignature" |
|
4 |
-#' @return A plot of the highest 30 percentage of coefficients per cluster |
|
5 |
-#' @export |
|
6 |
-#' |
|
7 |
-#' @examples |
|
8 |
-#' gs.object <- geneSignatures(toy_genes, toy_gene_memberships) |
|
9 |
-#' plot_top30percent_coefficients(gs.object) |
|
10 |
-plot_top30percent_coefficients <- function(object) { |
|
11 |
- UseMethod("plot_top30percent_coefficients") |
|
12 |
-} |
|
13 |
-#' @export |
|
14 |
-plot_top30percent_coefficients.geneSignature <- function(object) { |
|
15 |
- object$top30percent.coefficients |
|
16 |
-} |
6 | 6 |
deleted file mode 100644 |
... | ... |
@@ -1,17 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/get_top30percent_coefficients.R |
|
3 |
-\name{get_top30percent_coefficients} |
|
4 |
-\alias{get_top30percent_coefficients} |
|
5 |
-\title{Plot of the highest 30 percentage of coefficients per cluster} |
|
6 |
-\usage{ |
|
7 |
-get_top30percent_coefficients(object) |
|
8 |
-} |
|
9 |
-\arguments{ |
|
10 |
-\item{object}{An object of class "geneSignature"} |
|
11 |
-} |
|
12 |
-\value{ |
|
13 |
-A plot of the highest 30 percentage of coefficients per cluster |
|
14 |
-} |
|
15 |
-\description{ |
|
16 |
-Plot of the highest 30 percentage of coefficients per cluster |
|
17 |
-} |
18 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,21 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/plot_top30percent_coefficients.R |
|
3 |
-\name{plot_top30percent_coefficients} |
|
4 |
-\alias{plot_top30percent_coefficients} |
|
5 |
-\title{Plot of the highest 30 percentage of coefficients per cluster} |
|
6 |
-\usage{ |
|
7 |
-plot_top30percent_coefficients(object) |
|
8 |
-} |
|
9 |
-\arguments{ |
|
10 |
-\item{object}{An object of class "geneSignature"} |
|
11 |
-} |
|
12 |
-\value{ |
|
13 |
-A plot of the highest 30 percentage of coefficients per cluster |
|
14 |
-} |
|
15 |
-\description{ |
|
16 |
-Plot of the highest 30 percentage of coefficients per cluster |
|
17 |
-} |
|
18 |
-\examples{ |
|
19 |
-gs.object <- geneSignatures(toy_genes, toy_gene_memberships) |
|
20 |
-plot_top30percent_coefficients(gs.object) |
|
21 |
-} |
... | ... |
@@ -165,6 +165,6 @@ signature.results <- geneSignatures(toy_genes, toy_gene_memberships) |
165 | 165 |
signature.dataframe <- get_coefficient_dataset(signature.results) |
166 | 166 |
|
167 | 167 |
# Plot results |
168 |
-plot_top30percent_coefficients(signature.results) |
|
168 |
+plot_top_coefficients(signature.results) |
|
169 | 169 |
``` |
170 | 170 |
|