Browse code

updating naming

Sokratis Kariotis authored on 06/02/2024 07:59:21
Showing 10 changed files

... ...
@@ -2,7 +2,7 @@ Package: omada
2 2
 Type: Package
3 3
 Title: Machine learning tools for automated transcriptome 
4 4
     clustering analysis
5
-Version: 1.5.0
5
+Version: 1.5.1
6 6
 Authors@R: person("Sokratis", "Kariotis", role = c("aut", "cre"),
7 7
                         email = "[email protected]",
8 8
                         comment = c(ORCID = "0000-0001-9993-6017"))
... ...
@@ -42,7 +42,7 @@ Suggests:
42 42
     testthat
43 43
 License: GPL-3
44 44
 Encoding: UTF-8
45
-RoxygenNote: 7.2.0
45
+RoxygenNote: 7.2.1
46 46
 VignetteBuilder: knitr
47 47
 biocViews: Software, Clustering, RNASeq, GeneExpression
48 48
 LazyData: true
... ...
@@ -26,6 +26,7 @@ S3method(get_partition_agreement_scores,clusterAnalysis)
26 26
 S3method(get_partition_agreement_scores,methodSelection)
27 27
 S3method(get_sample_memberships,clusterAnalysis)
28 28
 S3method(get_signature_feature_coefs,clusterAnalysis)
29
+S3method(get_top_coefficients,geneSignature)
29 30
 S3method(get_vote_frequencies_k,clusterVoting)
30 31
 S3method(plot_average_stabilities,featureSelection)
31 32
 S3method(plot_cluster_voting,clusterAnalysis)
... ...
@@ -33,7 +34,7 @@ S3method(plot_feature_selection,clusterAnalysis)
33 34
 S3method(plot_partition_agreement,clusterAnalysis)
34 35
 S3method(plot_partition_agreement,methodSelection)
35 36
 S3method(plot_signature_feature,clusterAnalysis)
36
-S3method(plot_top30percent_coefficients,geneSignature)
37
+S3method(plot_top_coefficients,geneSignature)
37 38
 S3method(plot_vote_frequencies,clusterVoting)
38 39
 export(clusterVoting)
39 40
 export(clusteringMethodSelection)
... ...
@@ -66,6 +67,7 @@ export(get_optimal_stability_score)
66 67
 export(get_partition_agreement_scores)
67 68
 export(get_sample_memberships)
68 69
 export(get_signature_feature_coefs)
70
+export(get_top_coefficients)
69 71
 export(get_vote_frequencies_k)
70 72
 export(omada)
71 73
 export(optimalClustering)
... ...
@@ -75,7 +77,7 @@ export(plot_cluster_voting)
75 77
 export(plot_feature_selection)
76 78
 export(plot_partition_agreement)
77 79
 export(plot_signature_feature)
78
-export(plot_top30percent_coefficients)
80
+export(plot_top_coefficients)
79 81
 export(plot_vote_frequencies)
80 82
 import(ggplot2)
81 83
 importFrom(clValid,clusters)
82 84
deleted file mode 100644
... ...
@@ -1,118 +0,0 @@
1
-#' Generating the feature/gene signature per cluster
2
-#'
3
-#' @param data A dataframe, where columns are features and rows are data points.
4
-#'
5
-#' @param memberships A dataframe with column "id" (same samples ids as above)
6
-#' and column "membership" containing the cluster membership of each sample.
7
-#' The memberships must be strings
8
-#'
9
-#' @return An object of class "geneSignature" containing a list of LASSO
10
-#' (regression analysis) coefficients of each gene and a plot of the highest
11
-#' 30% of coefficients per cluster.
12
-#'
13
-#' @export
14
-#'
15
-#' @examples
16
-#' geneSignatures(toy_genes, toy_gene_memberships)
17
-#'
18
-#' @import ggplot2
19
-#' @importFrom dplyr across filter %>% left_join
20
-
21
-
22
-geneSignatures <- function(data, memberships) {
23
-
24
-    # utils::globalVariables("where", add=FALSE)
25
-
26
-    data <- as.data.frame(data)
27
-    rnames <- row.names(data)
28
-    data$id <- rnames
29
-
30
-    # if(!("id" %in% colnames(data)))
31
-    # {
32
-    #     id <- paste0("s", 1:dim(data)[1])
33
-    #     data <- cbind(id,data)
34
-    # }
35
-
36
-    # Composite data
37
-    data <- left_join(memberships, data)
38
-    data$id <- NULL
39
-    row.names(data) <- rnames
40
-
41
-    # Cluster names
42
-    clusters <- unique(memberships$membership)
43
-
44
-    # Running cross-validation Lasso to find optimal lambda value
45
-    data.matrix <- as.matrix(data[,2:dim(data)[2]])
46
-    cv_model <- glmnet::cv.glmnet(data.matrix(data.matrix), data$membership,
47
-                                  family = "multinomial", alpha = 1)
48
-
49
-    # Optimal lambda value (minimizing test MSE)
50
-    optimal_lambda <- cv_model$lambda.min
51
-
52
-    # Running optimal lasso model
53
-    optimal_lasso <- glmnet::glmnet(data.matrix(data.matrix), data$membership,
54
-                            family = "multinomial",
55
-                            alpha = 1, lambda = optimal_lambda)
56
-
57
-    # Extract coefficients for minimized test MSE)
58
-    Coefficients <- stats::coef(optimal_lasso, s = "min")
59
-
60
-    # Formatting coefficient dataframe per cluster
61
-    ns <- names(Coefficients)
62
-    ni <- 1
63
-    coef.dataset <- data.frame(matrix(ncol= 0, nrow=dim(data)[2]-1))
64
-
65
-    for(i in Coefficients) {
66
-        temp <- as.data.frame(as.matrix(i)) %>% `colnames<-`(ns[ni])
67
-        temp$Cluster <- ns[ni]
68
-        temp <- temp[-1,]
69
-        temp$Cluster <- NULL
70
-        coef.dataset <- cbind(coef.dataset, temp)
71
-        ni <- ni + 1
72
-    }
73
-
74
-    # Calculating mean coefficient per feature across clusters
75
-    coef.dataset <- filter(coef.dataset,
76
-                           rowSums(abs(across(where(is.numeric))))!=0)
77
-    coef.dataset$means <- rowMeans(coef.dataset)
78
-    coef.dataset <- coef.dataset[with(coef.dataset, order(abs(means),
79
-                                                          decreasing = TRUE)),]
80
-    coef.dataset$features <- rownames(coef.dataset)
81
-    coef.dataset$means <- NULL #addition
82
-
83
-    # retain top 30%
84
-    coef.dataset <- coef.dataset[seq_len(round(dim(coef.dataset)[1]*0.3,
85
-                                               digits = 0)),]
86
-    coef.data.melt <- reshape::melt(coef.dataset)
87
-
88
-    coef.30perc <- ggplot2::ggplot(data = coef.data.melt,
89
-                                   aes(x = features, y = value,
90
-                                                     fill = variable)) +
91
-        geom_bar(stat = "identity") +
92
-        theme(axis.title.x=element_blank(),
93
-              axis.text.x = element_text(angle=45, vjust = 1, hjust = 1,
94
-                                         size = 12),
95
-              plot.title = element_text(hjust = 0.5),
96
-              axis.title.y = element_text(size = 15),
97
-              legend.position = "none") +
98
-        geom_hline(yintercept=0, linetype="dashed", color = "red") +
99
-        labs(title = "Coefficients") +
100
-        facet_grid(variable~.)
101
-
102
-    geneSignature <-
103
-        function(coefficient.dataset = coef.dataset,
104
-                 top30percent.coefficients = coef.30perc){
105
-
106
-            gs <- list(coefficient.dataset = coefficient.dataset,
107
-                       top30percent.coefficients = top30percent.coefficients)
108
-
109
-            ## Set the name for the class
110
-            class(gs) <- "geneSignature"
111
-
112
-            return(gs)
113
-        }
114
-
115
-    gene.signature <- geneSignature()
116
-
117
-    return(gene.signature)
118
-}
119 0
deleted file mode 100644
... ...
@@ -1,12 +0,0 @@
1
-#' Plot of the highest 30 percentage of coefficients per cluster
2
-#'
3
-#' @param object An object of class "geneSignature"
4
-#' @return A plot of the highest 30 percentage of coefficients per cluster
5
-#' @export
6
-get_top30percent_coefficients <- function(object) {
7
-    UseMethod("get_top30percent_coefficients")
8
-}
9
-#' @export
10
-get_top30percent_coefficients.geneSignature <- function(object) {
11
-    object$top30percent.coefficients
12
-}
... ...
@@ -86,7 +86,7 @@ omada <- function(data, method.upper.k = 5) {
86 86
     # data$id <- rownames(data)
87 87
     gene.signature.results <- geneSignatures(data, memberships)
88 88
     gs.matrix <- get_coefficient_dataset(gene.signature.results)
89
-    gs.plot <- plot_top30percent_coefficients(gene.signature.results)
89
+    gs.plot <- plot_top_coefficients(gene.signature.results)
90 90
 
91 91
     clusterAnalysis <- function(partition.agreement.scores=pa.df,
92 92
                                 partition.agreement.plot=pa.plot,
93 93
deleted file mode 100644
... ...
@@ -1,16 +0,0 @@
1
-#' Plot of the highest 30 percentage of coefficients per cluster
2
-#'
3
-#' @param object An object of class "geneSignature"
4
-#' @return A plot of the highest 30 percentage of coefficients per cluster
5
-#' @export
6
-#'
7
-#' @examples
8
-#' gs.object <- geneSignatures(toy_genes, toy_gene_memberships)
9
-#' plot_top30percent_coefficients(gs.object)
10
-plot_top30percent_coefficients <- function(object) {
11
-    UseMethod("plot_top30percent_coefficients")
12
-}
13
-#' @export
14
-plot_top30percent_coefficients.geneSignature <- function(object) {
15
-    object$top30percent.coefficients
16
-}
... ...
@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/gene_singatures.R
2
+% Please edit documentation in R/gene_signatures.R
3 3
 \name{geneSignatures}
4 4
 \alias{geneSignatures}
5 5
 \title{Generating the feature/gene signature per cluster}
6 6
deleted file mode 100644
... ...
@@ -1,17 +0,0 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/get_top30percent_coefficients.R
3
-\name{get_top30percent_coefficients}
4
-\alias{get_top30percent_coefficients}
5
-\title{Plot of the highest 30 percentage of coefficients per cluster}
6
-\usage{
7
-get_top30percent_coefficients(object)
8
-}
9
-\arguments{
10
-\item{object}{An object of class "geneSignature"}
11
-}
12
-\value{
13
-A plot of the highest 30 percentage of coefficients per cluster
14
-}
15
-\description{
16
-Plot of the highest 30 percentage of coefficients per cluster
17
-}
18 0
deleted file mode 100644
... ...
@@ -1,21 +0,0 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/plot_top30percent_coefficients.R
3
-\name{plot_top30percent_coefficients}
4
-\alias{plot_top30percent_coefficients}
5
-\title{Plot of the highest 30 percentage of coefficients per cluster}
6
-\usage{
7
-plot_top30percent_coefficients(object)
8
-}
9
-\arguments{
10
-\item{object}{An object of class "geneSignature"}
11
-}
12
-\value{
13
-A plot of the highest 30 percentage of coefficients per cluster
14
-}
15
-\description{
16
-Plot of the highest 30 percentage of coefficients per cluster
17
-}
18
-\examples{
19
-gs.object <- geneSignatures(toy_genes, toy_gene_memberships)
20
-plot_top30percent_coefficients(gs.object)
21
-}
... ...
@@ -165,6 +165,6 @@ signature.results <- geneSignatures(toy_genes, toy_gene_memberships)
165 165
 signature.dataframe <- get_coefficient_dataset(signature.results)
166 166
 
167 167
 # Plot results
168
-plot_top30percent_coefficients(signature.results)
168
+plot_top_coefficients(signature.results)
169 169
 ```
170 170