... | ... |
@@ -2,7 +2,7 @@ Package: evaluomeR |
2 | 2 |
Type: Package |
3 | 3 |
Title: Evaluation of Bioinformatics Metrics |
4 | 4 |
URL: http://sele.inf.um.es/evaluome/index.html |
5 |
-Version: 0.99.2 |
|
5 |
+Version: 0.99.3 |
|
6 | 6 |
Author: José Antonio Bernabé-Díaz [aut, cre], Manuel Franco [aut], Juana-María Vivo [aut], Manuel Quesada-Martínez [aut], Astrid Duque-Ramos [aut], Jesualdo Tomás Fernández-Breis [aut]. |
7 | 7 |
Maintainer: José Antonio Bernabé Díaz <[email protected]> |
8 | 8 |
Description: Evaluating the reliability of your own metrics |
... | ... |
@@ -12,7 +12,7 @@ Description: Evaluating the reliability of your own metrics |
12 | 12 |
License: GPL-3 |
13 | 13 |
Encoding: UTF-8 |
14 | 14 |
LazyData: true |
15 |
-Depends: R (>= 3.6.0) |
|
15 |
+Depends: R (>= 3.6.0), SummarizedExperiment |
|
16 | 16 |
Imports: |
17 | 17 |
fpc (>= 2.1-11.1), |
18 | 18 |
cluster (>= 2.0.7-1), |
... | ... |
@@ -21,8 +21,6 @@ Imports: |
21 | 21 |
graphics, |
22 | 22 |
stats, |
23 | 23 |
utils, |
24 |
- SummarizedExperiment, |
|
25 |
- airway, |
|
26 | 24 |
Rdpack |
27 | 25 |
Suggests: BiocStyle, knitr, rmarkdown, kableExtra, magrittr |
28 | 26 |
VignetteBuilder: knitr |
... | ... |
@@ -13,7 +13,7 @@ importFrom("utils", "capture.output") |
13 | 13 |
importFrom("utils", "read.csv") |
14 | 14 |
importFrom("SummarizedExperiment", "assays") |
15 | 15 |
importFrom("SummarizedExperiment", "assay") |
16 |
-import(airway) |
|
16 |
+importFrom("SummarizedExperiment", "SummarizedExperiment") |
|
17 | 17 |
|
18 | 18 |
export(stability) |
19 | 19 |
export(stabilityRange) |
... | ... |
@@ -22,4 +22,3 @@ export(qualityRange) |
22 | 22 |
export(correlations) |
23 | 23 |
export(loadSample) |
24 | 24 |
export(getDataQualityRange) |
25 |
-export(seToDataFrame) |
... | ... |
@@ -1,3 +1,6 @@ |
1 |
+Changes in version 0.99.3 (2019-04-16) |
|
2 |
++ All methods depend on `SummarizedExperiment` as input/output object. |
|
3 |
+ |
|
1 | 4 |
Changes in version 0.99.2 (2019-04-15) |
2 | 5 |
+ SummarizedExperiment can be now processed via `seToDataFrame` method. Adding SummarizedExperiment and airway dependencies. |
3 | 6 |
|
... | ... |
@@ -6,7 +6,7 @@ |
6 | 6 |
#' |
7 | 7 |
#' @param descriptor Sample file to load: "ont-metrics", "rna-metrics" or "biopathways-metrics". |
8 | 8 |
#' |
9 |
-#' @return The dataset specified via \code{descriptor} as a dataframe. |
|
9 |
+#' @return The \code{\link{SummarizedExperiment}} specified via \code{descriptor}. |
|
10 | 10 |
#' |
11 | 11 |
#' @examples |
12 | 12 |
#' # Using example data from our package |
... | ... |
@@ -16,7 +16,8 @@ loadSample <- function(descriptor) { |
16 | 16 |
samples <- c('ont-metrics','rna-metrics','biopathways-metrics') |
17 | 17 |
if (is.element(descriptor, samples)) { |
18 | 18 |
dataFrame <- read.csv(file=system.file('extdata',descriptor, package="evaluomeR"), header=TRUE); |
19 |
- return(dataFrame) |
|
19 |
+ se <- createSE(dataFrame) |
|
20 |
+ return(se) |
|
20 | 21 |
} else { |
21 | 22 |
stop("Invalid descriptor") |
22 | 23 |
} |
... | ... |
@@ -26,20 +27,20 @@ loadSample <- function(descriptor) { |
26 | 27 |
#' @name getDataQualityRange |
27 | 28 |
#' @aliases getDataQualityRange |
28 | 29 |
#' @description |
29 |
-#' This method is a wrapper to retrieve a specific dataframe given a \code{k} value from |
|
30 |
+#' This method is a wrapper to retrieve a specific \code{\link{SummarizedExperiment}} given a \code{k} value from |
|
30 | 31 |
#' the object returned by \code{\link{qualityRange}} function. |
31 | 32 |
#' |
32 | 33 |
#' @param data The object returned by \code{\link{qualityRange}} function. |
33 | 34 |
#' @param k The desired \code{k} cluster. |
34 | 35 |
#' |
35 |
-#' @return The dataframe that contains information about the selected \code{k} cluster. |
|
36 |
+#' @return The \code{\link{SummarizedExperiment}} that contains information about the selected \code{k} cluster. |
|
36 | 37 |
#' |
37 | 38 |
#' @examples |
38 | 39 |
#' # Using example data from our package |
39 | 40 |
#' metrics = loadSample("ont-metrics") |
40 | 41 |
#' qualityRangeData <- qualityRange(data=metrics, k.range=c(3,5), getImages = FALSE) |
41 | 42 |
#' # Getting dataframe that contains information about k=5 |
42 |
-#' k5DataFrame = getDataQualityRange(qualityRangeData, 5) |
|
43 |
+#' k5Data = getDataQualityRange(qualityRangeData, 5) |
|
43 | 44 |
#' |
44 | 45 |
getDataQualityRange <- function(data, k) { |
45 | 46 |
dataNames = names(data) |
... | ... |
@@ -50,6 +51,7 @@ getDataQualityRange <- function(data, k) { |
50 | 51 |
|
51 | 52 |
if (k >= kValues[1] && k <= kValues[kValues.length]) { |
52 | 53 |
column = paste("k_", k, sep="") |
54 |
+ |
|
53 | 55 |
return(data[[column]]) |
54 | 56 |
} else { |
55 | 57 |
error=paste("Selected k (",k,") is not in the range of k.range [" |
... | ... |
@@ -58,45 +60,6 @@ getDataQualityRange <- function(data, k) { |
58 | 60 |
} |
59 | 61 |
} |
60 | 62 |
|
61 |
-#' @title SummarizedExperiment to Dataframe |
|
62 |
-#' @name seToDataFrame |
|
63 |
-#' @aliases seToDataFrame |
|
64 |
-#' @description |
|
65 |
-#' This method is a wrapper to transform a SummarizedExperiment object to a |
|
66 |
-#' Dataframe processable in our methods. |
|
67 |
-#' |
|
68 |
-#' @param SummarizedExperiment A \code{SummarizedExperiment} object |
|
69 |
-#' (see \code{\link{SummarizedExperiment}}). |
|
70 |
-#' |
|
71 |
-#' @return The dataframe that contains information of the first |
|
72 |
-#' assay in \code{SummarizedExperiment}. |
|
73 |
-#' |
|
74 |
-#' @examples |
|
75 |
-#' # Using example data from airway package |
|
76 |
-#' library(airway) |
|
77 |
-#' data(airway) |
|
78 |
-#' airwayData = seToDataFrame(airway) |
|
79 |
-#' airwayData = airwayData[1:10000,1:4] |
|
80 |
-#' stability(airwayData, bs = 20, getImages=FALSE) |
|
81 |
-#' correlations(airwayData, getImages=FALSE) |
|
82 |
-#' |
|
83 |
-seToDataFrame <- function(SummarizedExperiment) { |
|
84 |
- se=SummarizedExperiment |
|
85 |
- if (length(assays(se)) == 0) { |
|
86 |
- stop("SummarizedExperiment has no assays, length is 0") |
|
87 |
- } |
|
88 |
- test = assay(se,1) |
|
89 |
- Datasets <- NULL |
|
90 |
- if (is.null(rownames(test))) { |
|
91 |
- Datasets <- paste("Dataset_", c(1:length(test[,1])), sep="") |
|
92 |
- } else { |
|
93 |
- Datasets <- rownames(test) |
|
94 |
- } |
|
95 |
- |
|
96 |
- test <- data.frame(Datasets,test) |
|
97 |
- return(test) |
|
98 |
-} |
|
99 |
- |
|
100 | 63 |
##################### |
101 | 64 |
## Private methods ## |
102 | 65 |
##################### |
... | ... |
@@ -141,3 +104,61 @@ checkDirectory <- function(path) { |
141 | 104 |
} |
142 | 105 |
return(path) |
143 | 106 |
} |
107 |
+ |
|
108 |
+getAssay <- function(SummarizedExperiment, position) { |
|
109 |
+ se=SummarizedExperiment |
|
110 |
+ se.length <- length(assays(se)) |
|
111 |
+ if (se.length == 0) { |
|
112 |
+ stop("SummarizedExperiment has no assays, length is 0") |
|
113 |
+ } |
|
114 |
+ if (position > se.length) { |
|
115 |
+ error <- paste("SummarizedExperiment has no assay in position ", |
|
116 |
+ position, sep="") |
|
117 |
+ stop(error) |
|
118 |
+ } |
|
119 |
+ test = assay(se, position) |
|
120 |
+ # Datasets <- test[,1] |
|
121 |
+ # if (is.null(rownames(test))) { |
|
122 |
+ # Datasets <- paste("Dataset_", c(1:length(test[,1])), sep="") |
|
123 |
+ # } else { |
|
124 |
+ # Datasets <- rownames(test) |
|
125 |
+ # } |
|
126 |
+ |
|
127 |
+ # test <- data.frame(Datasets,test) |
|
128 |
+ test <- data.frame(test) |
|
129 |
+ names(test) <- colnames(test) |
|
130 |
+ return(test) |
|
131 |
+} |
|
132 |
+ |
|
133 |
+# data: One dataframe, thus one assay |
|
134 |
+createSE <- function(data) { |
|
135 |
+ nrows <- nrow(data); ncols <- ncol(data) |
|
136 |
+ counts <- data.matrix(data) |
|
137 |
+ colnames(counts) <- NULL |
|
138 |
+ colData <- DataFrame(metrics=colnames(data), |
|
139 |
+ row.names=colnames(data)) |
|
140 |
+ se <- SummarizedExperiment(assays=SimpleList(counts), |
|
141 |
+ colData=colData) |
|
142 |
+ return(se) |
|
143 |
+} |
|
144 |
+ |
|
145 |
+# data: A list of dataframes |
|
146 |
+createSEList <- function(data) { |
|
147 |
+ if (!is.list(data)) { |
|
148 |
+ stop("Input variable is not a list") |
|
149 |
+ } |
|
150 |
+ if (length(data) == 0) { |
|
151 |
+ stop("Input variable is an empty list") |
|
152 |
+ } |
|
153 |
+ length = length(names(data)) |
|
154 |
+ seList <- list() |
|
155 |
+ for (i in 1:length) { |
|
156 |
+ cur.data <- data[[i]] |
|
157 |
+ dataMatrix <- suppressWarnings(data.matrix(cur.data)) |
|
158 |
+ dataMatrix[,1] <- cur.data$Metric |
|
159 |
+ se <- createSE(dataMatrix) |
|
160 |
+ seList <- c(seList, se) |
|
161 |
+ } |
|
162 |
+ names(seList) <- names(data) |
|
163 |
+ return(seList) |
|
164 |
+} |
... | ... |
@@ -23,7 +23,7 @@ |
23 | 23 |
#' |
24 | 24 |
#' @inheritParams stability |
25 | 25 |
#' |
26 |
-#' @return A dataframe containing the silhouette width measurements and |
|
26 |
+#' @return A \code{\link{SummarizedExperiment}} containing the silhouette width measurements and |
|
27 | 27 |
#' cluster sizes for cluster \code{k}. |
28 | 28 |
#' |
29 | 29 |
#' @examples |
... | ... |
@@ -43,6 +43,7 @@ quality <- function(data, k=5, getImages=TRUE, |
43 | 43 |
if (!is.null(label)) { |
44 | 44 |
isString(label) |
45 | 45 |
} |
46 |
+ data <- getAssay(data, 1) |
|
46 | 47 |
|
47 | 48 |
cur.env <- new.env() |
48 | 49 |
suppressWarnings( |
... | ... |
@@ -59,7 +60,8 @@ quality <- function(data, k=5, getImages=TRUE, |
59 | 60 |
suppressWarnings( |
60 | 61 |
runSilhouetteIMG(data, k, label, path, cur.env)) |
61 | 62 |
} |
62 |
- return(silhouetteDataFrame) |
|
63 |
+ se <- createSE(silhouetteDataFrame) |
|
64 |
+ return(se) |
|
63 | 65 |
|
64 | 66 |
} |
65 | 67 |
|
... | ... |
@@ -92,7 +94,7 @@ quality <- function(data, k=5, getImages=TRUE, |
92 | 94 |
#' whilst the second one, \code{k.range[2]}, as the higher. Both values must be |
93 | 95 |
#' contained in [2,15] range. |
94 | 96 |
#' |
95 |
-#' @return A list of dataframes containing the silhouette width measurements and |
|
97 |
+#' @return A list of \code{\link{SummarizedExperiment}} containing the silhouette width measurements and |
|
96 | 98 |
#' cluster sizes from \code{k.range[1]} to \code{k.range[2]}. The position on the list matches |
97 | 99 |
#' with the k-value used in that dataframe. For instance, position 5 |
98 | 100 |
#' represents the dataframe with k = 5. |
... | ... |
@@ -125,7 +127,7 @@ qualityRange <- function(data, k.range=c(3,5), getImages=TRUE, |
125 | 127 |
if (!is.null(label)) { |
126 | 128 |
isString(label) |
127 | 129 |
} |
128 |
- |
|
130 |
+ data <- getAssay(data, 1) |
|
129 | 131 |
cur.env <- new.env() |
130 | 132 |
|
131 | 133 |
suppressWarnings( |
... | ... |
@@ -144,7 +146,8 @@ qualityRange <- function(data, k.range=c(3,5), getImages=TRUE, |
144 | 146 |
runQualityIndicesSilhouetteMetric_IMG(k.min = k.min, k.max = k.max, |
145 | 147 |
label, path, cur.env)) |
146 | 148 |
} |
147 |
- return(silhouetteData) |
|
149 |
+ seList <- createSEList(silhouetteData) |
|
150 |
+ return(seList) |
|
148 | 151 |
} |
149 | 152 |
|
150 | 153 |
runQualityIndicesSilhouette <- function(data, k.min, k.max, bs, env) { |
... | ... |
@@ -15,9 +15,11 @@ |
15 | 15 |
#' \item Highly Stable: ]0.85, 1]. |
16 | 16 |
#' } |
17 | 17 |
#' |
18 |
-#' @param data A matrix. The first row is the header. The first |
|
19 |
-#' column of the header is the ID or name of the instance of the dataset |
|
20 |
-#' (e.g., ontology, pathway, etc.) on which the metrics are measured. |
|
18 |
+#' @param data A \code{\link{SummarizedExperiment}}. |
|
19 |
+#' The SummarizedExperiment must contain an assay with the following structure: |
|
20 |
+#' A valid header with names. The first column of the header is the ID or name |
|
21 |
+#' of the instance of the dataset (e.g., ontology, pathway, etc.) on which the |
|
22 |
+#' metrics are measured. |
|
21 | 23 |
#' The other columns of the header contains the names of the metrics. |
22 | 24 |
#' The rows contains the measurements of the metrics for each instance in the dataset. |
23 | 25 |
#' @param k Positive integer. Number of clusters between [2,15] range. |
... | ... |
@@ -26,15 +28,15 @@ |
26 | 28 |
#' @param label String. If not NULL, the label will appear on the title of the plots. |
27 | 29 |
#' @param path String. Path to a valid directory where plots are saved. |
28 | 30 |
#' |
29 |
-#' @return A dataframe containing the stability measurements and |
|
30 |
-#' means for 1 to k clusters. |
|
31 |
+#' @return A \code{\link{SummarizedExperiment}}, |
|
32 |
+#' containing an assay with the stability measurements and means for 1 to k clusters. |
|
31 | 33 |
#' |
32 | 34 |
#' @examples |
33 | 35 |
#' # Using example data from our package |
34 |
-#' metrics = loadSample("ont-metrics") |
|
35 |
-#' result = stability(data=metrics, k=4, getImages=TRUE) |
|
36 |
-#' result = stability(metrics, k=6, getImages=FALSE) |
|
37 |
-#' result = stability(metrics, k=6, getImages=TRUE, label="Experiment 1:") |
|
36 |
+#' metrics <- loadSample("ont-metrics") |
|
37 |
+#' result <- stability(data=metrics, k=4, getImages=TRUE) |
|
38 |
+#' result <- stability(metrics, k=6, getImages=FALSE) |
|
39 |
+#' result <- stability(metrics, k=6, getImages=TRUE, label="Experiment 1:") |
|
38 | 40 |
#' |
39 | 41 |
#' @references |
40 | 42 |
#' \insertRef{milligan1996measuring}{evaluomeR} |
... | ... |
@@ -45,6 +47,8 @@ |
45 | 47 |
stability <- function(data, k=5, bs=100, getImages=TRUE, |
46 | 48 |
label=NULL, path=NULL) { |
47 | 49 |
|
50 |
+ data <- getAssay(data, 1) |
|
51 |
+ |
|
48 | 52 |
checkKValue(k) |
49 | 53 |
if (!is.null(label)) { |
50 | 54 |
isString(label) |
... | ... |
@@ -64,8 +68,8 @@ stability <- function(data, k=5, bs=100, getImages=TRUE, |
64 | 68 |
runStabilityIndexK_IMG(bs, k.min = k, k.max = k, |
65 | 69 |
label, path, cur.env)) |
66 | 70 |
} |
67 |
- |
|
68 |
- return(stabilityDataFrame) |
|
71 |
+ se <- createSE(stabilityDataFrame) |
|
72 |
+ return(se) |
|
69 | 73 |
} |
70 | 74 |
|
71 | 75 |
|
... | ... |
@@ -92,12 +96,13 @@ stability <- function(data, k=5, bs=100, getImages=TRUE, |
92 | 96 |
#' whilst the second one, \code{k.range[2]}, as the higher. Both values must be |
93 | 97 |
#' contained in [2,15] range. |
94 | 98 |
#' |
95 |
-#' @return A dataframe containing the stability measurements and |
|
99 |
+#' @return A \code{\link{SummarizedExperiment}} containing the stability measurements and |
|
96 | 100 |
#' means for 1 to k clusters. |
97 | 101 |
#' |
98 | 102 |
#' @examples |
99 | 103 |
#' # Using example data from our package |
100 |
-#' metrics = loadSample("ont-metrics") |
|
104 |
+#' metrics <- loadSample("ont-metrics") |
|
105 |
+#' result <- stabilityRange(metrics, k.range=c(2,3)) |
|
101 | 106 |
#' |
102 | 107 |
#' @references |
103 | 108 |
#' \insertRef{milligan1996measuring}{evaluomeR} |
... | ... |
@@ -121,6 +126,7 @@ stabilityRange <- function(data, k.range=c(2,15), bs=100, |
121 | 126 |
if (!is.null(label)) { |
122 | 127 |
isString(label) |
123 | 128 |
} |
129 |
+ data <- getAssay(data, 1) |
|
124 | 130 |
|
125 | 131 |
cur.env <- new.env() |
126 | 132 |
suppressWarnings( |
... | ... |
@@ -139,7 +145,8 @@ stabilityRange <- function(data, k.range=c(2,15), bs=100, |
139 | 145 |
runStabilityIndexMetric_IMG(bs, k.min=k.min, k.max=k.max, |
140 | 146 |
label, path, cur.env)) |
141 | 147 |
} |
142 |
- return(stabilityDataFrame) |
|
148 |
+ se <- createSE(stabilityDataFrame) |
|
149 |
+ return(se) |
|
143 | 150 |
} |
144 | 151 |
|
145 | 152 |
runStabilityIndex <- function(data, k.min, k.max, bs, env) { |
... | ... |
@@ -9,6 +9,13 @@ |
9 | 9 |
publisher={Springer} |
10 | 10 |
} |
11 | 11 |
|
12 |
+@Manual{summarizedExperiment, |
|
13 |
+ title = {SummarizedExperiment: SummarizedExperiment container}, |
|
14 |
+ author = {Martin Morgan and Valerie Obenchain and Jim Hester and Hervé Pagès}, |
|
15 |
+ year = {2018}, |
|
16 |
+ note = {R package version 1.12.0}, |
|
17 |
+} |
|
18 |
+ |
|
12 | 19 |
@article{jaccard1901distribution, |
13 | 20 |
title={Distribution de la flore alpine dans le bassin des Dranses et dans quelques r{\'e}gions voisines}, |
14 | 21 |
author={Jaccard, Paul}, |
... | ... |
@@ -8,9 +8,11 @@ correlations(data, margins = c(0, 10, 9, 11), getImages = TRUE, |
8 | 8 |
label = NULL, path = NULL) |
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 |
-\item{data}{A matrix. The first row is the header. The first |
|
12 |
-column of the header is the ID or name of the instance of the dataset |
|
13 |
-(e.g., ontology, pathway, etc.) on which the metrics are measured. |
|
11 |
+\item{data}{A \code{\link{SummarizedExperiment}}. |
|
12 |
+The SummarizedExperiment must contain an assay with the following structure: |
|
13 |
+A valid header with names. The first column of the header is the ID or name |
|
14 |
+of the instance of the dataset (e.g., ontology, pathway, etc.) on which the |
|
15 |
+metrics are measured. |
|
14 | 16 |
The other columns of the header contains the names of the metrics. |
15 | 17 |
The rows contains the measurements of the metrics for each instance in the dataset.} |
16 | 18 |
|
... | ... |
@@ -12,10 +12,10 @@ getDataQualityRange(data, k) |
12 | 12 |
\item{k}{The desired \code{k} cluster.} |
13 | 13 |
} |
14 | 14 |
\value{ |
15 |
-The dataframe that contains information about the selected \code{k} cluster. |
|
15 |
+The \code{\link{SummarizedExperiment}} that contains information about the selected \code{k} cluster. |
|
16 | 16 |
} |
17 | 17 |
\description{ |
18 |
-This method is a wrapper to retrieve a specific dataframe given a \code{k} value from |
|
18 |
+This method is a wrapper to retrieve a specific \code{\link{SummarizedExperiment}} given a \code{k} value from |
|
19 | 19 |
the object returned by \code{\link{qualityRange}} function. |
20 | 20 |
} |
21 | 21 |
\examples{ |
... | ... |
@@ -23,6 +23,6 @@ the object returned by \code{\link{qualityRange}} function. |
23 | 23 |
metrics = loadSample("ont-metrics") |
24 | 24 |
qualityRangeData <- qualityRange(data=metrics, k.range=c(3,5), getImages = FALSE) |
25 | 25 |
# Getting dataframe that contains information about k=5 |
26 |
-k5DataFrame = getDataQualityRange(qualityRangeData, 5) |
|
26 |
+k5Data = getDataQualityRange(qualityRangeData, 5) |
|
27 | 27 |
|
28 | 28 |
} |
... | ... |
@@ -10,7 +10,7 @@ loadSample(descriptor) |
10 | 10 |
\item{descriptor}{Sample file to load: "ont-metrics", "rna-metrics" or "biopathways-metrics".} |
11 | 11 |
} |
12 | 12 |
\value{ |
13 |
-The dataset specified via \code{descriptor} as a dataframe. |
|
13 |
+The \code{\link{SummarizedExperiment}} specified via \code{descriptor}. |
|
14 | 14 |
} |
15 | 15 |
\description{ |
16 | 16 |
This method is a wrapper to load sample input data located inside evaluomeR package. |
... | ... |
@@ -7,9 +7,11 @@ |
7 | 7 |
quality(data, k = 5, getImages = TRUE, label = NULL, path = NULL) |
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 |
-\item{data}{A matrix. The first row is the header. The first |
|
11 |
-column of the header is the ID or name of the instance of the dataset |
|
12 |
-(e.g., ontology, pathway, etc.) on which the metrics are measured. |
|
10 |
+\item{data}{A \code{\link{SummarizedExperiment}}. |
|
11 |
+The SummarizedExperiment must contain an assay with the following structure: |
|
12 |
+A valid header with names. The first column of the header is the ID or name |
|
13 |
+of the instance of the dataset (e.g., ontology, pathway, etc.) on which the |
|
14 |
+metrics are measured. |
|
13 | 15 |
The other columns of the header contains the names of the metrics. |
14 | 16 |
The rows contains the measurements of the metrics for each instance in the dataset.} |
15 | 17 |
|
... | ... |
@@ -22,7 +24,7 @@ The rows contains the measurements of the metrics for each instance in the datas |
22 | 24 |
\item{path}{String. Path to a valid directory where plots are saved.} |
23 | 25 |
} |
24 | 26 |
\value{ |
25 |
-A dataframe containing the silhouette width measurements and |
|
27 |
+A \code{\link{SummarizedExperiment}} containing the silhouette width measurements and |
|
26 | 28 |
cluster sizes for cluster \code{k}. |
27 | 29 |
} |
28 | 30 |
\description{ |
... | ... |
@@ -8,9 +8,11 @@ qualityRange(data, k.range = c(3, 5), getImages = TRUE, label = NULL, |
8 | 8 |
path = NULL) |
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 |
-\item{data}{A matrix. The first row is the header. The first |
|
12 |
-column of the header is the ID or name of the instance of the dataset |
|
13 |
-(e.g., ontology, pathway, etc.) on which the metrics are measured. |
|
11 |
+\item{data}{A \code{\link{SummarizedExperiment}}. |
|
12 |
+The SummarizedExperiment must contain an assay with the following structure: |
|
13 |
+A valid header with names. The first column of the header is the ID or name |
|
14 |
+of the instance of the dataset (e.g., ontology, pathway, etc.) on which the |
|
15 |
+metrics are measured. |
|
14 | 16 |
The other columns of the header contains the names of the metrics. |
15 | 17 |
The rows contains the measurements of the metrics for each instance in the dataset.} |
16 | 18 |
|
... | ... |
@@ -26,7 +28,7 @@ contained in [2,15] range.} |
26 | 28 |
\item{path}{String. Path to a valid directory where plots are saved.} |
27 | 29 |
} |
28 | 30 |
\value{ |
29 |
-A list of dataframes containing the silhouette width measurements and |
|
31 |
+A list of \code{\link{SummarizedExperiment}} containing the silhouette width measurements and |
|
30 | 32 |
cluster sizes from \code{k.range[1]} to \code{k.range[2]}. The position on the list matches |
31 | 33 |
with the k-value used in that dataframe. For instance, position 5 |
32 | 34 |
represents the dataframe with k = 5. |
33 | 35 |
deleted file mode 100755 |
... | ... |
@@ -1,30 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/helpers.R |
|
3 |
-\name{seToDataFrame} |
|
4 |
-\alias{seToDataFrame} |
|
5 |
-\title{SummarizedExperiment to Dataframe} |
|
6 |
-\usage{ |
|
7 |
-seToDataFrame(SummarizedExperiment) |
|
8 |
-} |
|
9 |
-\arguments{ |
|
10 |
-\item{SummarizedExperiment}{A \code{SummarizedExperiment} object |
|
11 |
-(see \code{\link{SummarizedExperiment}}).} |
|
12 |
-} |
|
13 |
-\value{ |
|
14 |
-The dataframe that contains information of the first |
|
15 |
-assay in \code{SummarizedExperiment}. |
|
16 |
-} |
|
17 |
-\description{ |
|
18 |
-This method is a wrapper to transform a SummarizedExperiment object to a |
|
19 |
-Dataframe processable in our methods. |
|
20 |
-} |
|
21 |
-\examples{ |
|
22 |
-# Using example data from airway package |
|
23 |
-library(airway) |
|
24 |
-data(airway) |
|
25 |
-airwayData = seToDataFrame(airway) |
|
26 |
-airwayData = airwayData[1:10000,1:4] |
|
27 |
-stability(airwayData, bs = 20, getImages=FALSE) |
|
28 |
-correlations(airwayData, getImages=FALSE) |
|
29 |
- |
|
30 |
-} |
... | ... |
@@ -8,9 +8,11 @@ stability(data, k = 5, bs = 100, getImages = TRUE, label = NULL, |
8 | 8 |
path = NULL) |
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 |
-\item{data}{A matrix. The first row is the header. The first |
|
12 |
-column of the header is the ID or name of the instance of the dataset |
|
13 |
-(e.g., ontology, pathway, etc.) on which the metrics are measured. |
|
11 |
+\item{data}{A \code{\link{SummarizedExperiment}}. |
|
12 |
+The SummarizedExperiment must contain an assay with the following structure: |
|
13 |
+A valid header with names. The first column of the header is the ID or name |
|
14 |
+of the instance of the dataset (e.g., ontology, pathway, etc.) on which the |
|
15 |
+metrics are measured. |
|
14 | 16 |
The other columns of the header contains the names of the metrics. |
15 | 17 |
The rows contains the measurements of the metrics for each instance in the dataset.} |
16 | 18 |
|
... | ... |
@@ -25,8 +27,8 @@ The rows contains the measurements of the metrics for each instance in the datas |
25 | 27 |
\item{path}{String. Path to a valid directory where plots are saved.} |
26 | 28 |
} |
27 | 29 |
\value{ |
28 |
-A dataframe containing the stability measurements and |
|
29 |
-means for 1 to k clusters. |
|
30 |
+A \code{\link{SummarizedExperiment}}, |
|
31 |
+containing an assay with the stability measurements and means for 1 to k clusters. |
|
30 | 32 |
} |
31 | 33 |
\description{ |
32 | 34 |
This analysis permits to estimate whether the clustering is meaningfully |
... | ... |
@@ -44,10 +46,10 @@ having the following meaning: |
44 | 46 |
} |
45 | 47 |
\examples{ |
46 | 48 |
# Using example data from our package |
47 |
-metrics = loadSample("ont-metrics") |
|
48 |
-result = stability(data=metrics, k=4, getImages=TRUE) |
|
49 |
-result = stability(metrics, k=6, getImages=FALSE) |
|
50 |
-result = stability(metrics, k=6, getImages=TRUE, label="Experiment 1:") |
|
49 |
+metrics <- loadSample("ont-metrics") |
|
50 |
+result <- stability(data=metrics, k=4, getImages=TRUE) |
|
51 |
+result <- stability(metrics, k=6, getImages=FALSE) |
|
52 |
+result <- stability(metrics, k=6, getImages=TRUE, label="Experiment 1:") |
|
51 | 53 |
|
52 | 54 |
} |
53 | 55 |
\references{ |
... | ... |
@@ -8,9 +8,11 @@ stabilityRange(data, k.range = c(2, 15), bs = 100, getImages = TRUE, |
8 | 8 |
label = NULL, path = NULL) |
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 |
-\item{data}{A matrix. The first row is the header. The first |
|
12 |
-column of the header is the ID or name of the instance of the dataset |
|
13 |
-(e.g., ontology, pathway, etc.) on which the metrics are measured. |
|
11 |
+\item{data}{A \code{\link{SummarizedExperiment}}. |
|
12 |
+The SummarizedExperiment must contain an assay with the following structure: |
|
13 |
+A valid header with names. The first column of the header is the ID or name |
|
14 |
+of the instance of the dataset (e.g., ontology, pathway, etc.) on which the |
|
15 |
+metrics are measured. |
|
14 | 16 |
The other columns of the header contains the names of the metrics. |
15 | 17 |
The rows contains the measurements of the metrics for each instance in the dataset.} |
16 | 18 |
|
... | ... |
@@ -28,7 +30,7 @@ contained in [2,15] range.} |
28 | 30 |
\item{path}{String. Path to a valid directory where plots are saved.} |
29 | 31 |
} |
30 | 32 |
\value{ |
31 |
-A dataframe containing the stability measurements and |
|
33 |
+A \code{\link{SummarizedExperiment}} containing the stability measurements and |
|
32 | 34 |
means for 1 to k clusters. |
33 | 35 |
} |
34 | 36 |
\description{ |
... | ... |
@@ -47,7 +49,8 @@ having the following meaning: |
47 | 49 |
} |
48 | 50 |
\examples{ |
49 | 51 |
# Using example data from our package |
50 |
-metrics = loadSample("ont-metrics") |
|
52 |
+metrics <- loadSample("ont-metrics") |
|
53 |
+result <- stabilityRange(metrics, k.range=c(2,3)) |
|
51 | 54 |
|
52 | 55 |
} |
53 | 56 |
\references{ |
... | ... |
@@ -29,10 +29,11 @@ vignette: > |
29 | 29 |
%\VignetteEncoding{UTF-8} |
30 | 30 |
--- |
31 | 31 |
|
32 |
-```{r style, echo = FALSE, results = 'asis'} |
|
32 |
+```{r style, include=FALSE, results='hide'} |
|
33 | 33 |
BiocStyle::markdown() |
34 | 34 |
library(kableExtra) |
35 | 35 |
library(magrittr) |
36 |
+library(SummarizedExperiment) |
|
36 | 37 |
``` |
37 | 38 |
|
38 | 39 |
|
... | ... |
@@ -68,12 +69,12 @@ BiocManager::install("evaluomeR") |
68 | 69 |
|
69 | 70 |
## Prerequisites ## |
70 | 71 |
|
71 |
-The package **evaluomeR** depends on the following CRAN packages for the calculus: *fpc* [@fpc2018], *cluster* [@cluster2018], *corrplot* [@corrplot2017]. Moreover, this package also depends on *grDevices*, *graphics*, *stats* and *utils* from R Core [@rcore] for plotting. |
|
72 |
+The package **evaluomeR** depends on the following CRAN packages for the calculus: *fpc* [@fpc2018], *cluster* [@cluster2018], *corrplot* [@corrplot2017]. Moreover, this package also depends on *grDevices*, *graphics*, *stats* and *utils* from R Core [@rcore] for plotting and on the Bioconductor package *SummarizedExperiment* [@summarizedExperiment] for input/output data. |
|
72 | 73 |
|
73 | 74 |
# Using evaluomeR # |
74 | 75 |
|
75 |
-## Creating an input dataframe ## |
|
76 |
-The input dataframe must follow some structural rules, see Table \@ref(tab:table). The first row is the header. The first column of the header is the ID or name of the instance of the dataset (e.g., ontology, pathway, etc.) on which the metrics are measured. The other columns of the header contains the names of the metrics. The rows contains the measurements of the metrics for each instance in the dataset. |
|
76 |
+## Creating an input SummarizedExperiment ## |
|
77 |
+The input is a `SummarizedExperiment` object. The assay contained in `SummarizedExperiment` must follow a certain structure, see Table \@ref(tab:table): A valid header must be specified. The first column of the header is the ID or name of the instance of the dataset (e.g., ontology, pathway, etc.) on which the metrics are measured. The other columns of the header contains the names of the metrics. The rows contains the measurements of the metrics for each instance in the dataset. |
|
77 | 78 |
|
78 | 79 |
ID | MetricNameA | MetricNameB | MetricNameC | ... | |
79 | 80 |
--------- | ----------- | ----------- | ----------- | --- | |
... | ... |
@@ -81,7 +82,7 @@ instance1 | 1.2 | 6.4 | 0.5 | ... | |
81 | 82 |
instance2 | 2.4 | 5.4 | 0.8 | ... | |
82 | 83 |
instance3 | 1.9 | 8.9 | 1.1 | ... | |
83 | 84 |
|
84 |
-: (\#tab:table) Example of an input dataframe for the **evaluomeR** package. |
|
85 |
+: (\#tab:table) Example of an input assay from a `SummarizedExperiment` for the **evaluomeR** package. |
|
85 | 86 |
|
86 | 87 |
## Using input sample data from evaluomeR ## |
87 | 88 |
|
... | ... |
@@ -104,7 +105,7 @@ biopathwaysMetrics <- loadSample("biopathways-metrics") |
104 | 105 |
|
105 | 106 |
## Correlations ## |
106 | 107 |
|
107 |
-We provide the `correlations` function to evaluate the correlations among the metrics defined in the dataframe: |
|
108 |
+We provide the `correlations` function to evaluate the correlations among the metrics defined in the `SummarizedExperiment`: |
|
108 | 109 |
|
109 | 110 |
```{r correlations-1, echo=TRUE} |
110 | 111 |
library(evaluomeR) |
... | ... |
@@ -124,10 +125,15 @@ The stability index analysis is performed by the `stability` function. For insta |
124 | 125 |
stabilityData <- stability(rnaMetrics, k=2, bs = 100) |
125 | 126 |
``` |
126 | 127 |
|
127 |
-The `stability` function returns the `stabilityData` dataframe, which contains the information shown in the plot: |
|
128 |
+The `stability` function returns the `stabilityData` object, a `SummarizedExperiment` that contains an assay with the information shown in the plot: |
|
129 |
+ |
|
130 |
+```{r stability-1-assay, results='hide', echo=TRUE, eval=FALSE} |
|
131 |
+assay(stabilityData, 1) |
|
132 |
+``` |
|
128 | 133 |
|
129 | 134 |
```{r stability-1-table, results='asis', echo=FALSE} |
130 |
-kable(stabilityData) %>% |
|
135 |
+data <- assay(stabilityData, 1) |
|
136 |
+kable(data) %>% |
|
131 | 137 |
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) |
132 | 138 |
``` |
133 | 139 |
|
... | ... |
@@ -157,12 +163,17 @@ For instance, running a quality analysis for the two metrics of `rnaMetrics` dat |
157 | 163 |
qualityData = quality(rnaMetrics, k = 4) |
158 | 164 |
``` |
159 | 165 |
|
160 |
-The data of the first plot titled as "*Qual. Indices for k=4 across metrics*" according to *Silhouette avg. width*, is stored in *Avg_Silhouette_Width* column from `qualityData` dataframe. The other three plots titled by their metric name display the input rows grouped by colours for each cluster, along with their Silhouette width scores. |
|
166 |
+The data of the first plot titled as "*Qual. Indices for k=4 across metrics*" according to *Silhouette avg. width*, is stored in *Avg_Silhouette_Width* column from the first assay of the `SummarizedExperiment`, `qualityData`. The other three plots titled by their metric name display the input rows grouped by colours for each cluster, along with their Silhouette width scores. |
|
161 | 167 |
|
162 | 168 |
The variable `qualityData` contains information about the clusters of each metric: The average silhouette width per cluster, the overall average sihouette width (taking into account all the clusters) and the number of individuals per cluster: |
163 | 169 |
|
170 |
+```{r quality-1-assay, results='hide', eval=FALSE, echo=TRUE} |
|
171 |
+assay(qualityData,1) |
|
172 |
+``` |
|
173 |
+ |
|
164 | 174 |
```{r quality-1-table, results='asis', echo=FALSE} |
165 |
-kable(qualityData) %>% |
|
175 |
+data <- assay(qualityData,1) |
|
176 |
+kable(data) %>% |
|
166 | 177 |
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>% |
167 | 178 |
scroll_box(width = "100%") |
168 | 179 |
``` |
... | ... |
@@ -180,7 +191,7 @@ qualityRangeData = qualityRange(rnaMetrics, k.range) |
180 | 191 |
|
181 | 192 |
The `qualityRange` function also returns two kind of plots, as seen in [Stability range](#sec:stabilityrange) section. One for each `k` in the `k.range`, showing the quality indices (goodness of the classification) across the metrics, and a second type of plot to show each metric with its respective quality index in each `k` value. |
182 | 193 |
|
183 |
-The `qualityRangeData` object returned by `qualityRange` is an array of dataframes, whose size is `diff(k.range)+1`. In the example shown above, the size of `qualityRangeData` is 3, since the array length would contain the dataframes from `k=4` to `k=6`. |
|
194 |
+The `qualityRangeData` object returned by `qualityRange` is list of `SummarizedExperiment`, whose size is `diff(k.range)+1`. In the example shown above, the size of `qualityRangeData` is 3, since the array length would contain the dataframes from `k=4` to `k=6`. |
|
184 | 195 |
|
185 | 196 |
```{r quality-range-2, eval=TRUE, echo=TRUE} |
186 | 197 |
diff(k.range)+1 |
... | ... |
@@ -190,13 +201,16 @@ length(qualityRangeData) |
190 | 201 |
The user can access a specific dataframe for a given `k` value in three different ways: by dollar notation, brackets notation or using our wrapper method `getDataQualityRange`. For instance, if the user wishes to retrieve the dataframe which contains information of `k=5`, being the `k.range` [4,6]: |
191 | 202 |
|
192 | 203 |
```{r quality-range-3, eval=FALSE, echo=TRUE} |
193 |
-k5DataFrame = qualityRangeData$k_5 |
|
194 |
-k5DataFrame = qualityRangeData[["k_5"]] |
|
195 |
-k5DataFrame = getDataQualityRange(qualityRangeData, 5) |
|
204 |
+k5Data = qualityRangeData$k_5 |
|
205 |
+k5Data = qualityRangeData[["k_5"]] |
|
206 |
+k5Data = getDataQualityRange(qualityRangeData, 5) |
|
207 |
+assay(k5Data, 1) |
|
196 | 208 |
``` |
197 | 209 |
|
210 |
+ |
|
198 | 211 |
```{r quality-range-table, results='asis', echo=FALSE} |
199 |
-kable(qualityRangeData$k_5) %>% |
|
212 |
+data <- assay(qualityRangeData$k_5, 1) |
|
213 |
+kable(data) %>% |
|
200 | 214 |
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>% |
201 | 215 |
scroll_box(width = "100%", height = "150px") |
202 | 216 |
``` |