Browse code

All methods depend on as input/output object.

Neobernad authored on 16/02/2019 13:03:09
Showing 17 changed files

... ...
@@ -2,7 +2,7 @@ Package: evaluomeR
2 2
 Type: Package
3 3
 Title: Evaluation of Bioinformatics Metrics
4 4
 URL: http://sele.inf.um.es/evaluome/index.html
5
-Version: 0.99.2
5
+Version: 0.99.3
6 6
 Author: José Antonio Bernabé-Díaz [aut, cre], Manuel Franco [aut], Juana-María Vivo [aut], Manuel Quesada-Martínez [aut], Astrid Duque-Ramos [aut], Jesualdo Tomás Fernández-Breis [aut].
7 7
 Maintainer: José Antonio Bernabé Díaz <[email protected]>
8 8
 Description: Evaluating the reliability of your own metrics 
... ...
@@ -12,7 +12,7 @@ Description: Evaluating the reliability of your own metrics
12 12
 License: GPL-3
13 13
 Encoding: UTF-8
14 14
 LazyData: true
15
-Depends: R (>= 3.6.0)
15
+Depends: R (>= 3.6.0), SummarizedExperiment
16 16
 Imports:
17 17
     fpc (>= 2.1-11.1),
18 18
     cluster (>= 2.0.7-1),
... ...
@@ -21,8 +21,6 @@ Imports:
21 21
     graphics,
22 22
     stats,
23 23
     utils,
24
-    SummarizedExperiment,
25
-    airway,
26 24
     Rdpack
27 25
 Suggests: BiocStyle, knitr, rmarkdown, kableExtra, magrittr
28 26
 VignetteBuilder: knitr
... ...
@@ -13,7 +13,7 @@ importFrom("utils", "capture.output")
13 13
 importFrom("utils", "read.csv")
14 14
 importFrom("SummarizedExperiment", "assays")
15 15
 importFrom("SummarizedExperiment", "assay")
16
-import(airway)
16
+importFrom("SummarizedExperiment", "SummarizedExperiment")
17 17
 
18 18
 export(stability)
19 19
 export(stabilityRange)
... ...
@@ -22,4 +22,3 @@ export(qualityRange)
22 22
 export(correlations)
23 23
 export(loadSample)
24 24
 export(getDataQualityRange)
25
-export(seToDataFrame)
... ...
@@ -1,3 +1,6 @@
1
+Changes in version 0.99.3 (2019-04-16)
2
++ All methods depend on `SummarizedExperiment` as input/output object.
3
+
1 4
 Changes in version 0.99.2 (2019-04-15)
2 5
 + SummarizedExperiment can be now processed via `seToDataFrame` method. Adding SummarizedExperiment and airway dependencies.
3 6
 
... ...
@@ -25,7 +25,7 @@ correlations <- function(data, margins=c(0,10,9,11), getImages=TRUE,
25 25
   if (!is.null(label)) {
26 26
     isString(label)
27 27
   }
28
-
28
+  data <- getAssay(data, 1)
29 29
   cur.env <- new.env()
30 30
 
31 31
   MatCorr <- cor(data[,2:length(data)])
... ...
@@ -6,7 +6,7 @@
6 6
 #'
7 7
 #' @param descriptor Sample file to load: "ont-metrics", "rna-metrics" or "biopathways-metrics".
8 8
 #'
9
-#' @return The dataset specified via \code{descriptor} as a dataframe.
9
+#' @return The \code{\link{SummarizedExperiment}} specified via \code{descriptor}.
10 10
 #'
11 11
 #' @examples
12 12
 #' # Using example data from our package
... ...
@@ -16,7 +16,8 @@ loadSample <- function(descriptor) {
16 16
   samples <- c('ont-metrics','rna-metrics','biopathways-metrics')
17 17
   if (is.element(descriptor, samples)) {
18 18
     dataFrame <- read.csv(file=system.file('extdata',descriptor, package="evaluomeR"), header=TRUE);
19
-    return(dataFrame)
19
+    se <- createSE(dataFrame)
20
+    return(se)
20 21
   } else {
21 22
     stop("Invalid descriptor")
22 23
   }
... ...
@@ -26,20 +27,20 @@ loadSample <- function(descriptor) {
26 27
 #' @name getDataQualityRange
27 28
 #' @aliases getDataQualityRange
28 29
 #' @description
29
-#' This method is a wrapper to retrieve a specific dataframe given a \code{k} value from
30
+#' This method is a wrapper to retrieve a specific \code{\link{SummarizedExperiment}} given a \code{k} value from
30 31
 #' the object returned by \code{\link{qualityRange}} function.
31 32
 #'
32 33
 #' @param data The object returned by \code{\link{qualityRange}} function.
33 34
 #' @param k The desired \code{k} cluster.
34 35
 #'
35
-#' @return The dataframe that contains information about the selected \code{k} cluster.
36
+#' @return The \code{\link{SummarizedExperiment}} that contains information about the selected \code{k} cluster.
36 37
 #'
37 38
 #' @examples
38 39
 #' # Using example data from our package
39 40
 #' metrics = loadSample("ont-metrics")
40 41
 #' qualityRangeData <- qualityRange(data=metrics, k.range=c(3,5), getImages = FALSE)
41 42
 #' # Getting dataframe that contains information about k=5
42
-#' k5DataFrame = getDataQualityRange(qualityRangeData, 5)
43
+#' k5Data = getDataQualityRange(qualityRangeData, 5)
43 44
 #'
44 45
 getDataQualityRange <- function(data, k) {
45 46
   dataNames = names(data)
... ...
@@ -50,6 +51,7 @@ getDataQualityRange <- function(data, k) {
50 51
 
51 52
   if (k >= kValues[1] && k <= kValues[kValues.length]) {
52 53
     column = paste("k_", k, sep="")
54
+
53 55
     return(data[[column]])
54 56
   } else {
55 57
     error=paste("Selected k (",k,") is not in the range of k.range ["
... ...
@@ -58,45 +60,6 @@ getDataQualityRange <- function(data, k) {
58 60
   }
59 61
 }
60 62
 
61
-#' @title SummarizedExperiment to Dataframe
62
-#' @name seToDataFrame
63
-#' @aliases seToDataFrame
64
-#' @description
65
-#' This method is a wrapper to transform a SummarizedExperiment object to a
66
-#' Dataframe processable in our methods.
67
-#'
68
-#' @param SummarizedExperiment A \code{SummarizedExperiment} object
69
-#' (see \code{\link{SummarizedExperiment}}).
70
-#'
71
-#' @return The dataframe that contains information of the first
72
-#' assay in \code{SummarizedExperiment}.
73
-#'
74
-#' @examples
75
-#' # Using example data from airway package
76
-#' library(airway)
77
-#' data(airway)
78
-#' airwayData = seToDataFrame(airway)
79
-#' airwayData = airwayData[1:10000,1:4]
80
-#' stability(airwayData, bs = 20, getImages=FALSE)
81
-#' correlations(airwayData, getImages=FALSE)
82
-#'
83
-seToDataFrame <- function(SummarizedExperiment) {
84
-  se=SummarizedExperiment
85
-  if (length(assays(se)) == 0) {
86
-    stop("SummarizedExperiment has no assays, length is 0")
87
-  }
88
-  test = assay(se,1)
89
-  Datasets <- NULL
90
-  if (is.null(rownames(test))) {
91
-    Datasets <- paste("Dataset_", c(1:length(test[,1])), sep="")
92
-  } else {
93
-    Datasets <- rownames(test)
94
-  }
95
-
96
-  test <- data.frame(Datasets,test)
97
-  return(test)
98
-}
99
-
100 63
 #####################
101 64
 ## Private methods ##
102 65
 #####################
... ...
@@ -141,3 +104,61 @@ checkDirectory <- function(path) {
141 104
   }
142 105
   return(path)
143 106
 }
107
+
108
+getAssay <- function(SummarizedExperiment, position) {
109
+  se=SummarizedExperiment
110
+  se.length <- length(assays(se))
111
+  if (se.length == 0) {
112
+    stop("SummarizedExperiment has no assays, length is 0")
113
+  }
114
+  if (position > se.length) {
115
+    error <- paste("SummarizedExperiment has no assay in position ",
116
+                   position, sep="")
117
+    stop(error)
118
+  }
119
+  test = assay(se, position)
120
+  # Datasets <- test[,1]
121
+  # if (is.null(rownames(test))) {
122
+  #   Datasets <- paste("Dataset_", c(1:length(test[,1])), sep="")
123
+  # } else {
124
+  #   Datasets <- rownames(test)
125
+  # }
126
+
127
+  # test <- data.frame(Datasets,test)
128
+  test <- data.frame(test)
129
+  names(test) <- colnames(test)
130
+  return(test)
131
+}
132
+
133
+# data: One dataframe, thus one assay
134
+createSE <- function(data) {
135
+  nrows <- nrow(data); ncols <- ncol(data)
136
+  counts <- data.matrix(data)
137
+  colnames(counts) <- NULL
138
+  colData <- DataFrame(metrics=colnames(data),
139
+                       row.names=colnames(data))
140
+  se <- SummarizedExperiment(assays=SimpleList(counts),
141
+                              colData=colData)
142
+  return(se)
143
+}
144
+
145
+# data: A list of dataframes
146
+createSEList <- function(data) {
147
+  if (!is.list(data)) {
148
+    stop("Input variable is not a list")
149
+  }
150
+  if (length(data) == 0) {
151
+    stop("Input variable is an empty list")
152
+  }
153
+  length = length(names(data))
154
+  seList <- list()
155
+  for (i in 1:length) {
156
+    cur.data <- data[[i]]
157
+    dataMatrix <- suppressWarnings(data.matrix(cur.data))
158
+    dataMatrix[,1] <- cur.data$Metric
159
+    se <- createSE(dataMatrix)
160
+    seList <- c(seList, se)
161
+  }
162
+  names(seList) <- names(data)
163
+  return(seList)
164
+}
... ...
@@ -23,7 +23,7 @@
23 23
 #'
24 24
 #' @inheritParams stability
25 25
 #'
26
-#' @return A dataframe containing the silhouette width measurements and
26
+#' @return A \code{\link{SummarizedExperiment}} containing the silhouette width measurements and
27 27
 #' cluster sizes for cluster \code{k}.
28 28
 #'
29 29
 #' @examples
... ...
@@ -43,6 +43,7 @@ quality <- function(data, k=5, getImages=TRUE,
43 43
   if (!is.null(label)) {
44 44
     isString(label)
45 45
   }
46
+  data <- getAssay(data, 1)
46 47
 
47 48
   cur.env <- new.env()
48 49
   suppressWarnings(
... ...
@@ -59,7 +60,8 @@ quality <- function(data, k=5, getImages=TRUE,
59 60
     suppressWarnings(
60 61
       runSilhouetteIMG(data, k, label, path, cur.env))
61 62
   }
62
-  return(silhouetteDataFrame)
63
+  se <- createSE(silhouetteDataFrame)
64
+  return(se)
63 65
 
64 66
 }
65 67
 
... ...
@@ -92,7 +94,7 @@ quality <- function(data, k=5, getImages=TRUE,
92 94
 #' whilst the second one, \code{k.range[2]}, as the higher. Both values must be
93 95
 #' contained in [2,15] range.
94 96
 #'
95
-#' @return A list of dataframes containing the silhouette width measurements and
97
+#' @return A list of \code{\link{SummarizedExperiment}} containing the silhouette width measurements and
96 98
 #' cluster sizes from \code{k.range[1]} to \code{k.range[2]}. The position on the list matches
97 99
 #' with the k-value used in that dataframe. For instance, position 5
98 100
 #' represents the dataframe with k = 5.
... ...
@@ -125,7 +127,7 @@ qualityRange <- function(data, k.range=c(3,5), getImages=TRUE,
125 127
   if (!is.null(label)) {
126 128
     isString(label)
127 129
   }
128
-
130
+  data <- getAssay(data, 1)
129 131
   cur.env <- new.env()
130 132
 
131 133
   suppressWarnings(
... ...
@@ -144,7 +146,8 @@ qualityRange <- function(data, k.range=c(3,5), getImages=TRUE,
144 146
       runQualityIndicesSilhouetteMetric_IMG(k.min = k.min, k.max = k.max,
145 147
                                             label, path, cur.env))
146 148
   }
147
-  return(silhouetteData)
149
+  seList <- createSEList(silhouetteData)
150
+  return(seList)
148 151
 }
149 152
 
150 153
 runQualityIndicesSilhouette <- function(data, k.min, k.max, bs, env) {
... ...
@@ -15,9 +15,11 @@
15 15
 #' \item Highly Stable: ]0.85, 1].
16 16
 #' }
17 17
 #'
18
-#' @param data A matrix. The first row is the header. The first
19
-#' column of the header is the ID or name of the instance of the dataset
20
-#' (e.g., ontology, pathway, etc.) on which the metrics are measured.
18
+#' @param data A \code{\link{SummarizedExperiment}}.
19
+#' The SummarizedExperiment must contain an assay with the following structure:
20
+#' A valid header with names. The first  column of the header is the ID or name
21
+#' of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
22
+#' metrics are measured.
21 23
 #' The other columns of the header contains the names of the metrics.
22 24
 #' The rows contains the measurements of the metrics for each instance in the dataset.
23 25
 #' @param k Positive integer. Number of clusters between [2,15] range.
... ...
@@ -26,15 +28,15 @@
26 28
 #' @param label String. If not NULL, the label will appear on the title of the plots.
27 29
 #' @param path String. Path to a valid directory where plots are saved.
28 30
 #'
29
-#' @return A dataframe containing the stability measurements and
30
-#' means for 1 to k clusters.
31
+#' @return A \code{\link{SummarizedExperiment}},
32
+#' containing an assay with the stability measurements and means for 1 to k clusters.
31 33
 #'
32 34
 #' @examples
33 35
 #' # Using example data from our package
34
-#' metrics = loadSample("ont-metrics")
35
-#' result = stability(data=metrics, k=4, getImages=TRUE)
36
-#' result = stability(metrics, k=6, getImages=FALSE)
37
-#' result = stability(metrics, k=6, getImages=TRUE, label="Experiment 1:")
36
+#' metrics <- loadSample("ont-metrics")
37
+#' result <- stability(data=metrics, k=4, getImages=TRUE)
38
+#' result <- stability(metrics, k=6, getImages=FALSE)
39
+#' result <- stability(metrics, k=6, getImages=TRUE, label="Experiment 1:")
38 40
 #'
39 41
 #' @references
40 42
 #' \insertRef{milligan1996measuring}{evaluomeR}
... ...
@@ -45,6 +47,8 @@
45 47
 stability <- function(data, k=5, bs=100, getImages=TRUE,
46 48
                       label=NULL, path=NULL) {
47 49
 
50
+  data <- getAssay(data, 1)
51
+
48 52
   checkKValue(k)
49 53
   if (!is.null(label)) {
50 54
     isString(label)
... ...
@@ -64,8 +68,8 @@ stability <- function(data, k=5, bs=100, getImages=TRUE,
64 68
       runStabilityIndexK_IMG(bs, k.min = k, k.max = k,
65 69
                            label, path, cur.env))
66 70
   }
67
-
68
-  return(stabilityDataFrame)
71
+  se <- createSE(stabilityDataFrame)
72
+  return(se)
69 73
 }
70 74
 
71 75
 
... ...
@@ -92,12 +96,13 @@ stability <- function(data, k=5, bs=100, getImages=TRUE,
92 96
 #' whilst the second one, \code{k.range[2]}, as the higher. Both values must be
93 97
 #' contained in [2,15] range.
94 98
 #'
95
-#' @return A dataframe containing the stability measurements and
99
+#' @return A \code{\link{SummarizedExperiment}} containing the stability measurements and
96 100
 #' means for 1 to k clusters.
97 101
 #'
98 102
 #' @examples
99 103
 #' # Using example data from our package
100
-#' metrics = loadSample("ont-metrics")
104
+#' metrics <- loadSample("ont-metrics")
105
+#' result <- stabilityRange(metrics, k.range=c(2,3))
101 106
 #'
102 107
 #' @references
103 108
 #' \insertRef{milligan1996measuring}{evaluomeR}
... ...
@@ -121,6 +126,7 @@ stabilityRange <- function(data, k.range=c(2,15), bs=100,
121 126
   if (!is.null(label)) {
122 127
     isString(label)
123 128
   }
129
+  data <- getAssay(data, 1)
124 130
 
125 131
   cur.env <- new.env()
126 132
   suppressWarnings(
... ...
@@ -139,7 +145,8 @@ stabilityRange <- function(data, k.range=c(2,15), bs=100,
139 145
       runStabilityIndexMetric_IMG(bs, k.min=k.min, k.max=k.max,
140 146
                                 label, path, cur.env))
141 147
   }
142
-  return(stabilityDataFrame)
148
+  se <- createSE(stabilityDataFrame)
149
+  return(se)
143 150
 }
144 151
 
145 152
 runStabilityIndex <- function(data, k.min, k.max, bs, env) {
... ...
@@ -9,6 +9,13 @@
9 9
   publisher={Springer}
10 10
 }
11 11
 
12
+@Manual{summarizedExperiment,
13
+  title = {SummarizedExperiment: SummarizedExperiment container},
14
+  author = {Martin Morgan and Valerie Obenchain and Jim Hester and Hervé Pagès},
15
+  year = {2018},
16
+  note = {R package version 1.12.0},
17
+}
18
+
12 19
 @article{jaccard1901distribution,
13 20
   title={Distribution de la flore alpine dans le bassin des Dranses et dans quelques r{\'e}gions voisines},
14 21
   author={Jaccard, Paul},
... ...
@@ -8,9 +8,11 @@ correlations(data, margins = c(0, 10, 9, 11), getImages = TRUE,
8 8
   label = NULL, path = NULL)
9 9
 }
10 10
 \arguments{
11
-\item{data}{A matrix. The first row is the header. The first
12
-column of the header is the ID or name of the instance of the dataset
13
-(e.g., ontology, pathway, etc.) on which the metrics are measured.
11
+\item{data}{A \code{\link{SummarizedExperiment}}.
12
+The SummarizedExperiment must contain an assay with the following structure:
13
+A valid header with names. The first  column of the header is the ID or name
14
+of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
15
+metrics are measured.
14 16
 The other columns of the header contains the names of the metrics.
15 17
 The rows contains the measurements of the metrics for each instance in the dataset.}
16 18
 
... ...
@@ -12,10 +12,10 @@ getDataQualityRange(data, k)
12 12
 \item{k}{The desired \code{k} cluster.}
13 13
 }
14 14
 \value{
15
-The dataframe that contains information about the selected \code{k} cluster.
15
+The \code{\link{SummarizedExperiment}} that contains information about the selected \code{k} cluster.
16 16
 }
17 17
 \description{
18
-This method is a wrapper to retrieve a specific dataframe given a \code{k} value from
18
+This method is a wrapper to retrieve a specific \code{\link{SummarizedExperiment}} given a \code{k} value from
19 19
 the object returned by \code{\link{qualityRange}} function.
20 20
 }
21 21
 \examples{
... ...
@@ -23,6 +23,6 @@ the object returned by \code{\link{qualityRange}} function.
23 23
 metrics = loadSample("ont-metrics")
24 24
 qualityRangeData <- qualityRange(data=metrics, k.range=c(3,5), getImages = FALSE)
25 25
 # Getting dataframe that contains information about k=5
26
-k5DataFrame = getDataQualityRange(qualityRangeData, 5)
26
+k5Data = getDataQualityRange(qualityRangeData, 5)
27 27
 
28 28
 }
... ...
@@ -10,7 +10,7 @@ loadSample(descriptor)
10 10
 \item{descriptor}{Sample file to load: "ont-metrics", "rna-metrics" or "biopathways-metrics".}
11 11
 }
12 12
 \value{
13
-The dataset specified via \code{descriptor} as a dataframe.
13
+The \code{\link{SummarizedExperiment}} specified via \code{descriptor}.
14 14
 }
15 15
 \description{
16 16
 This method is a wrapper to load sample input data located inside evaluomeR package.
... ...
@@ -7,9 +7,11 @@
7 7
 quality(data, k = 5, getImages = TRUE, label = NULL, path = NULL)
8 8
 }
9 9
 \arguments{
10
-\item{data}{A matrix. The first row is the header. The first
11
-column of the header is the ID or name of the instance of the dataset
12
-(e.g., ontology, pathway, etc.) on which the metrics are measured.
10
+\item{data}{A \code{\link{SummarizedExperiment}}.
11
+The SummarizedExperiment must contain an assay with the following structure:
12
+A valid header with names. The first  column of the header is the ID or name
13
+of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
14
+metrics are measured.
13 15
 The other columns of the header contains the names of the metrics.
14 16
 The rows contains the measurements of the metrics for each instance in the dataset.}
15 17
 
... ...
@@ -22,7 +24,7 @@ The rows contains the measurements of the metrics for each instance in the datas
22 24
 \item{path}{String. Path to a valid directory where plots are saved.}
23 25
 }
24 26
 \value{
25
-A dataframe containing the silhouette width measurements and
27
+A \code{\link{SummarizedExperiment}} containing the silhouette width measurements and
26 28
 cluster sizes for cluster \code{k}.
27 29
 }
28 30
 \description{
... ...
@@ -8,9 +8,11 @@ qualityRange(data, k.range = c(3, 5), getImages = TRUE, label = NULL,
8 8
   path = NULL)
9 9
 }
10 10
 \arguments{
11
-\item{data}{A matrix. The first row is the header. The first
12
-column of the header is the ID or name of the instance of the dataset
13
-(e.g., ontology, pathway, etc.) on which the metrics are measured.
11
+\item{data}{A \code{\link{SummarizedExperiment}}.
12
+The SummarizedExperiment must contain an assay with the following structure:
13
+A valid header with names. The first  column of the header is the ID or name
14
+of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
15
+metrics are measured.
14 16
 The other columns of the header contains the names of the metrics.
15 17
 The rows contains the measurements of the metrics for each instance in the dataset.}
16 18
 
... ...
@@ -26,7 +28,7 @@ contained in [2,15] range.}
26 28
 \item{path}{String. Path to a valid directory where plots are saved.}
27 29
 }
28 30
 \value{
29
-A list of dataframes containing the silhouette width measurements and
31
+A list of \code{\link{SummarizedExperiment}} containing the silhouette width measurements and
30 32
 cluster sizes from \code{k.range[1]} to \code{k.range[2]}. The position on the list matches
31 33
 with the k-value used in that dataframe. For instance, position 5
32 34
 represents the dataframe with k = 5.
33 35
deleted file mode 100755
... ...
@@ -1,30 +0,0 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/helpers.R
3
-\name{seToDataFrame}
4
-\alias{seToDataFrame}
5
-\title{SummarizedExperiment to Dataframe}
6
-\usage{
7
-seToDataFrame(SummarizedExperiment)
8
-}
9
-\arguments{
10
-\item{SummarizedExperiment}{A \code{SummarizedExperiment} object
11
-(see \code{\link{SummarizedExperiment}}).}
12
-}
13
-\value{
14
-The dataframe that contains information of the first
15
-assay in \code{SummarizedExperiment}.
16
-}
17
-\description{
18
-This method is a wrapper to transform a SummarizedExperiment object to a
19
-Dataframe processable in our methods.
20
-}
21
-\examples{
22
-# Using example data from airway package
23
-library(airway)
24
-data(airway)
25
-airwayData = seToDataFrame(airway)
26
-airwayData = airwayData[1:10000,1:4]
27
-stability(airwayData, bs = 20, getImages=FALSE)
28
-correlations(airwayData, getImages=FALSE)
29
-
30
-}
... ...
@@ -8,9 +8,11 @@ stability(data, k = 5, bs = 100, getImages = TRUE, label = NULL,
8 8
   path = NULL)
9 9
 }
10 10
 \arguments{
11
-\item{data}{A matrix. The first row is the header. The first
12
-column of the header is the ID or name of the instance of the dataset
13
-(e.g., ontology, pathway, etc.) on which the metrics are measured.
11
+\item{data}{A \code{\link{SummarizedExperiment}}.
12
+The SummarizedExperiment must contain an assay with the following structure:
13
+A valid header with names. The first  column of the header is the ID or name
14
+of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
15
+metrics are measured.
14 16
 The other columns of the header contains the names of the metrics.
15 17
 The rows contains the measurements of the metrics for each instance in the dataset.}
16 18
 
... ...
@@ -25,8 +27,8 @@ The rows contains the measurements of the metrics for each instance in the datas
25 27
 \item{path}{String. Path to a valid directory where plots are saved.}
26 28
 }
27 29
 \value{
28
-A dataframe containing the stability measurements and
29
-means for 1 to k clusters.
30
+A \code{\link{SummarizedExperiment}},
31
+containing an assay with the stability measurements and means for 1 to k clusters.
30 32
 }
31 33
 \description{
32 34
 This analysis permits to estimate whether the clustering is meaningfully
... ...
@@ -44,10 +46,10 @@ having the following meaning:
44 46
 }
45 47
 \examples{
46 48
 # Using example data from our package
47
-metrics = loadSample("ont-metrics")
48
-result = stability(data=metrics, k=4, getImages=TRUE)
49
-result = stability(metrics, k=6, getImages=FALSE)
50
-result = stability(metrics, k=6, getImages=TRUE, label="Experiment 1:")
49
+metrics <- loadSample("ont-metrics")
50
+result <- stability(data=metrics, k=4, getImages=TRUE)
51
+result <- stability(metrics, k=6, getImages=FALSE)
52
+result <- stability(metrics, k=6, getImages=TRUE, label="Experiment 1:")
51 53
 
52 54
 }
53 55
 \references{
... ...
@@ -8,9 +8,11 @@ stabilityRange(data, k.range = c(2, 15), bs = 100, getImages = TRUE,
8 8
   label = NULL, path = NULL)
9 9
 }
10 10
 \arguments{
11
-\item{data}{A matrix. The first row is the header. The first
12
-column of the header is the ID or name of the instance of the dataset
13
-(e.g., ontology, pathway, etc.) on which the metrics are measured.
11
+\item{data}{A \code{\link{SummarizedExperiment}}.
12
+The SummarizedExperiment must contain an assay with the following structure:
13
+A valid header with names. The first  column of the header is the ID or name
14
+of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
15
+metrics are measured.
14 16
 The other columns of the header contains the names of the metrics.
15 17
 The rows contains the measurements of the metrics for each instance in the dataset.}
16 18
 
... ...
@@ -28,7 +30,7 @@ contained in [2,15] range.}
28 30
 \item{path}{String. Path to a valid directory where plots are saved.}
29 31
 }
30 32
 \value{
31
-A dataframe containing the stability measurements and
33
+A \code{\link{SummarizedExperiment}} containing the stability measurements and
32 34
 means for 1 to k clusters.
33 35
 }
34 36
 \description{
... ...
@@ -47,7 +49,8 @@ having the following meaning:
47 49
 }
48 50
 \examples{
49 51
 # Using example data from our package
50
-metrics = loadSample("ont-metrics")
52
+metrics <- loadSample("ont-metrics")
53
+result <- stabilityRange(metrics, k.range=c(2,3))
51 54
 
52 55
 }
53 56
 \references{
... ...
@@ -29,10 +29,11 @@ vignette: >
29 29
   %\VignetteEncoding{UTF-8}  
30 30
 ---
31 31
 
32
-```{r style, echo = FALSE, results = 'asis'}
32
+```{r style, include=FALSE, results='hide'}
33 33
 BiocStyle::markdown()
34 34
 library(kableExtra)
35 35
 library(magrittr)
36
+library(SummarizedExperiment)
36 37
 ```
37 38
 
38 39
 
... ...
@@ -68,12 +69,12 @@ BiocManager::install("evaluomeR")
68 69
 
69 70
 ## Prerequisites ##
70 71
 
71
-The package **evaluomeR** depends on the following CRAN packages for the calculus: *fpc* [@fpc2018], *cluster* [@cluster2018], *corrplot* [@corrplot2017]. Moreover, this package also depends on *grDevices*, *graphics*, *stats* and *utils* from R Core [@rcore] for plotting.
72
+The package **evaluomeR** depends on the following CRAN packages for the calculus: *fpc* [@fpc2018], *cluster* [@cluster2018], *corrplot* [@corrplot2017]. Moreover, this package also depends on *grDevices*, *graphics*, *stats* and *utils* from R Core [@rcore] for plotting and on the Bioconductor package *SummarizedExperiment* [@summarizedExperiment] for input/output data.
72 73
 
73 74
 # Using evaluomeR #
74 75
 
75
-## Creating an input dataframe ##
76
-The input dataframe must follow some structural rules, see Table \@ref(tab:table). The first row is the header. The first column of the header is the ID or name of the instance of the dataset (e.g., ontology, pathway, etc.) on which the metrics are measured. The other  columns of the header contains the names of the metrics. The rows contains the measurements of the metrics for each instance in the dataset.
76
+## Creating an input SummarizedExperiment ##
77
+The input is a `SummarizedExperiment` object. The assay contained in `SummarizedExperiment` must follow a certain structure, see Table \@ref(tab:table): A valid header must be specified. The first column of the header is the ID or name of the instance of the dataset (e.g., ontology, pathway, etc.) on which the metrics are measured. The other  columns of the header contains the names of the metrics. The rows contains the measurements of the metrics for each instance in the dataset.
77 78
 
78 79
 ID        | MetricNameA | MetricNameB | MetricNameC | ... |
79 80
 --------- | ----------- | ----------- | ----------- | --- | 
... ...
@@ -81,7 +82,7 @@ instance1 | 1.2         | 6.4         | 0.5         | ... |
81 82
 instance2 | 2.4         | 5.4         | 0.8         | ... |
82 83
 instance3 | 1.9         | 8.9         | 1.1         | ... |
83 84
   
84
-: (\#tab:table) Example of an input dataframe for the **evaluomeR** package.
85
+: (\#tab:table) Example of an input assay from a `SummarizedExperiment` for the **evaluomeR** package.
85 86
 
86 87
 ## Using input sample data from evaluomeR ##
87 88
 
... ...
@@ -104,7 +105,7 @@ biopathwaysMetrics <- loadSample("biopathways-metrics")
104 105
 
105 106
 ## Correlations ##
106 107
 
107
-We provide the `correlations` function to evaluate the correlations among the metrics defined in the dataframe:
108
+We provide the `correlations` function to evaluate the correlations among the metrics defined in the `SummarizedExperiment`:
108 109
 
109 110
 ```{r correlations-1, echo=TRUE}
110 111
 library(evaluomeR)
... ...
@@ -124,10 +125,15 @@ The stability index analysis is performed by the `stability` function. For insta
124 125
 stabilityData <- stability(rnaMetrics, k=2, bs = 100)
125 126
 ```
126 127
 
127
-The `stability` function returns the `stabilityData` dataframe, which contains the information shown in the plot:
128
+The `stability` function returns the `stabilityData` object, a `SummarizedExperiment` that contains an assay with the information shown in the plot:
129
+
130
+```{r stability-1-assay, results='hide', echo=TRUE, eval=FALSE}
131
+assay(stabilityData, 1)
132
+```
128 133
 
129 134
 ```{r stability-1-table, results='asis', echo=FALSE}
130
-kable(stabilityData) %>%
135
+data <- assay(stabilityData, 1)
136
+kable(data) %>%
131 137
   kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
132 138
 ```
133 139
 
... ...
@@ -157,12 +163,17 @@ For instance, running a quality analysis for the two metrics of `rnaMetrics` dat
157 163
 qualityData = quality(rnaMetrics, k = 4)
158 164
 ```
159 165
 
160
-The data of the first plot titled as "*Qual. Indices for k=4 across metrics*" according to *Silhouette avg. width*, is stored in *Avg_Silhouette_Width* column from `qualityData` dataframe. The other three plots titled by their metric name display the input rows grouped by colours for each cluster, along with their Silhouette width scores.
166
+The data of the first plot titled as "*Qual. Indices for k=4 across metrics*" according to *Silhouette avg. width*, is stored in *Avg_Silhouette_Width* column from the first assay of the `SummarizedExperiment`, `qualityData`. The other three plots titled by their metric name display the input rows grouped by colours for each cluster, along with their Silhouette width scores.
161 167
 
162 168
 The variable `qualityData` contains information about the clusters of each metric: The average silhouette width per cluster, the overall average sihouette width (taking into account all the clusters) and the number of individuals per cluster:
163 169
 
170
+```{r quality-1-assay, results='hide', eval=FALSE, echo=TRUE}
171
+assay(qualityData,1)
172
+```
173
+
164 174
 ```{r quality-1-table, results='asis', echo=FALSE}
165
-kable(qualityData) %>%
175
+data <- assay(qualityData,1)
176
+kable(data) %>%
166 177
   kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
167 178
   scroll_box(width = "100%")
168 179
 ```
... ...
@@ -180,7 +191,7 @@ qualityRangeData = qualityRange(rnaMetrics, k.range)
180 191
 
181 192
 The `qualityRange` function also returns two kind of plots, as seen in [Stability range](#sec:stabilityrange) section. One for each `k` in the `k.range`, showing the quality indices (goodness of the classification) across the metrics, and a second type of plot to show each metric with its respective quality index in each `k` value.
182 193
 
183
-The `qualityRangeData` object returned by `qualityRange` is an array of dataframes, whose size is `diff(k.range)+1`. In the example shown above, the size of `qualityRangeData` is 3, since the array length would contain the dataframes from `k=4` to `k=6`.
194
+The `qualityRangeData` object returned by `qualityRange` is list of `SummarizedExperiment`, whose size is `diff(k.range)+1`. In the example shown above, the size of `qualityRangeData` is 3, since the array length would contain the dataframes from `k=4` to `k=6`.
184 195
 
185 196
 ```{r quality-range-2, eval=TRUE, echo=TRUE}
186 197
 diff(k.range)+1
... ...
@@ -190,13 +201,16 @@ length(qualityRangeData)
190 201
 The user can access a specific dataframe for a given `k` value in three different ways: by dollar notation, brackets notation or using our wrapper method `getDataQualityRange`. For instance, if the user wishes to retrieve the dataframe which contains information of `k=5`,  being the `k.range` [4,6]:
191 202
 
192 203
 ```{r quality-range-3, eval=FALSE, echo=TRUE}
193
-k5DataFrame = qualityRangeData$k_5
194
-k5DataFrame = qualityRangeData[["k_5"]]
195
-k5DataFrame = getDataQualityRange(qualityRangeData, 5)
204
+k5Data = qualityRangeData$k_5
205
+k5Data = qualityRangeData[["k_5"]]
206
+k5Data = getDataQualityRange(qualityRangeData, 5)
207
+assay(k5Data, 1)
196 208
 ```
197 209
 
210
+
198 211
 ```{r quality-range-table, results='asis', echo=FALSE}
199
-kable(qualityRangeData$k_5) %>%
212
+data <- assay(qualityRangeData$k_5, 1)
213
+kable(data) %>%
200 214
   kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
201 215
   scroll_box(width = "100%", height = "150px")
202 216
 ```