Bioconductor Code: evaluomeR

Browse code

All methods depend on `SummarizedExperiment` as input/output object.

Neobernad authored on 16/02/2019 13:03:09
Showing 17 changed files

DESCRIPTION index 1720e5b..e151ab3 100755
NAMESPACE index 4b3ba2d..72622bc 100755
NEWS index 9016508..c7999b3 100755
R/correlation.R index 938a94f..37ee3f9 100755
R/helpers.R index fdbaa9c..79f7227 100755
R/qualityIndices.R index cd2e557..ebba787 100755
R/stabilityIndex.R index 9ef312f..b02b5b5 100755
inst/REFERENCES.bib index 761f3c8..10e91a8 100755
man/correlations.Rd index 82659db..668790f 100755
man/getDataQualityRange.Rd index ef29c27..3594975 100755
man/loadSample.Rd index 4f384da..41a07cb 100755
man/quality.Rd index e5ca5b7..1cd02d3 100755
man/qualityRange.Rd index 5b5362e..2f1d06a 100755
man/seToDataFrame.Rd index 49ff2cf..0000000
man/stability.Rd index 768129b..acb5ecc 100755
man/stabilityRange.Rd index 8625eb6..524715f 100755
vignettes/manual.Rmd index d7d0b99..2d1164d 100755

History View file @ 1374c99

@@ -2,7 +2,7 @@ Package: evaluomeR
                      Type: Package
                      Title: Evaluation of Bioinformatics Metrics
                      URL: http://sele.inf.um.es/evaluome/index.html
                     -Version: 0.99.2
                     +Version: 0.99.3
                      Author: José Antonio Bernabé-Díaz [aut, cre], Manuel Franco [aut], Juana-María Vivo [aut], Manuel Quesada-Martínez [aut], Astrid Duque-Ramos [aut], Jesualdo Tomás Fernández-Breis [aut].
                      Maintainer: José Antonio Bernabé Díaz <[email protected]>
                      Description: Evaluating the reliability of your own metrics
@@ -12,7 +12,7 @@ Description: Evaluating the reliability of your own metrics
                      License: GPL-3
                      Encoding: UTF-8
                      LazyData: true
                     -Depends: R (>= 3.6.0)
                     +Depends: R (>= 3.6.0), SummarizedExperiment
                      Imports:
                          fpc (>= 2.1-11.1),
                          cluster (>= 2.0.7-1),
@@ -21,8 +21,6 @@ Imports:
                          graphics,
                          stats,
                          utils,
                     -    SummarizedExperiment,
                     -    airway,
                          Rdpack
                      Suggests: BiocStyle, knitr, rmarkdown, kableExtra, magrittr
                      VignetteBuilder: knitr

NAMESPACE

History View file @ 1374c99

@@ -13,7 +13,7 @@ importFrom("utils", "capture.output")
                      importFrom("utils", "read.csv")
                      importFrom("SummarizedExperiment", "assays")
                      importFrom("SummarizedExperiment", "assay")
                     -import(airway)
                     +importFrom("SummarizedExperiment", "SummarizedExperiment")
                      export(stability)
                      export(stabilityRange)
@@ -22,4 +22,3 @@ export(qualityRange)
                      export(correlations)
                      export(loadSample)
                      export(getDataQualityRange)
                     -export(seToDataFrame)

NEWS

History View file @ 1374c99

@@ -1,3 +1,6 @@
                     +Changes in version 0.99.3 (2019-04-16)
                     ++ All methods depend on `SummarizedExperiment` as input/output object.
+                    +
                      Changes in version 0.99.2 (2019-04-15)
                      + SummarizedExperiment can now be processed via `seToDataFrame` method. Adding SummarizedExperiment and airway dependencies.

R/correlation.R

History View file @ 1374c99

@@ -25,7 +25,7 @@ correlations <- function(data, margins=c(0,10,9,11), getImages=TRUE,
                        if (!is.null(label)) {
                          isString(label)
+                       }
+                    -
                     +  data <- getAssay(data, 1)
                        cur.env <- new.env()
                        MatCorr <- cor(data[,2:length(data)])

R/helpers.R

History View file @ 1374c99

@@ -6,7 +6,7 @@
                      #'
                      #' @param descriptor Sample file to load: "ont-metrics", "rna-metrics" or "biopathways-metrics".
                      #'
                     -#' @return The dataset specified via \code{descriptor} as a dataframe.
                     +#' @return The \code{\link{SummarizedExperiment}} specified via \code{descriptor}.
                      #'
                      #' @examples
                      #' # Using example data from our package
@@ -16,7 +16,8 @@ loadSample <- function(descriptor) {
                        samples <- c('ont-metrics','rna-metrics','biopathways-metrics')
                        if (is.element(descriptor, samples)) {
                          dataFrame <- read.csv(file=system.file('extdata',descriptor, package="evaluomeR"), header=TRUE);
                     -    return(dataFrame)
                     +    se <- createSE(dataFrame)
                     +    return(se)
                        } else {
                          stop("Invalid descriptor")
+                       }
@@ -26,20 +27,20 @@ loadSample <- function(descriptor) {
                      #' @name getDataQualityRange
                      #' @aliases getDataQualityRange
                      #' @description
                     -#' This method is a wrapper to retrieve a specific dataframe given a \code{k} value from
                     +#' This method is a wrapper to retrieve a specific \code{\link{SummarizedExperiment}} given a \code{k} value from
                      #' the object returned by \code{\link{qualityRange}} function.
                      #'
                      #' @param data The object returned by \code{\link{qualityRange}} function.
                      #' @param k The desired \code{k} cluster.
                      #'
                     -#' @return The dataframe that contains information about the selected \code{k} cluster.
                     +#' @return The \code{\link{SummarizedExperiment}} that contains information about the selected \code{k} cluster.
                      #'
                      #' @examples
                      #' # Using example data from our package
                      #' metrics = loadSample("ont-metrics")
                      #' qualityRangeData <- qualityRange(data=metrics, k.range=c(3,5), getImages = FALSE)
                      #' # Getting the SummarizedExperiment that contains information about k=5
                     -#' k5DataFrame = getDataQualityRange(qualityRangeData, 5)
                     +#' k5Data = getDataQualityRange(qualityRangeData, 5)
                      #'
                      getDataQualityRange <- function(data, k) {
                        dataNames = names(data)
@@ -50,6 +51,7 @@ getDataQualityRange <- function(data, k) {
                        if (k >= kValues[1] && k <= kValues[kValues.length]) {
                          column = paste("k_", k, sep="")
+                    +
                          return(data[[column]])
                        } else {
                          error=paste("Selected k (",k,") is not in the range of k.range ["
@@ -58,45 +60,6 @@ getDataQualityRange <- function(data, k) {
+                       }
+                     }
                     -#' @title SummarizedExperiment to Dataframe
                     -#' @name seToDataFrame
                     -#' @aliases seToDataFrame
                     -#' @description
                     -#' This method is a wrapper to transform a SummarizedExperiment object to a
                     -#' Dataframe processable in our methods.
                     -#'
                     -#' @param SummarizedExperiment A \code{SummarizedExperiment} object
                     -#' (see \code{\link{SummarizedExperiment}}).
                     -#'
                     -#' @return The dataframe that contains information of the first
                     -#' assay in \code{SummarizedExperiment}.
                     -#'
                     -#' @examples
                     -#' # Using example data from airway package
                     -#' library(airway)
                     -#' data(airway)
                     -#' airwayData = seToDataFrame(airway)
                     -#' airwayData = airwayData[1:10000,1:4]
                     -#' stability(airwayData, bs = 20, getImages=FALSE)
                     -#' correlations(airwayData, getImages=FALSE)
                     -#'
                     -seToDataFrame <- function(SummarizedExperiment) {
                     -  se=SummarizedExperiment
                     -  if (length(assays(se)) == 0) {
                     -    stop("SummarizedExperiment has no assays, length is 0")
                     -  }
                     -  test = assay(se,1)
                     -  Datasets <- NULL
                     -  if (is.null(rownames(test))) {
                     -    Datasets <- paste("Dataset_", c(1:length(test[,1])), sep="")
                     -  } else {
                     -    Datasets <- rownames(test)
                     -  }
+                    -
                     -  test <- data.frame(Datasets,test)
                     -  return(test)
                     -}
+                    -
                      #####################
                      ## Private methods ##
                      #####################
@@ -141,3 +104,61 @@ checkDirectory <- function(path) {
+                       }
                        return(path)
+                     }
+                    +
                     +getAssay <- function(SummarizedExperiment, position) {
                     +  se=SummarizedExperiment
                     +  se.length <- length(assays(se))
                     +  if (se.length == 0) {
                     +    stop("SummarizedExperiment has no assays, length is 0")
                     +  }
                     +  if (position > se.length) {
                     +    error <- paste("SummarizedExperiment has no assay in position ",
                     +                   position, sep="")
                     +    stop(error)
                     +  }
                     +  test = assay(se, position)
                     +  # Datasets <- test[,1]
                     +  # if (is.null(rownames(test))) {
                     +  #   Datasets <- paste("Dataset_", c(1:length(test[,1])), sep="")
                     +  # } else {
                     +  #   Datasets <- rownames(test)
                     +  # }
+                    +
                     +  # test <- data.frame(Datasets,test)
                     +  test <- data.frame(test)
                     +  names(test) <- colnames(test)
                     +  return(test)
                     +}
+                    +
                     +# data: One dataframe, thus one assay
                     +createSE <- function(data) {
                     +  nrows <- nrow(data); ncols <- ncol(data)
                     +  counts <- data.matrix(data)
                     +  colnames(counts) <- NULL
                     +  colData <- DataFrame(metrics=colnames(data),
                     +                       row.names=colnames(data))
                     +  se <- SummarizedExperiment(assays=SimpleList(counts),
                     +                              colData=colData)
                     +  return(se)
                     +}
+                    +
                     +# data: A list of dataframes
                     +createSEList <- function(data) {
                     +  if (!is.list(data)) {
                     +    stop("Input variable is not a list")
                     +  }
                     +  if (length(data) == 0) {
                     +    stop("Input variable is an empty list")
                     +  }
                     +  length = length(names(data))
                     +  seList <- list()
                     +  for (i in 1:length) {
                     +    cur.data <- data[[i]]
                     +    dataMatrix <- suppressWarnings(data.matrix(cur.data))
                     +    dataMatrix[,1] <- cur.data$Metric
                     +    se <- createSE(dataMatrix)
                     +    seList <- c(seList, se)
                     +  }
                     +  names(seList) <- names(data)
                     +  return(seList)
                     +}

R/qualityIndices.R

History View file @ 1374c99

@@ -23,7 +23,7 @@
                      #'
                      #' @inheritParams stability
                      #'
                     -#' @return A dataframe containing the silhouette width measurements and
                     +#' @return A \code{\link{SummarizedExperiment}} containing the silhouette width measurements and
                      #' cluster sizes for cluster \code{k}.
                      #'
                      #' @examples
@@ -43,6 +43,7 @@ quality <- function(data, k=5, getImages=TRUE,
                        if (!is.null(label)) {
                          isString(label)
+                       }
                     +  data <- getAssay(data, 1)
                        cur.env <- new.env()
                        suppressWarnings(
@@ -59,7 +60,8 @@ quality <- function(data, k=5, getImages=TRUE,
                          suppressWarnings(
                            runSilhouetteIMG(data, k, label, path, cur.env))
+                       }
                     -  return(silhouetteDataFrame)
                     +  se <- createSE(silhouetteDataFrame)
                     +  return(se)
+                     }
@@ -92,7 +94,7 @@ quality <- function(data, k=5, getImages=TRUE,
                      #' whilst the second one, \code{k.range[2]}, as the higher. Both values must be
                      #' contained in [2,15] range.
                      #'
                     -#' @return A list of dataframes containing the silhouette width measurements and
                     +#' @return A list of \code{\link{SummarizedExperiment}} containing the silhouette width measurements and
                      #' cluster sizes from \code{k.range[1]} to \code{k.range[2]}. The position on the list matches
                      #' with the k-value used in that SummarizedExperiment. For instance, position 5
                      #' represents the SummarizedExperiment with k = 5.
@@ -125,7 +127,7 @@ qualityRange <- function(data, k.range=c(3,5), getImages=TRUE,
                        if (!is.null(label)) {
                          isString(label)
+                       }
+                    -
                     +  data <- getAssay(data, 1)
                        cur.env <- new.env()
                        suppressWarnings(
@@ -144,7 +146,8 @@ qualityRange <- function(data, k.range=c(3,5), getImages=TRUE,
                            runQualityIndicesSilhouetteMetric_IMG(k.min = k.min, k.max = k.max,
                                                                  label, path, cur.env))
+                       }
                     -  return(silhouetteData)
                     +  seList <- createSEList(silhouetteData)
                     +  return(seList)
+                     }
                      runQualityIndicesSilhouette <- function(data, k.min, k.max, bs, env) {

R/stabilityIndex.R

History View file @ 1374c99

@@ -15,9 +15,11 @@
                      #' \item Highly Stable: ]0.85, 1].
                      #' }
                      #'
                     -#' @param data A matrix. The first row is the header. The first
                     -#' column of the header is the ID or name of the instance of the dataset
                     -#' (e.g., ontology, pathway, etc.) on which the metrics are measured.
                     +#' @param data A \code{\link{SummarizedExperiment}}.
                     +#' The SummarizedExperiment must contain an assay with the following structure:
                     +#' A valid header with names. The first  column of the header is the ID or name
                     +#' of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
                     +#' metrics are measured.
                      #' The other columns of the header contain the names of the metrics.
                      #' The rows contain the measurements of the metrics for each instance in the dataset.
                      #' @param k Positive integer. Number of clusters between [2,15] range.
@@ -26,15 +28,15 @@
                      #' @param label String. If not NULL, the label will appear on the title of the plots.
                      #' @param path String. Path to a valid directory where plots are saved.
                      #'
                     -#' @return A dataframe containing the stability measurements and
                     -#' means for 1 to k clusters.
                     +#' @return A \code{\link{SummarizedExperiment}},
                     +#' containing an assay with the stability measurements and means for 1 to k clusters.
                      #'
                      #' @examples
                      #' # Using example data from our package
                     -#' metrics = loadSample("ont-metrics")
                     -#' result = stability(data=metrics, k=4, getImages=TRUE)
                     -#' result = stability(metrics, k=6, getImages=FALSE)
                     -#' result = stability(metrics, k=6, getImages=TRUE, label="Experiment 1:")
                     +#' metrics <- loadSample("ont-metrics")
                     +#' result <- stability(data=metrics, k=4, getImages=TRUE)
                     +#' result <- stability(metrics, k=6, getImages=FALSE)
                     +#' result <- stability(metrics, k=6, getImages=TRUE, label="Experiment 1:")
                      #'
                      #' @references
                      #' \insertRef{milligan1996measuring}{evaluomeR}
@@ -45,6 +47,8 @@
                      stability <- function(data, k=5, bs=100, getImages=TRUE,
                                            label=NULL, path=NULL) {
                     +  data <- getAssay(data, 1)
+                    +
                        checkKValue(k)
                        if (!is.null(label)) {
                          isString(label)
@@ -64,8 +68,8 @@ stability <- function(data, k=5, bs=100, getImages=TRUE,
                            runStabilityIndexK_IMG(bs, k.min = k, k.max = k,
                                                 label, path, cur.env))
+                       }
+                    -
                     -  return(stabilityDataFrame)
                     +  se <- createSE(stabilityDataFrame)
                     +  return(se)
+                     }
@@ -92,12 +96,13 @@ stability <- function(data, k=5, bs=100, getImages=TRUE,
                      #' whilst the second one, \code{k.range[2]}, as the higher. Both values must be
                      #' contained in [2,15] range.
                      #'
                     -#' @return A dataframe containing the stability measurements and
                     +#' @return A \code{\link{SummarizedExperiment}} containing the stability measurements and
                      #' means for 1 to k clusters.
                      #'
                      #' @examples
                      #' # Using example data from our package
                     -#' metrics = loadSample("ont-metrics")
                     +#' metrics <- loadSample("ont-metrics")
                     +#' result <- stabilityRange(metrics, k.range=c(2,3))
                      #'
                      #' @references
                      #' \insertRef{milligan1996measuring}{evaluomeR}
@@ -121,6 +126,7 @@ stabilityRange <- function(data, k.range=c(2,15), bs=100,
                        if (!is.null(label)) {
                          isString(label)
+                       }
                     +  data <- getAssay(data, 1)
                        cur.env <- new.env()
                        suppressWarnings(
@@ -139,7 +145,8 @@ stabilityRange <- function(data, k.range=c(2,15), bs=100,
                            runStabilityIndexMetric_IMG(bs, k.min=k.min, k.max=k.max,
                                                      label, path, cur.env))
+                       }
                     -  return(stabilityDataFrame)
                     +  se <- createSE(stabilityDataFrame)
                     +  return(se)
+                     }
                      runStabilityIndex <- function(data, k.min, k.max, bs, env) {

inst/REFERENCES.bib

History View file @ 1374c99

@@ -9,6 +9,13 @@
                        publisher={Springer}
+                     }
                     +@Manual{summarizedExperiment,
                     +  title = {SummarizedExperiment: SummarizedExperiment container},
                     +  author = {Martin Morgan and Valerie Obenchain and Jim Hester and Hervé Pagès},
                     +  year = {2018},
                     +  note = {R package version 1.12.0},
                     +}
+                    +
                      @article{jaccard1901distribution,
                        title={Distribution de la flore alpine dans le bassin des Dranses et dans quelques r{\'e}gions voisines},
                        author={Jaccard, Paul},

man/correlations.Rd

History View file @ 1374c99

@@ -8,9 +8,11 @@ correlations(data, margins = c(0, 10, 9, 11), getImages = TRUE,
                        label = NULL, path = NULL)
+                     }
                      \arguments{
                     -\item{data}{A matrix. The first row is the header. The first
                     -column of the header is the ID or name of the instance of the dataset
                     -(e.g., ontology, pathway, etc.) on which the metrics are measured.
                     +\item{data}{A \code{\link{SummarizedExperiment}}.
                     +The SummarizedExperiment must contain an assay with the following structure:
                     +A valid header with names. The first  column of the header is the ID or name
                     +of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
                     +metrics are measured.
                      The other columns of the header contain the names of the metrics.
                      The rows contain the measurements of the metrics for each instance in the dataset.}

man/getDataQualityRange.Rd

History View file @ 1374c99

@@ -12,10 +12,10 @@ getDataQualityRange(data, k)
                      \item{k}{The desired \code{k} cluster.}
+                     }
                      \value{
                     -The dataframe that contains information about the selected \code{k} cluster.
                     +The \code{\link{SummarizedExperiment}} that contains information about the selected \code{k} cluster.
+                     }
                      \description{
                     -This method is a wrapper to retrieve a specific dataframe given a \code{k} value from
                     +This method is a wrapper to retrieve a specific \code{\link{SummarizedExperiment}} given a \code{k} value from
                      the object returned by \code{\link{qualityRange}} function.
+                     }
                      \examples{
@@ -23,6 +23,6 @@ the object returned by \code{\link{qualityRange}} function.
                      metrics = loadSample("ont-metrics")
                      qualityRangeData <- qualityRange(data=metrics, k.range=c(3,5), getImages = FALSE)
                      # Getting the SummarizedExperiment that contains information about k=5
                     -k5DataFrame = getDataQualityRange(qualityRangeData, 5)
                     +k5Data = getDataQualityRange(qualityRangeData, 5)
+                     }

man/loadSample.Rd

History View file @ 1374c99

@@ -10,7 +10,7 @@ loadSample(descriptor)
 \item{descriptor}{Sample file to load: "ont-metrics", "rna-metrics" or "biopathways-metrics".}
 }
 \value{
-The dataset specified via \code{descriptor} as a dataframe.
+The \code{\link{SummarizedExperiment}} specified via \code{descriptor}.
 }
 \description{
 This method is a wrapper to load sample input data located inside evaluomeR package.

man/quality.Rd

History View file @ 1374c99

@@ -7,9 +7,11 @@
                      quality(data, k = 5, getImages = TRUE, label = NULL, path = NULL)
+                     }
                      \arguments{
                     -\item{data}{A matrix. The first row is the header. The first
                     -column of the header is the ID or name of the instance of the dataset
                     -(e.g., ontology, pathway, etc.) on which the metrics are measured.
                     +\item{data}{A \code{\link{SummarizedExperiment}}.
                     +The SummarizedExperiment must contain an assay with the following structure:
                     +A valid header with names. The first  column of the header is the ID or name
                     +of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
                     +metrics are measured.
                      The other columns of the header contain the names of the metrics.
                      The rows contain the measurements of the metrics for each instance in the dataset.}
@@ -22,7 +24,7 @@ The rows contains the measurements of the metrics for each instance in the datas
                      \item{path}{String. Path to a valid directory where plots are saved.}
+                     }
                      \value{
                     -A dataframe containing the silhouette width measurements and
                     +A \code{\link{SummarizedExperiment}} containing the silhouette width measurements and
                      cluster sizes for cluster \code{k}.
+                     }
                      \description{

man/qualityRange.Rd

History View file @ 1374c99

@@ -8,9 +8,11 @@ qualityRange(data, k.range = c(3, 5), getImages = TRUE, label = NULL,
                        path = NULL)
+                     }
                      \arguments{
                     -\item{data}{A matrix. The first row is the header. The first
                     -column of the header is the ID or name of the instance of the dataset
                     -(e.g., ontology, pathway, etc.) on which the metrics are measured.
                     +\item{data}{A \code{\link{SummarizedExperiment}}.
                     +The SummarizedExperiment must contain an assay with the following structure:
                     +A valid header with names. The first  column of the header is the ID or name
                     +of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
                     +metrics are measured.
                      The other columns of the header contain the names of the metrics.
                      The rows contain the measurements of the metrics for each instance in the dataset.}
@@ -26,7 +28,7 @@ contained in [2,15] range.}
                      \item{path}{String. Path to a valid directory where plots are saved.}
+                     }
                      \value{
                     -A list of dataframes containing the silhouette width measurements and
                     +A list of \code{\link{SummarizedExperiment}} containing the silhouette width measurements and
                      cluster sizes from \code{k.range[1]} to \code{k.range[2]}. The position on the list matches
                      with the k-value used in that SummarizedExperiment. For instance, position 5
                      represents the SummarizedExperiment with k = 5.

man/seToDataFrame.Rd

History View file @ 1374c99

                     deleted file mode 100755
@@ -1,30 +0,0 @@
                     -% Generated by roxygen2: do not edit by hand
                     -% Please edit documentation in R/helpers.R
                     -\name{seToDataFrame}
                     -\alias{seToDataFrame}
                     -\title{SummarizedExperiment to Dataframe}
                     -\usage{
                     -seToDataFrame(SummarizedExperiment)
                     -}
                     -\arguments{
                     -\item{SummarizedExperiment}{A \code{SummarizedExperiment} object
                     -(see \code{\link{SummarizedExperiment}}).}
                     -}
                     -\value{
                     -The dataframe that contains information of the first
                     -assay in \code{SummarizedExperiment}.
                     -}
                     -\description{
                     -This method is a wrapper to transform a SummarizedExperiment object to a
                     -Dataframe processable in our methods.
                     -}
                     -\examples{
                     -# Using example data from airway package
                     -library(airway)
                     -data(airway)
                     -airwayData = seToDataFrame(airway)
                     -airwayData = airwayData[1:10000,1:4]
                     -stability(airwayData, bs = 20, getImages=FALSE)
                     -correlations(airwayData, getImages=FALSE)
+                    -
                     -}

man/stability.Rd

History View file @ 1374c99

@@ -8,9 +8,11 @@ stability(data, k = 5, bs = 100, getImages = TRUE, label = NULL,
                        path = NULL)
+                     }
                      \arguments{
                     -\item{data}{A matrix. The first row is the header. The first
                     -column of the header is the ID or name of the instance of the dataset
                     -(e.g., ontology, pathway, etc.) on which the metrics are measured.
                     +\item{data}{A \code{\link{SummarizedExperiment}}.
                     +The SummarizedExperiment must contain an assay with the following structure:
                     +A valid header with names. The first  column of the header is the ID or name
                     +of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
                     +metrics are measured.
                      The other columns of the header contain the names of the metrics.
                      The rows contain the measurements of the metrics for each instance in the dataset.}
@@ -25,8 +27,8 @@ The rows contains the measurements of the metrics for each instance in the datas
                      \item{path}{String. Path to a valid directory where plots are saved.}
+                     }
                      \value{
                     -A dataframe containing the stability measurements and
                     -means for 1 to k clusters.
                     +A \code{\link{SummarizedExperiment}},
                     +containing an assay with the stability measurements and means for 1 to k clusters.
+                     }
                      \description{
                      This analysis permits to estimate whether the clustering is meaningfully
@@ -44,10 +46,10 @@ having the following meaning:
+                     }
                      \examples{
                      # Using example data from our package
                     -metrics = loadSample("ont-metrics")
                     -result = stability(data=metrics, k=4, getImages=TRUE)
                     -result = stability(metrics, k=6, getImages=FALSE)
                     -result = stability(metrics, k=6, getImages=TRUE, label="Experiment 1:")
                     +metrics <- loadSample("ont-metrics")
                     +result <- stability(data=metrics, k=4, getImages=TRUE)
                     +result <- stability(metrics, k=6, getImages=FALSE)
                     +result <- stability(metrics, k=6, getImages=TRUE, label="Experiment 1:")
+                     }
                      \references{

man/stabilityRange.Rd

History View file @ 1374c99

@@ -8,9 +8,11 @@ stabilityRange(data, k.range = c(2, 15), bs = 100, getImages = TRUE,
                        label = NULL, path = NULL)
+                     }
                      \arguments{
                     -\item{data}{A matrix. The first row is the header. The first
                     -column of the header is the ID or name of the instance of the dataset
                     -(e.g., ontology, pathway, etc.) on which the metrics are measured.
                     +\item{data}{A \code{\link{SummarizedExperiment}}.
                     +The SummarizedExperiment must contain an assay with the following structure:
                     +A valid header with names. The first  column of the header is the ID or name
                     +of the instance of the dataset (e.g., ontology, pathway, etc.) on which the
                     +metrics are measured.
                      The other columns of the header contain the names of the metrics.
                      The rows contain the measurements of the metrics for each instance in the dataset.}
@@ -28,7 +30,7 @@ contained in [2,15] range.}
                      \item{path}{String. Path to a valid directory where plots are saved.}
+                     }
                      \value{
                     -A dataframe containing the stability measurements and
                     +A \code{\link{SummarizedExperiment}} containing the stability measurements and
                      means for 1 to k clusters.
+                     }
                      \description{
@@ -47,7 +49,8 @@ having the following meaning:
+                     }
                      \examples{
                      # Using example data from our package
                     -metrics = loadSample("ont-metrics")
                     +metrics <- loadSample("ont-metrics")
                     +result <- stabilityRange(metrics, k.range=c(2,3))
+                     }
                      \references{

vignettes/manual.Rmd

History View file @ 1374c99

@@ -29,10 +29,11 @@ vignette: >
                        %\VignetteEncoding{UTF-8}
                      ---
                     -```{r style, echo = FALSE, results = 'asis'}
                     +```{r style, include=FALSE, results='hide'}
                      BiocStyle::markdown()
                      library(kableExtra)
                      library(magrittr)
                     +library(SummarizedExperiment)
                      ```
@@ -68,12 +69,12 @@ BiocManager::install("evaluomeR")
                      ## Prerequisites ##
                     -The package **evaluomeR** depends on the following CRAN packages for the calculus: *fpc* [@fpc2018], *cluster* [@cluster2018], *corrplot* [@corrplot2017]. Moreover, this package also depends on *grDevices*, *graphics*, *stats* and *utils* from R Core [@rcore] for plotting.
                     +The package **evaluomeR** depends on the following CRAN packages for the calculations: *fpc* [@fpc2018], *cluster* [@cluster2018], *corrplot* [@corrplot2017]. Moreover, this package also depends on *grDevices*, *graphics*, *stats* and *utils* from R Core [@rcore] for plotting and on the Bioconductor package *SummarizedExperiment* [@summarizedExperiment] for input/output data.
                      # Using evaluomeR #
                     -## Creating an input dataframe ##
                     -The input dataframe must follow some structural rules, see Table \@ref(tab:table). The first row is the header. The first column of the header is the ID or name of the instance of the dataset (e.g., ontology, pathway, etc.) on which the metrics are measured. The other  columns of the header contains the names of the metrics. The rows contains the measurements of the metrics for each instance in the dataset.
                     +## Creating an input SummarizedExperiment ##
                     +The input is a `SummarizedExperiment` object. The assay contained in `SummarizedExperiment` must follow a certain structure, see Table \@ref(tab:table): A valid header must be specified. The first column of the header is the ID or name of the instance of the dataset (e.g., ontology, pathway, etc.) on which the metrics are measured. The other columns of the header contain the names of the metrics. The rows contain the measurements of the metrics for each instance in the dataset.
                      ID        | MetricNameA | MetricNameB | MetricNameC | ... |
                      --------- | ----------- | ----------- | ----------- | --- |
@@ -81,7 +82,7 @@ instance1 | 1.2         | 6.4         | 0.5         | ... |
                      instance2 | 2.4         | 5.4         | 0.8         | ... |
                      instance3 | 1.9         | 8.9         | 1.1         | ... |
                     -: (\#tab:table) Example of an input dataframe for the **evaluomeR** package.
                     +: (\#tab:table) Example of an input assay from a `SummarizedExperiment` for the **evaluomeR** package.
                      ## Using input sample data from evaluomeR ##
@@ -104,7 +105,7 @@ biopathwaysMetrics <- loadSample("biopathways-metrics")
                      ## Correlations ##
                     -We provide the `correlations` function to evaluate the correlations among the metrics defined in the dataframe:
                     +We provide the `correlations` function to evaluate the correlations among the metrics defined in the `SummarizedExperiment`:
                      ```{r correlations-1, echo=TRUE}
                      library(evaluomeR)
@@ -124,10 +125,15 @@ The stability index analysis is performed by the `stability` function. For insta
                      stabilityData <- stability(rnaMetrics, k=2, bs = 100)
                      ```
                     -The `stability` function returns the `stabilityData` dataframe, which contains the information shown in the plot:
                     +The `stability` function returns the `stabilityData` object, a `SummarizedExperiment` that contains an assay with the information shown in the plot:
+                    +
                     +```{r stability-1-assay, results='hide', echo=TRUE, eval=FALSE}
                     +assay(stabilityData, 1)
                     +```
                      ```{r stability-1-table, results='asis', echo=FALSE}
                     -kable(stabilityData) %>%
                     +data <- assay(stabilityData, 1)
                     +kable(data) %>%
                        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
                      ```
@@ -157,12 +163,17 @@ For instance, running a quality analysis for the two metrics of `rnaMetrics` dat
                      qualityData = quality(rnaMetrics, k = 4)
                      ```
                     -The data of the first plot titled as "*Qual. Indices for k=4 across metrics*" according to *Silhouette avg. width*, is stored in *Avg_Silhouette_Width* column from `qualityData` dataframe. The other three plots titled by their metric name display the input rows grouped by colours for each cluster, along with their Silhouette width scores.
                     +The data of the first plot titled as "*Qual. Indices for k=4 across metrics*" according to *Silhouette avg. width*, is stored in *Avg_Silhouette_Width* column from the first assay of the `SummarizedExperiment`, `qualityData`. The other three plots titled by their metric name display the input rows grouped by colours for each cluster, along with their Silhouette width scores.
                      The variable `qualityData` contains information about the clusters of each metric: the average silhouette width per cluster, the overall average silhouette width (taking into account all the clusters) and the number of individuals per cluster:
                     +```{r quality-1-assay, results='hide', eval=FALSE, echo=TRUE}
                     +assay(qualityData,1)
                     +```
+                    +
                      ```{r quality-1-table, results='asis', echo=FALSE}
                     -kable(qualityData) %>%
                     +data <- assay(qualityData,1)
                     +kable(data) %>%
                        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
                        scroll_box(width = "100%")
                      ```
@@ -180,7 +191,7 @@ qualityRangeData = qualityRange(rnaMetrics, k.range)
                      The `qualityRange` function also returns two kinds of plots, as seen in [Stability range](#sec:stabilityrange) section. One for each `k` in the `k.range`, showing the quality indices (goodness of the classification) across the metrics, and a second type of plot to show each metric with its respective quality index in each `k` value.
                     -The `qualityRangeData` object returned by `qualityRange` is an array of dataframes, whose size is `diff(k.range)+1`. In the example shown above, the size of `qualityRangeData` is 3, since the array length would contain the dataframes from `k=4` to `k=6`.
                     +The `qualityRangeData` object returned by `qualityRange` is a list of `SummarizedExperiment` objects, whose length is `diff(k.range)+1`. In the example shown above, the length of `qualityRangeData` is 3, since the list contains the `SummarizedExperiment` objects from `k=4` to `k=6`.
                      ```{r quality-range-2, eval=TRUE, echo=TRUE}
                      diff(k.range)+1
@@ -190,13 +201,16 @@ length(qualityRangeData)
                      The user can access the `SummarizedExperiment` for a given `k` value in three different ways: by dollar notation, bracket notation or using our wrapper method `getDataQualityRange`. For instance, if the user wishes to retrieve the `SummarizedExperiment` which contains the information for `k=5`, with the `k.range` being [4,6]:
                      ```{r quality-range-3, eval=FALSE, echo=TRUE}
                     -k5DataFrame = qualityRangeData$k_5
                     -k5DataFrame = qualityRangeData[["k_5"]]
                     -k5DataFrame = getDataQualityRange(qualityRangeData, 5)
                     +k5Data = qualityRangeData$k_5
                     +k5Data = qualityRangeData[["k_5"]]
                     +k5Data = getDataQualityRange(qualityRangeData, 5)
                     +assay(k5Data, 1)
                      ```
+                    +
                      ```{r quality-range-table, results='asis', echo=FALSE}
                     -kable(qualityRangeData$k_5) %>%
                     +data <- assay(qualityRangeData$k_5, 1)
                     +kable(data) %>%
                        kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
                        scroll_box(width = "100%", height = "150px")
                      ```

...	...	@@ -10,7 +10,7 @@ loadSample(descriptor)
10	10	\item{descriptor}{Sample file to load: "ont-metrics", "rna-metrics" or "biopathways-metrics".}
11	11	}
12	12	\value{
13		-The dataset specified via \code{descriptor} as a dataframe.
	13	+The \code{\link{SummarizedExperiment}} specified via \code{descriptor}.
14	14	}
15	15	\description{
16	16	This method is a wrapper to load sample input data located inside evaluomeR package.