Browse code

Prototyped compareSamples function

Jaehyun Joo authored on 02/04/2023 18:38:22
Showing 5 changed files

... ...
@@ -7,6 +7,7 @@ S3method(summary,reduced.pca)
7 7
 S3method(summary,reduced.plsda)
8 8
 S3method(summary,reduced.tsne)
9 9
 export(clusterFeatures)
10
+export(compareSamples)
10 11
 export(imputeIntensity)
11 12
 export(imputeKNN)
12 13
 export(normalizeIntensity)
... ...
@@ -47,7 +47,7 @@
47 47
 ##'
48 48
 ##' @param x A \linkS4class{SummarizedExperiment} object.
49 49
 ##' @param i A string or integer value specifying which assay values to use.
50
-##' @param rtime_var A string specifying the names of variable containing a
50
+##' @param rtime_var A string specifying the name of variable containing a
51 51
 ##'   numeric vector of retention times in `rowData(x)`.
52 52
 ##' @param rt_cut A numeric value specifying a cut-off for the retention-time
53 53
 ##'   based feature grouping.
54 54
new file mode 100644
... ...
@@ -0,0 +1,138 @@
1
+##' Sample comparison
2
+##'
3
+##' Function to make a comparisons between two groups in study samples with a
4
+##' \linkS4class{SummarizedExperiment}.
5
+##'
6
+##' This function provides a simplified interface of fitting a linear model to
7
+##' make a comparison of interest using the [limma::lmFit], [limma::eBayes], and
8
+##' [limma::topTable] functions. For more flexible model specification (e.g.,
9
+##' interaction model, multi-level model), please use a standard workflow
10
+##' outlined in the `limma` package manual.
11
+##'
12
+##' @param x A \linkS4class{SummarizedExperiment} object.
13
+##' @param i A string or integer value specifying which assay values to use. The
14
+##'   assay is expected to contain log-transformed intensities.
15
+##' @param group A string specifying the name of variable containing a class
16
+##'   label of each sample in `colData(x)`.
17
+##' @param class1,class2 A string specifying the class label of samples to be
18
+##'   compared. Must be one of `group` levels. No need to be specified if
19
+##'   `group` has only two levels. This function evaluates the contrast: class2
20
+##'   - class1.
21
+##' @param covariates A vector indicating the names of variables to be included
22
+##'   in the model as covariates. The covariates must be found in `colData(x)`.
23
+##' @param confint A logical specifying whether 95% confidence intervals of
24
+##'   log-fold-change need to be reported. Alternatively, a numeric value
25
+##'   between zero and one specifying the confidence level required.
26
+##' @param number The maximum number of metabolic features to list.
27
+##' @param adjust.method A string specifying which p-value adjustment method to
28
+##'   use. Options, in increasing conservatism, include "none", "BH", "BY" and
29
+##'   "holm". See [p.adjust] for the complete list of options. A NULL value will
30
+##'   result in the default adjustment method, which is "BH".
31
+##' @param sort.by A string specifying which statistic to rank the metabolic
32
+##'   features by. Possible values for topTable are "logFC", "AveExpr", "t",
33
+##'   "P", "p", "B" or "none" (Permitted synonyms are "M" for "logFC", "A" or
34
+##'   "Amean" for "AveExpr", "T" for "t" and "p" for "P").
35
+##' @param resort.by A string specifying statistic to sort the selected
36
+##'   metabolic features by in the output. Possibilities are the same as for
37
+##'   sort.by.
38
+##' @param p.value A numeric value specifying a cut-off for adjusted p-values.
39
+##'   Only metabolic features with lower p-values are listed.
40
+##' @param fc A numeric value specifying a minimum fold-change to be required.
41
+##'   If specified, the function output only includes metabolic features with
42
+##'   absolute fold-change greater than `fc`.
43
+##' @param lfc A numeric value specifying a minimum log2-fold-change required,
44
+##'   equal to log2(fc). `fc` and `lfc` are alternative ways to specify a
45
+##'   fold-change cut-off and, if both are specified, then `fc` take precedence.
46
+##' @param ... Additional arguments passed to [limma::eBayes].
47
+##' @return A data.frame with a row for the metabolic features and the following
48
+##'   columns:
49
+##' * logFC: an estimate of log-fold-change corresponding to the contrast tested
50
+##' * CI.L: a left limit of confidence interval for `logFC` (if `confint` is
51
+##' enabled)
52
+##' * CI.R: a right limit of confidence interval for `logFC` (if `confint` is
53
+##' enabled)
54
+##' * AveExpr: an average log-expression/abundance of metabolic features
55
+##' * t: a moderated t-statistic
56
+##' * P.Value: a raw p-value
57
+##' * adj.P.Value: an adjusted p-value
58
+##' * B: a log-odds that the metabolic feature is differentially expressed
59
+##'
60
+##' @references
61
+##'
62
+##' Ritchie ME, Phipson B, Wu D, Hu Y, Law CW, Shi W, Smyth GK. limma powers
63
+##' differential expression analyses for RNA-sequencing and microarray studies.
64
+##' Nucleic Acids Res. 2015 Apr 20;43(7):e47. doi: 10.1093/nar/gkv007. Epub 2015
65
+##' Jan 20. PMID: 25605792; PMCID: PMC4402510.
66
+##'
67
+##' @seealso
68
+##'
69
+##' See [limma::lmFit], [limma::eBayes], and [limma::topTable] for underlying
70
+##' functions that do work.
71
+##'
72
+##' @examples
73
+##'
74
+##' data(faahko_se)
75
+##'
76
+##' compareSamples(faahko_se, i = "knn_vsn", group = "sample_group", number = 5)
77
+##'
78
+##' @export
79
+compareSamples <- function(x, i, group,
80
+                           class1, class2, covariates = NULL, confint = TRUE,
81
+                           number = nrow(x), adjust.method = "BH",
82
+                           sort.by = "B", resort.by = NULL,
83
+                           p.value = 1, fc = NULL, lfc = NULL, ...) {
84
+  if (!is(x, "SummarizedExperiment")) {
85
+    stop("`x` must be a SummarizedExperiment object.")
86
+  }
87
+  ## Meta data describing the study samples
88
+  cdat <- colData(x)
89
+  ## Assert `group` is a column of colData(x)
90
+  if (!any(colnames(colData(x)) == group)) {
91
+    stop("Variable `", group, "` is not found in `colData(x)`." )
92
+  }
93
+  ## Convert `group` into a factor variable
94
+  if (!is.factor(cdat[[group]])) {
95
+    cdat[[group]] <- as.factor(cdat[[group]])
96
+  }
97
+  ## Two-level group does not have to specify class levels; instead, infer a
98
+  ## contrast to test.
99
+  if (any(missing(class1), missing(class2))) {
100
+    if (length(levels(cdat[[group]])) > 2) {
101
+      stop("`", group, "` has more than two levels.",
102
+           " Please specify class1 and class2 arguments.")
103
+    } else if (missing(class1) && !missing(class2)) {
104
+      class1 <- setdiff(levels(cdat[[group]]), class2)
105
+    } else if (!missing(class1) && missing(class2)) {
106
+      class2 <- setdiff(levels(cdat[[group]]), class1)
107
+    } else {
108
+      class1 <- levels(cdat[[group]])[1]
109
+      class2 <- levels(cdat[[group]])[2]
110
+    }
111
+  }
112
+  if (!all(c(class1, class2) %in% levels(cdat[[group]]))) {
113
+    stop(setdiff(c(class1, class2), levels(cdat[[group]])),
114
+         " is not the levels of '", group, "'")
115
+  }
116
+  message(paste0("Contrast: ", class2, " - ", class1))
117
+  ## Check covariates are found in cdat
118
+  if (!all(covariates %in% colnames(cdat))) {
119
+    stop("`colData(x)` does not have a column(s): ",
120
+         paste(setdiff(covariates, colnames(cdat)), collapse = ", "))
121
+  }
122
+  ## Model formula
123
+  fm <- as.formula(paste(c("~ 0", group, covariates), collapse = " + "))
124
+  ## Design matrix (group mean specification)
125
+  dm <- model.matrix(fm, data = cdat)
126
+  colnames(dm) <- sub(group, "", colnames(dm))
127
+  contrast <- limma::makeContrasts(contrasts = paste(class2, class1, sep = " - "), levels = dm)
128
+  ## Fit model
129
+  fit <- limma::lmFit(assay(x, i), design = dm)
130
+  fit <- limma::contrasts.fit(fit, contrasts = contrast)
131
+  fit <- limma::eBayes(fit, ...)
132
+  ## Output
133
+  limma::topTable(
134
+           fit, number = number, adjust.method = adjust.method,
135
+           sort.by = sort.by, resort.by = resort.by, p.value = p.value,
136
+           fc = fc, lfc = lfc, confint = confint
137
+         )
138
+}
... ...
@@ -24,7 +24,7 @@ clusterFeatures(
24 24
 
25 25
 \item{i}{A string or integer value specifying which assay values to use.}
26 26
 
27
-\item{rtime_var}{A string specifying the names of variable containing a
27
+\item{rtime_var}{A string specifying the name of variable containing a
28 28
 numeric vector of retention times in \code{rowData(x)}.}
29 29
 
30 30
 \item{rt_cut}{A numeric value specifying a cut-off for the retention-time
31 31
new file mode 100644
... ...
@@ -0,0 +1,120 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/compareSamples.R
3
+\name{compareSamples}
4
+\alias{compareSamples}
5
+\title{Sample comparison}
6
+\usage{
7
+compareSamples(
8
+  x,
9
+  i,
10
+  group,
11
+  class1,
12
+  class2,
13
+  covariates = NULL,
14
+  confint = FALSE,
15
+  number = nrow(x),
16
+  adjust.method = "BH",
17
+  sort.by = "B",
18
+  resort.by = NULL,
19
+  p.value = 1,
20
+  fc = NULL,
21
+  lfc = NULL,
22
+  ...
23
+)
24
+}
25
+\arguments{
26
+\item{x}{A \linkS4class{SummarizedExperiment} object.}
27
+
28
+\item{i}{A string or integer value specifying which assay values to use. The
29
+assay is expected to contain log-transformed intensities.}
30
+
31
+\item{group}{A string specifying the name of variable containing a class
32
+label of each sample in \code{colData(x)}.}
33
+
34
+\item{class1, class2}{A string specifying the class label of samples to be
35
+compared. Must be one of \code{group} levels. No need to be specified if
36
+\code{group} has only two levels. This function evaluates the contrast: class2
37
+\itemize{
38
+\item class1.
39
+}}
40
+
41
+\item{covariates}{A vector indicating the names of variables to be included
42
+in the model as covariates. The covariates must be found in \code{colData(x)}.}
43
+
44
+\item{confint}{A logical specifying whether 95\% confidence intervals of
45
+log-fold-change need to be reported. Alternatively, a numeric value
46
+between zero and one specifying the confidence level required.}
47
+
48
+\item{number}{The maximum number of metabolic features to list.}
49
+
50
+\item{adjust.method}{A string specifying which p-value adjustment method to
51
+use. Options, in increasing conservatism, include "none", "BH", "BY" and
52
+"holm". See \link{p.adjust} for the complete list of options. A NULL value will
53
+result in the default adjustment method, which is "BH".}
54
+
55
+\item{sort.by}{A string specifying which statistic to rank the metabolic
56
+features by. Possible values for topTable are "logFC", "AveExpr", "t",
57
+"P", "p", "B" or "none" (Permitted synonyms are "M" for "logFC", "A" or
58
+"Amean" for "AveExpr", "T" for "t" and "p" for "P").}
59
+
60
+\item{resort.by}{A string specifying statistic to sort the selected
61
+metabolic features by in the output. Possibilities are the same as for
62
+sort.by.}
63
+
64
+\item{p.value}{A numeric value specifying a cut-off for adjusted p-values.
65
+Only metabolic features with lower p-values are listed.}
66
+
67
+\item{fc}{A numeric value specifying a minimum fold-change to be required.
68
+If specified, the function output only includes metabolic features with
69
+absolute fold-change greater than \code{fc}.}
70
+
71
+\item{lfc}{A numeric value specifying a minimum log2-fold-change required,
72
+equal to log2(fc). \code{fc} and \code{lfc} are alternative ways to specify a
73
+fold-change cut-off and, if both are specified, then \code{fc} take precedence.}
74
+
75
+\item{...}{Additional arguments passed to \link[limma:ebayes]{limma::eBayes}.}
76
+}
77
+\value{
78
+A data.frame with a row for the metabolic features and the following
79
+columns:
80
+\itemize{
81
+\item logFC: an estimate of log-fold-change corresponding to the contrast tested
82
+\item CI.L: a left limit of confidence interval for \code{logFC} (if \code{confint} is
83
+enabled)
84
+\item CI.R: a right limit of confidence interval for \code{logFC} (if \code{confint} is
85
+enabled)
86
+\item AveExpr: an average log-expression/abundance of metabolic features
87
+\item t: a moderated t-statistic
88
+\item P.Value: a raw p-value
89
+\item adj.P.Value: an adjusted p-value
90
+\item B: a log-odds that the metabolic feature is differentially expressed
91
+}
92
+}
93
+\description{
94
+Function to make a comparisons between two groups in study samples with a
95
+\linkS4class{SummarizedExperiment}.
96
+}
97
+\details{
98
+This function provides a simplified interface of fitting a linear model to
99
+make a comparison of interest using the \link[limma:lmFit]{limma::lmFit}, \link[limma:ebayes]{limma::eBayes}, and
100
+\link[limma:toptable]{limma::topTable} functions. For more flexible model specification (e.g.,
101
+interaction model, multi-level model), please use a standard workflow
102
+outlined in the \code{limma} package manual.
103
+}
104
+\examples{
105
+
106
+data(faahko_se)
107
+
108
+compareSamples(faahko_se, i = "knn_vsn", group = "sample_group", number = 5)
109
+
110
+}
111
+\references{
112
+Ritchie ME, Phipson B, Wu D, Hu Y, Law CW, Shi W, Smyth GK. limma powers
113
+differential expression analyses for RNA-sequencing and microarray studies.
114
+Nucleic Acids Res. 2015 Apr 20;43(7):e47. doi: 10.1093/nar/gkv007. Epub 2015
115
+Jan 20. PMID: 25605792; PMCID: PMC4402510.
116
+}
117
+\seealso{
118
+See \link[limma:lmFit]{limma::lmFit}, \link[limma:ebayes]{limma::eBayes}, and \link[limma:toptable]{limma::topTable} for underlying
119
+functions that do work.
120
+}