... | ... |
@@ -47,7 +47,7 @@ |
47 | 47 |
##' |
48 | 48 |
##' @param x A \linkS4class{SummarizedExperiment} object. |
49 | 49 |
##' @param i A string or integer value specifying which assay values to use. |
50 |
-##' @param rtime_var A string specifying the names of variable containing a |
|
50 |
+##' @param rtime_var A string specifying the name of variable containing a |
|
51 | 51 |
##' numeric vector of retention times in `rowData(x)`. |
52 | 52 |
##' @param rt_cut A numeric value specifying a cut-off for the retention-time |
53 | 53 |
##' based feature grouping. |
54 | 54 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,138 @@ |
1 |
+##' Sample comparison |
|
2 |
+##' |
|
3 |
+##' Function to make a comparisons between two groups in study samples with a |
|
4 |
+##' \linkS4class{SummarizedExperiment}. |
|
5 |
+##' |
|
6 |
+##' This function provides a simplified interface of fitting a linear model to |
|
7 |
+##' make a comparison of interest using the [limma::lmFit], [limma::eBayes], and |
|
8 |
+##' [limma::topTable] functions. For more flexible model specification (e.g., |
|
9 |
+##' interaction model, multi-level model), please use a standard workflow |
|
10 |
+##' outlined in the `limma` package manual. |
|
11 |
+##' |
|
12 |
+##' @param x A \linkS4class{SummarizedExperiment} object. |
|
13 |
+##' @param i A string or integer value specifying which assay values to use. The |
|
14 |
+##' assay is expected to contain log-transformed intensities. |
|
15 |
+##' @param group A string specifying the name of variable containing a class |
|
16 |
+##' label of each sample in `colData(x)`. |
|
17 |
+##' @param class1,class2 A string specifying the class label of samples to be |
|
18 |
+##' compared. Must be one of `group` levels. No need to be specified if |
|
19 |
+##' `group` has only two levels. This function evaluates the contrast: class2 |
|
20 |
+##' - class1. |
|
21 |
+##' @param covariates A vector indicating the names of variables to be included |
|
22 |
+##' in the model as covariates. The covariates must be found in `colData(x)`. |
|
23 |
+##' @param confint A logical specifying whether 95% confidence intervals of |
|
24 |
+##' log-fold-change need to be reported. Alternatively, a numeric value |
|
25 |
+##' between zero and one specifying the confidence level required. |
|
26 |
+##' @param number The maximum number of metabolic features to list. |
|
27 |
+##' @param adjust.method A string specifying which p-value adjustment method to |
|
28 |
+##' use. Options, in increasing conservatism, include "none", "BH", "BY" and |
|
29 |
+##' "holm". See [p.adjust] for the complete list of options. A NULL value will |
|
30 |
+##' result in the default adjustment method, which is "BH". |
|
31 |
+##' @param sort.by A string specifying which statistic to rank the metabolic |
|
32 |
+##' features by. Possible values for topTable are "logFC", "AveExpr", "t", |
|
33 |
+##' "P", "p", "B" or "none" (Permitted synonyms are "M" for "logFC", "A" or |
|
34 |
+##' "Amean" for "AveExpr", "T" for "t" and "p" for "P"). |
|
35 |
+##' @param resort.by A string specifying statistic to sort the selected |
|
36 |
+##' metabolic features by in the output. Possibilities are the same as for |
|
37 |
+##' sort.by. |
|
38 |
+##' @param p.value A numeric value specifying a cut-off for adjusted p-values. |
|
39 |
+##' Only metabolic features with lower p-values are listed. |
|
40 |
+##' @param fc A numeric value specifying a minimum fold-change to be required. |
|
41 |
+##' If specified, the function output only includes metabolic features with |
|
42 |
+##' absolute fold-change greater than `fc`. |
|
43 |
+##' @param lfc A numeric value specifying a minimum log2-fold-change required, |
|
44 |
+##' equal to log2(fc). `fc` and `lfc` are alternative ways to specify a |
|
45 |
+##' fold-change cut-off and, if both are specified, then `fc` take precedence. |
|
46 |
+##' @param ... Additional arguments passed to [limma::eBayes]. |
|
47 |
+##' @return A data.frame with a row for the metabolic features and the following |
|
48 |
+##' columns: |
|
49 |
+##' * logFC: an estimate of log-fold-change corresponding to the contrast tested |
|
50 |
+##' * CI.L: a left limit of confidence interval for `logFC` (if `confint` is |
|
51 |
+##' enabled) |
|
52 |
+##' * CI.R: a right limit of confidence interval for `logFC` (if `confint` is |
|
53 |
+##' enabled) |
|
54 |
+##' * AveExpr: an average log-expression/abundance of metabolic features |
|
55 |
+##' * t: a moderated t-statistic |
|
56 |
+##' * P.Value: a raw p-value |
|
57 |
+##' * adj.P.Value: an adjusted p-value |
|
58 |
+##' * B: a log-odds that the metabolic feature is differentially expressed |
|
59 |
+##' |
|
60 |
+##' @references |
|
61 |
+##' |
|
62 |
+##' Ritchie ME, Phipson B, Wu D, Hu Y, Law CW, Shi W, Smyth GK. limma powers |
|
63 |
+##' differential expression analyses for RNA-sequencing and microarray studies. |
|
64 |
+##' Nucleic Acids Res. 2015 Apr 20;43(7):e47. doi: 10.1093/nar/gkv007. Epub 2015 |
|
65 |
+##' Jan 20. PMID: 25605792; PMCID: PMC4402510. |
|
66 |
+##' |
|
67 |
+##' @seealso |
|
68 |
+##' |
|
69 |
+##' See [limma::lmFit], [limma::eBayes], and [limma::topTable] for underlying |
|
70 |
+##' functions that do work. |
|
71 |
+##' |
|
72 |
+##' @examples |
|
73 |
+##' |
|
74 |
+##' data(faahko_se) |
|
75 |
+##' |
|
76 |
+##' compareSamples(faahko_se, i = "knn_vsn", group = "sample_group", number = 5) |
|
77 |
+##' |
|
78 |
+##' @export |
|
79 |
+compareSamples <- function(x, i, group, |
|
80 |
+ class1, class2, covariates = NULL, confint = TRUE, |
|
81 |
+ number = nrow(x), adjust.method = "BH", |
|
82 |
+ sort.by = "B", resort.by = NULL, |
|
83 |
+ p.value = 1, fc = NULL, lfc = NULL, ...) { |
|
84 |
+ if (!is(x, "SummarizedExperiment")) { |
|
85 |
+ stop("`x` must be a SummarizedExperiment object.") |
|
86 |
+ } |
|
87 |
+ ## Meta data describing the study samples |
|
88 |
+ cdat <- colData(x) |
|
89 |
+ ## Assert `group` is a column of colData(x) |
|
90 |
+ if (!any(colnames(colData(x)) == group)) { |
|
91 |
+ stop("Variable `", group, "` is not found in `colData(x)`." ) |
|
92 |
+ } |
|
93 |
+ ## Convert `group` into a factor variable |
|
94 |
+ if (!is.factor(cdat[[group]])) { |
|
95 |
+ cdat[[group]] <- as.factor(cdat[[group]]) |
|
96 |
+ } |
|
97 |
+ ## Two-level group does not have to specify class levels; instead, infer a |
|
98 |
+ ## contrast to test. |
|
99 |
+ if (any(missing(class1), missing(class2))) { |
|
100 |
+ if (length(levels(cdat[[group]])) > 2) { |
|
101 |
+ stop("`", group, "` has more than two levels.", |
|
102 |
+ " Please specify class1 and class2 arguments.") |
|
103 |
+ } else if (missing(class1) && !missing(class2)) { |
|
104 |
+ class1 <- setdiff(levels(cdat[[group]]), class2) |
|
105 |
+ } else if (!missing(class1) && missing(class2)) { |
|
106 |
+ class2 <- setdiff(levels(cdat[[group]]), class1) |
|
107 |
+ } else { |
|
108 |
+ class1 <- levels(cdat[[group]])[1] |
|
109 |
+ class2 <- levels(cdat[[group]])[2] |
|
110 |
+ } |
|
111 |
+ } |
|
112 |
+ if (!all(c(class1, class2) %in% levels(cdat[[group]]))) { |
|
113 |
+ stop(setdiff(c(class1, class2), levels(cdat[[group]])), |
|
114 |
+ " is not the levels of '", group, "'") |
|
115 |
+ } |
|
116 |
+ message(paste0("Contrast: ", class2, " - ", class1)) |
|
117 |
+ ## Check covariates are found in cdat |
|
118 |
+ if (!all(covariates %in% colnames(cdat))) { |
|
119 |
+ stop("`colData(x)` does not have a column(s): ", |
|
120 |
+ paste(setdiff(covariates, colnames(cdat)), collapse = ", ")) |
|
121 |
+ } |
|
122 |
+ ## Model formula |
|
123 |
+ fm <- as.formula(paste(c("~ 0", group, covariates), collapse = " + ")) |
|
124 |
+ ## Design matrix (group mean specification) |
|
125 |
+ dm <- model.matrix(fm, data = cdat) |
|
126 |
+ colnames(dm) <- sub(group, "", colnames(dm)) |
|
127 |
+ contrast <- limma::makeContrasts(contrasts = paste(class2, class1, sep = " - "), levels = dm) |
|
128 |
+ ## Fit model |
|
129 |
+ fit <- limma::lmFit(assay(x, i), design = dm) |
|
130 |
+ fit <- limma::contrasts.fit(fit, contrasts = contrast) |
|
131 |
+ fit <- limma::eBayes(fit, ...) |
|
132 |
+ ## Output |
|
133 |
+ limma::topTable( |
|
134 |
+ fit, number = number, adjust.method = adjust.method, |
|
135 |
+ sort.by = sort.by, resort.by = resort.by, p.value = p.value, |
|
136 |
+ fc = fc, lfc = lfc, confint = confint |
|
137 |
+ ) |
|
138 |
+} |
... | ... |
@@ -24,7 +24,7 @@ clusterFeatures( |
24 | 24 |
|
25 | 25 |
\item{i}{A string or integer value specifying which assay values to use.} |
26 | 26 |
|
27 |
-\item{rtime_var}{A string specifying the names of variable containing a |
|
27 |
+\item{rtime_var}{A string specifying the name of variable containing a |
|
28 | 28 |
numeric vector of retention times in \code{rowData(x)}.} |
29 | 29 |
|
30 | 30 |
\item{rt_cut}{A numeric value specifying a cut-off for the retention-time |
31 | 31 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,120 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/compareSamples.R |
|
3 |
+\name{compareSamples} |
|
4 |
+\alias{compareSamples} |
|
5 |
+\title{Sample comparison} |
|
6 |
+\usage{ |
|
7 |
+compareSamples( |
|
8 |
+ x, |
|
9 |
+ i, |
|
10 |
+ group, |
|
11 |
+ class1, |
|
12 |
+ class2, |
|
13 |
+ covariates = NULL, |
|
14 |
+ confint = FALSE, |
|
15 |
+ number = nrow(x), |
|
16 |
+ adjust.method = "BH", |
|
17 |
+ sort.by = "B", |
|
18 |
+ resort.by = NULL, |
|
19 |
+ p.value = 1, |
|
20 |
+ fc = NULL, |
|
21 |
+ lfc = NULL, |
|
22 |
+ ... |
|
23 |
+) |
|
24 |
+} |
|
25 |
+\arguments{ |
|
26 |
+\item{x}{A \linkS4class{SummarizedExperiment} object.} |
|
27 |
+ |
|
28 |
+\item{i}{A string or integer value specifying which assay values to use. The |
|
29 |
+assay is expected to contain log-transformed intensities.} |
|
30 |
+ |
|
31 |
+\item{group}{A string specifying the name of variable containing a class |
|
32 |
+label of each sample in \code{colData(x)}.} |
|
33 |
+ |
|
34 |
+\item{class1, class2}{A string specifying the class label of samples to be |
|
35 |
+compared. Must be one of \code{group} levels. No need to be specified if |
|
36 |
+\code{group} has only two levels. This function evaluates the contrast: class2 |
|
37 |
+\itemize{ |
|
38 |
+\item class1. |
|
39 |
+}} |
|
40 |
+ |
|
41 |
+\item{covariates}{A vector indicating the names of variables to be included |
|
42 |
+in the model as covariates. The covariates must be found in \code{colData(x)}.} |
|
43 |
+ |
|
44 |
+\item{confint}{A logical specifying whether 95\% confidence intervals of |
|
45 |
+log-fold-change need to be reported. Alternatively, a numeric value |
|
46 |
+between zero and one specifying the confidence level required.} |
|
47 |
+ |
|
48 |
+\item{number}{The maximum number of metabolic features to list.} |
|
49 |
+ |
|
50 |
+\item{adjust.method}{A string specifying which p-value adjustment method to |
|
51 |
+use. Options, in increasing conservatism, include "none", "BH", "BY" and |
|
52 |
+"holm". See \link{p.adjust} for the complete list of options. A NULL value will |
|
53 |
+result in the default adjustment method, which is "BH".} |
|
54 |
+ |
|
55 |
+\item{sort.by}{A string specifying which statistic to rank the metabolic |
|
56 |
+features by. Possible values for topTable are "logFC", "AveExpr", "t", |
|
57 |
+"P", "p", "B" or "none" (Permitted synonyms are "M" for "logFC", "A" or |
|
58 |
+"Amean" for "AveExpr", "T" for "t" and "p" for "P").} |
|
59 |
+ |
|
60 |
+\item{resort.by}{A string specifying statistic to sort the selected |
|
61 |
+metabolic features by in the output. Possibilities are the same as for |
|
62 |
+sort.by.} |
|
63 |
+ |
|
64 |
+\item{p.value}{A numeric value specifying a cut-off for adjusted p-values. |
|
65 |
+Only metabolic features with lower p-values are listed.} |
|
66 |
+ |
|
67 |
+\item{fc}{A numeric value specifying a minimum fold-change to be required. |
|
68 |
+If specified, the function output only includes metabolic features with |
|
69 |
+absolute fold-change greater than \code{fc}.} |
|
70 |
+ |
|
71 |
+\item{lfc}{A numeric value specifying a minimum log2-fold-change required, |
|
72 |
+equal to log2(fc). \code{fc} and \code{lfc} are alternative ways to specify a |
|
73 |
+fold-change cut-off and, if both are specified, then \code{fc} take precedence.} |
|
74 |
+ |
|
75 |
+\item{...}{Additional arguments passed to \link[limma:ebayes]{limma::eBayes}.} |
|
76 |
+} |
|
77 |
+\value{ |
|
78 |
+A data.frame with a row for the metabolic features and the following |
|
79 |
+columns: |
|
80 |
+\itemize{ |
|
81 |
+\item logFC: an estimate of log-fold-change corresponding to the contrast tested |
|
82 |
+\item CI.L: a left limit of confidence interval for \code{logFC} (if \code{confint} is |
|
83 |
+enabled) |
|
84 |
+\item CI.R: a right limit of confidence interval for \code{logFC} (if \code{confint} is |
|
85 |
+enabled) |
|
86 |
+\item AveExpr: an average log-expression/abundance of metabolic features |
|
87 |
+\item t: a moderated t-statistic |
|
88 |
+\item P.Value: a raw p-value |
|
89 |
+\item adj.P.Value: an adjusted p-value |
|
90 |
+\item B: a log-odds that the metabolic feature is differentially expressed |
|
91 |
+} |
|
92 |
+} |
|
93 |
+\description{ |
|
94 |
+Function to make a comparisons between two groups in study samples with a |
|
95 |
+\linkS4class{SummarizedExperiment}. |
|
96 |
+} |
|
97 |
+\details{ |
|
98 |
+This function provides a simplified interface of fitting a linear model to |
|
99 |
+make a comparison of interest using the \link[limma:lmFit]{limma::lmFit}, \link[limma:ebayes]{limma::eBayes}, and |
|
100 |
+\link[limma:toptable]{limma::topTable} functions. For more flexible model specification (e.g., |
|
101 |
+interaction model, multi-level model), please use a standard workflow |
|
102 |
+outlined in the \code{limma} package manual. |
|
103 |
+} |
|
104 |
+\examples{ |
|
105 |
+ |
|
106 |
+data(faahko_se) |
|
107 |
+ |
|
108 |
+compareSamples(faahko_se, i = "knn_vsn", group = "sample_group", number = 5) |
|
109 |
+ |
|
110 |
+} |
|
111 |
+\references{ |
|
112 |
+Ritchie ME, Phipson B, Wu D, Hu Y, Law CW, Shi W, Smyth GK. limma powers |
|
113 |
+differential expression analyses for RNA-sequencing and microarray studies. |
|
114 |
+Nucleic Acids Res. 2015 Apr 20;43(7):e47. doi: 10.1093/nar/gkv007. Epub 2015 |
|
115 |
+Jan 20. PMID: 25605792; PMCID: PMC4402510. |
|
116 |
+} |
|
117 |
+\seealso{ |
|
118 |
+See \link[limma:lmFit]{limma::lmFit}, \link[limma:ebayes]{limma::eBayes}, and \link[limma:toptable]{limma::topTable} for underlying |
|
119 |
+functions that do work. |
|
120 |
+} |