- New function fitFDistUnequalDF1(), which improves on fitFDist() and
fitFDistRobustly(), with special attention to contexts where the
df1 argument is unequal between cases and can include small
fractional values, such as those resulting from the edgeR 4.0
quasi-likelihood pipeline.
- New argument `legacy` for squeezeVar(). If `legacy=FALSE`, then it
uses fitFDistUnequalDF1() instead of fitFDist() or
fitFDistRobustly(). If `legacy=NULL` then fitFDistUnequalDF1() will
be used when the degrees of freedom `df` are not all equal.
- New argument `legacy` for eBayes() and treat(). If `legacy=FALSE`,
then the new hyperparameter estimation implemented via
fitFDistUnequalDF1() will be used, otherwise the legacy method will
be used. If `legacy=NULL` then the new method will be used when the
residual degrees of freedom are not all equal.
- Improved checking for the class of the input in backgroundCorrect.R.
... | ... |
@@ -1,9 +1,9 @@ |
1 | 1 |
Package: limma |
2 |
-Version: 3.61.7 |
|
3 |
-Date: 2024-07-30 |
|
2 |
+Version: 3.61.8 |
|
3 |
+Date: 2024-08-02 |
|
4 | 4 |
Title: Linear Models for Microarray Data |
5 | 5 |
Description: Data analysis, linear models and differential expression for microarray and RNA-seq data. |
6 |
-Author: Gordon Smyth [cre,aut], Yifang Hu [ctb], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Davis McCarthy [ctb], Di Wu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Aaron Lun [ctb], Natalie Thorne [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Yunshun Chen [ctb], Goknur Giner [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb], Charity Law [ctb], Mengbo Li [ctb] |
|
6 |
+Author: Gordon Smyth [cre,aut], Yifang Hu [ctb], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Davis McCarthy [ctb], Di Wu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Aaron Lun [ctb], Natalie Thorne [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Yunshun Chen [ctb], Goknur Giner [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb], Charity Law [ctb], Mengbo Li [ctb], Lizhong Chen [ctb] |
|
7 | 7 |
Maintainer: Gordon Smyth <[email protected]> |
8 | 8 |
License: GPL (>=2) |
9 | 9 |
Depends: R (>= 3.6.0) |
... | ... |
@@ -15,13 +15,13 @@ importFrom("stats", "approx", "approxfun", "as.dendrogram", "as.dist", |
15 | 15 |
"dist", "density", "df", "dhyper", "dnorm", "filter", "fitted", |
16 | 16 |
"hat", "hclust", "integrate", "isoreg", "lm", "lm.fit", "lm.wfit", "loess", |
17 | 17 |
"lowess", "median", "model.matrix", "na.exclude", "na.omit", |
18 |
- "nlminb", "nls", "nls.control", "optim", "p.adjust", |
|
18 |
+ "nlminb", "nls", "nls.control", "optim", "optimize", "p.adjust", |
|
19 | 19 |
"pbeta", "pchisq", "pexp", "pf", "pgamma", "phyper", |
20 | 20 |
"pnorm", "ppoints", "predict", "pt", "qchisq", "qf", |
21 | 21 |
"qnorm", "qt", "quantile", "residuals", "rnorm", "sd", |
22 | 22 |
"uniroot", "var", "weighted.mean") |
23 | 23 |
importFrom("utils", "getFromNamespace", "head", "read.delim", "read.table", "tail", "write.table") |
24 |
-importFrom("statmod", "logmdigamma", "gauss.quad.prob", "mixedModel2Fit", "remlscore") |
|
24 |
+importFrom("statmod", "gauss.quad.prob", "logmdigamma", "mixedModel2Fit", "remlscore") |
|
25 | 25 |
if( tools:::.OStype() == "windows" ) importFrom("utils", "winMenuAddItem") |
26 | 26 |
|
27 | 27 |
S3method("[",MAList) |
... | ... |
@@ -6,18 +6,21 @@ backgroundCorrect <- function(RG, method="auto", offset=0, printer=RG$printer, n |
6 | 6 |
{ |
7 | 7 |
# Apply background correction to microarray data |
8 | 8 |
# Gordon Smyth |
9 |
-# 12 April 2003. Last modified 30 August 2010. |
|
9 |
+# 12 April 2003. Last modified 1 August 2024. |
|
10 | 10 |
|
11 | 11 |
if(is.null(method)) method <- "auto" |
12 | 12 |
|
13 |
+# Single-channel background correction |
|
13 | 14 |
if(is(RG,"EListRaw")) { |
14 | 15 |
RG$E <- backgroundCorrect.matrix(RG$E,RG$Eb,method=method,offset=offset,normexp.method=normexp.method,verbose=verbose) |
15 | 16 |
RG$Eb <- NULL |
16 | 17 |
return(RG) |
17 | 18 |
} |
18 | 19 |
|
19 |
- if(class(RG)=="list" && !is.null(RG$R)) RG <- new("RGList",RG) |
|
20 |
+# If RG is an unclassed list with a red channel, then convert to RGList |
|
21 |
+ if(!isS4(RG) && is.list(RG) && !is.null(RG$R)) RG <- new("RGList",RG) |
|
20 | 22 |
|
23 |
+# Two-color background correction |
|
21 | 24 |
if(is(RG,"RGList")) { |
22 | 25 |
RG$R <- backgroundCorrect.matrix(RG$R,RG$Rb,method=method,offset=offset,normexp.method=normexp.method,verbose=verbose) |
23 | 26 |
RG$Rb <- NULL |
... | ... |
@@ -26,6 +29,7 @@ backgroundCorrect <- function(RG, method="auto", offset=0, printer=RG$printer, n |
26 | 29 |
return(RG) |
27 | 30 |
} |
28 | 31 |
|
32 |
+# Failing all other classes, coerce RG to a matrix |
|
29 | 33 |
RG <- as.matrix(RG) |
30 | 34 |
if(!(method %in% c("none","normexp"))) stop(method,"correction requires background intensities") |
31 | 35 |
RG <- backgroundCorrect.matrix(RG,method=method,offset=offset,normexp.method=normexp.method,verbose=verbose) |
... | ... |
@@ -1,14 +1,14 @@ |
1 | 1 |
# EMPIRICAL BAYES FUNCTIONS |
2 | 2 |
|
3 |
-eBayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robust=FALSE,winsor.tail.p=c(0.05,0.1)) |
|
3 |
+eBayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robust=FALSE,winsor.tail.p=c(0.05,0.1),legacy=NULL) |
|
4 | 4 |
# Empirical Bayes statistics to select differentially expressed genes. |
5 | 5 |
# Accepts and returns an MArrayLM object. |
6 | 6 |
# Gordon Smyth |
7 |
-# 4 August 2003. Last modified 29 Nov 2022. |
|
7 |
+# Created 4 Aug 2003. Last modified 2 Aug 2024. |
|
8 | 8 |
{ |
9 | 9 |
if(!is.list(fit)) stop("fit is not a valid MArrayLM object") |
10 | 10 |
if(is.logical(trend) && trend && is.null(fit$Amean)) stop("Need Amean component in fit to estimate trend") |
11 |
- eb <- .ebayes(fit=fit,proportion=proportion,stdev.coef.lim=stdev.coef.lim,trend=trend,robust=robust,winsor.tail.p=winsor.tail.p) |
|
11 |
+ eb <- .ebayes(fit=fit,proportion=proportion,stdev.coef.lim=stdev.coef.lim,trend=trend,robust=robust,winsor.tail.p=winsor.tail.p,legacy=legacy) |
|
12 | 12 |
fit$df.prior <- eb$df.prior |
13 | 13 |
fit$s2.prior <- eb$s2.prior |
14 | 14 |
fit$var.prior <- eb$var.prior |
... | ... |
@@ -28,11 +28,10 @@ eBayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robus |
28 | 28 |
fit |
29 | 29 |
} |
30 | 30 |
|
31 |
-.ebayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robust=FALSE,winsor.tail.p=c(0.05,0.1)) |
|
31 |
+.ebayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robust=FALSE,winsor.tail.p=c(0.05,0.1),legacy=NULL) |
|
32 | 32 |
# Empirical Bayes statistics to select differentially expressed genes |
33 | 33 |
# Gordon Smyth |
34 |
-# Created 8 Sept 2002. Last revised 10 Feb 2022. |
|
35 |
-# Made a non-exported function 18 Feb 2018. |
|
34 |
+# Created 8 Sep 2002. Made non-exported function 18 Feb 2018. Last revised 2 Aug 2024. |
|
36 | 35 |
{ |
37 | 36 |
coefficients <- fit$coefficients |
38 | 37 |
stdev.unscaled <- fit$stdev.unscaled |
... | ... |
@@ -55,7 +54,7 @@ eBayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robus |
55 | 54 |
} |
56 | 55 |
|
57 | 56 |
# Moderated t-statistic |
58 |
- out <- squeezeVar(sigma^2, df.residual, covariate=covariate, robust=robust, winsor.tail.p=winsor.tail.p) |
|
57 |
+ out <- squeezeVar(sigma^2, df.residual, covariate=covariate, robust=robust, winsor.tail.p=winsor.tail.p, legacy=legacy) |
|
59 | 58 |
out$s2.prior <- out$var.prior |
60 | 59 |
out$s2.post <- out$var.post |
61 | 60 |
out$var.prior <- out$var.post <- NULL |
... | ... |
@@ -1,9 +1,9 @@ |
1 | 1 |
# EMPIRICAL BAYES SQUEEZING OF VARIANCES |
2 | 2 |
|
3 |
-squeezeVar <- function(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0.05,0.1)) |
|
3 |
+squeezeVar <- function(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0.05,0.1), legacy=NULL) |
|
4 | 4 |
# Empirical Bayes posterior variances |
5 | 5 |
# Gordon Smyth |
6 |
-# Created 2 March 2004. Last modified 29 July 2024. |
|
6 |
+# Created 2 March 2004. Last modified 1 August 2024. |
|
7 | 7 |
{ |
8 | 8 |
n <- length(var) |
9 | 9 |
|
... | ... |
@@ -14,13 +14,25 @@ squeezeVar <- function(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0. |
14 | 14 |
# When df==0, guard against missing or infinite values in var |
15 | 15 |
if(length(df)>1L) var[df==0] <- 0 |
16 | 16 |
|
17 |
+# Choose legacy or new method depending whether df are unequal |
|
18 |
+ if(is.null(legacy)) { |
|
19 |
+ dfp <- df[df>0] |
|
20 |
+ legacy <- identical(min(dfp),max(dfp)) |
|
21 |
+ } |
|
22 |
+ |
|
17 | 23 |
# Estimate hyperparameters |
18 |
- if(robust) { |
|
19 |
- fit <- fitFDistRobustly(var, df1=df, covariate=covariate, winsor.tail.p=winsor.tail.p) |
|
20 |
- df.prior <- fit$df2.shrunk |
|
24 |
+ if(legacy) { |
|
25 |
+ if(robust) { |
|
26 |
+ fit <- fitFDistRobustly(var, df1=df, covariate=covariate, winsor.tail.p=winsor.tail.p) |
|
27 |
+ df.prior <- fit$df2.shrunk |
|
28 |
+ } else { |
|
29 |
+ fit <- fitFDist(var, df1=df, covariate=covariate) |
|
30 |
+ df.prior <- fit$df2 |
|
31 |
+ } |
|
21 | 32 |
} else { |
22 |
- fit <- fitFDist(var, df1=df, covariate=covariate) |
|
23 |
- df.prior <- fit$df2 |
|
33 |
+ fit <- fitFDistUnequalDF1(var, df1=df, covariate=covariate, robust=robust) |
|
34 |
+ df.prior <- fit$df.shrunk |
|
35 |
+ if(is.null(df.prior)) df.prior <- fit$df2 |
|
24 | 36 |
} |
25 | 37 |
if(anyNA(df.prior)) stop("Could not estimate prior df") |
26 | 38 |
|
... | ... |
@@ -34,28 +46,29 @@ squeezeVar <- function(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0. |
34 | 46 |
# Squeeze posterior variances given hyperparameters |
35 | 47 |
# NAs not allowed in df.prior |
36 | 48 |
# Gordon Smyth |
37 |
-# Created 5 May 2016. Last modified 29 July 2024. |
|
49 |
+# Created 5 May 2016. Last modified 1 August 2024. |
|
38 | 50 |
{ |
39 |
- n <- length(var) |
|
40 |
- isfin <- is.finite(df.prior) |
|
41 |
- if(all(isfin)) return( (df*var + df.prior*var.prior) / (df+df.prior) ) |
|
42 |
- |
|
43 |
-# From here, at least some df.prior are infinite |
|
51 |
+# If df.prior are all finite, use canonical formula |
|
52 |
+ m <- max(df.prior) |
|
53 |
+ if(is.finite(m)) return( (df*var + df.prior*var.prior) / (df+df.prior) ) |
|
44 | 54 |
|
45 |
-# For infinite df.prior, return var.prior |
|
55 |
+# Set var.post to var.prior of length n |
|
56 |
+ n <- length(var) |
|
46 | 57 |
if(identical(length(var.prior),n)) { |
47 | 58 |
var.post <- var.prior |
48 | 59 |
} else { |
49 | 60 |
var.post <- rep_len(var.prior, length.out=n) |
50 | 61 |
} |
51 | 62 |
|
52 |
-# Maybe some df.prior are finite |
|
53 |
- if(any(isfin)) { |
|
54 |
- i <- which(isfin) |
|
55 |
- if(length(df)>1) df <- df[i] |
|
56 |
- df.prior <- df.prior[i] |
|
57 |
- var.post[i] <- (df*var[i] + df.prior*var.post[i]) / (df+df.prior) |
|
58 |
- } |
|
63 |
+# Check if df.prior all Inf |
|
64 |
+ m <- min(df.prior) |
|
65 |
+ if(m > 1e100) return(var.post) |
|
66 |
+ |
|
67 |
+# Only some df.prior are finite |
|
68 |
+ i <- which(is.finite(df.prior)) |
|
69 |
+ if(length(df)>1L) df <- df[i] |
|
70 |
+ df.prior <- df.prior[i] |
|
71 |
+ var.post[i] <- (df*var[i] + df.prior*var.post[i]) / (df+df.prior) |
|
59 | 72 |
|
60 | 73 |
var.post |
61 | 74 |
} |
... | ... |
@@ -1,9 +1,9 @@ |
1 | 1 |
### treat.R |
2 | 2 |
|
3 |
-treat <- function(fit, fc=1.2, lfc=NULL, trend=FALSE, robust=FALSE, winsor.tail.p=c(0.05,0.1)) |
|
3 |
+treat <- function(fit, fc=1.2, lfc=NULL, trend=FALSE, robust=FALSE, winsor.tail.p=c(0.05,0.1), legacy=NULL) |
|
4 | 4 |
# Moderated t-statistics relative to a logFC threshold. |
5 | 5 |
# Davis McCarthy, Gordon Smyth |
6 |
-# 25 July 2008. Last revised 6 May 2023. |
|
6 |
+# 25 July 2008. Last revised 2 Aug 2024. |
|
7 | 7 |
{ |
8 | 8 |
# Check fit |
9 | 9 |
if(!is(fit,"MArrayLM")) stop("fit must be an MArrayLM object") |
... | ... |
@@ -28,7 +28,7 @@ treat <- function(fit, fc=1.2, lfc=NULL, trend=FALSE, robust=FALSE, winsor.tail. |
28 | 28 |
} else { |
29 | 29 |
covariate <- NULL |
30 | 30 |
} |
31 |
- sv <- squeezeVar(sigma^2, df.residual, covariate=covariate, robust=robust, winsor.tail.p=winsor.tail.p) |
|
31 |
+ sv <- squeezeVar(sigma^2, df.residual, covariate=covariate, robust=robust, winsor.tail.p=winsor.tail.p, legacy=legacy) |
|
32 | 32 |
fit$df.prior <- sv$df.prior |
33 | 33 |
fit$s2.prior <- sv$var.prior |
34 | 34 |
fit$s2.post <- sv$var.post |
... | ... |
@@ -3,6 +3,24 @@ |
3 | 3 |
\encoding{UTF-8} |
4 | 4 |
|
5 | 5 |
|
6 |
+\section{Version 3.61.8}{\itemize{ |
|
7 |
+ |
|
8 |
+ \item |
|
9 |
+ New function fitFDistUnequalDF1() for improved empirical Bayes |
|
10 |
+ hyperparameter estimation when the residual degrees of freedom are |
|
11 |
+ not all equal between genes. The new hyperpameter estimation will be |
|
12 |
+ used by eBayes(), treat() and squeezeVar() by default whenever the |
|
13 |
+ residual degrees of freedom are unequal, but the legacy behavior |
|
14 |
+ can be preserved for backward compatibility by setting |
|
15 |
+ `legacy=TRUE`. |
|
16 |
+ |
|
17 |
+ \item |
|
18 |
+ New argument `keep.EList` for voomaLmFit(). The observation weights |
|
19 |
+ generated by voomaLmFit() are now stored only if `keep.EList=TRUE`. |
|
20 |
+ |
|
21 |
+}} |
|
22 |
+ |
|
23 |
+ |
|
6 | 24 |
\section{Version 3.60.0}{\itemize{ |
7 | 25 |
|
8 | 26 |
\item |
... | ... |
@@ -1,3 +1,24 @@ |
1 |
+ 1 Aug 2024: limma 3.61.8 |
|
2 |
+ |
|
3 |
+- New function fitFDistUnequalDF1(), which improves on fitFDist() and |
|
4 |
+ fitFDistRobustly(), with special attention to contexts where the |
|
5 |
+ df1 argument is unequal between cases and can include small |
|
6 |
+ fractional values, such as those resulting from the edgeR 4.0 |
|
7 |
+ quasi-likelihood pipeline. |
|
8 |
+ |
|
9 |
+- New argument `legacy` for squeezeVar(). If `legacy=FALSE`, then it |
|
10 |
+ uses fitFDistUnequalDF1() instead of fitFDist() or |
|
11 |
+ fitFDistRobustly(). If `legacy=NULL` then fitFDistUnequalDF1() will |
|
12 |
+ be used when the degrees of freedom `df` are not all equal. |
|
13 |
+ |
|
14 |
+- New argument `legacy` for eBayes() and treat(). If `legacy=FALSE`, |
|
15 |
+ then the new hyperparameter estimation implemented via |
|
16 |
+ fitFDistUnequalDF1() will be used, otherwise the legacy method will |
|
17 |
+ be used. If `legacy=NULL` then the new method will be used when the |
|
18 |
+ residual degrees of freedom are not all equal. |
|
19 |
+ |
|
20 |
+- Improved checking for the class of the input in backgroundCorrect.R. |
|
21 |
+ |
|
1 | 22 |
30 Jul 2024: limma 3.61.7 |
2 | 23 |
|
3 | 24 |
- Clean up some code in fitFDistRobustly(). |
... | ... |
@@ -58,7 +58,7 @@ After fitting a linear model, the standard errors are moderated using a simple e |
58 | 58 |
A moderated t-statistic and a log-odds of differential expression is computed for each contrast for each gene. |
59 | 59 |
\code{treat} tests whether log-fold-changes are greater than a threshold rather than merely different to zero. |
60 | 60 |
|
61 |
-\code{\link{eBayes}} and \code{\link{treat}} use internal functions \code{\link{squeezeVar}}, \code{\link{fitFDist}}, \code{\link{tmixture.matrix}} and \code{\link{tmixture.vector}}. |
|
61 |
+\code{\link{eBayes}} and \code{\link{treat}} use internal functions \code{\link{squeezeVar}}, \code{\link{fitFDist}}, \code{\link{fitFDistRobustly}}, \code{\link{fitFDistUnequalDF1}}, \code{\link{tmixture.matrix}} and \code{\link{tmixture.vector}}. |
|
62 | 62 |
} |
63 | 63 |
|
64 | 64 |
\section{Summarizing Model Fits}{ |
... | ... |
@@ -104,7 +104,7 @@ Smyth, G. K. (2004). Linear models and empirical Bayes methods for assessing dif |
104 | 104 |
\emph{Statistical Applications in Genetics and Molecular Biology}, \bold{3}, No. 1, Article 3. |
105 | 105 |
\url{https://gksmyth.github.io/pubs/ebayes.pdf} |
106 | 106 |
|
107 |
-Smyth, G. K., Michaud, J., and Scott, H. (2005). The use of within-array replicate spots for assessing differential expression in microarray experiments. Bioinformatics 21(9), 2067-2075. |
|
107 |
+Smyth, G. K., Michaud, J., and Scott, H. (2005). The use of within-array replicate spots for assessing differential expression in microarray experiments. \emph{Bioinformatics} 21(9), 2067-2075. |
|
108 | 108 |
} |
109 | 109 |
|
110 | 110 |
\seealso{ |
... | ... |
@@ -7,9 +7,9 @@ |
7 | 7 |
|
8 | 8 |
\usage{ |
9 | 9 |
eBayes(fit, proportion = 0.01, stdev.coef.lim = c(0.1,4), |
10 |
- trend = FALSE, robust = FALSE, winsor.tail.p = c(0.05,0.1)) |
|
10 |
+ trend = FALSE, robust = FALSE, winsor.tail.p = c(0.05,0.1), legacy = NULL) |
|
11 | 11 |
treat(fit, fc = 1.2, lfc = NULL, trend = FALSE, |
12 |
- robust = FALSE, winsor.tail.p = c(0.05,0.1)) |
|
12 |
+ robust = FALSE, winsor.tail.p = c(0.05,0.1), legacy = NULL) |
|
13 | 13 |
} |
14 | 14 |
|
15 | 15 |
\arguments{ |
... | ... |
@@ -19,7 +19,8 @@ treat(fit, fc = 1.2, lfc = NULL, trend = FALSE, |
19 | 19 |
\item{stdev.coef.lim}{numeric vector of length 2, assumed lower and upper limits for the standard deviation of log2-fold-changes for differentially expressed genes} |
20 | 20 |
\item{trend}{logical, should an intensity-dependent trend be allowed for the prior variance? If \code{FALSE} then the prior variance is constant. Alternatively, \code{trend} can be a row-wise numeric vector, which will be used as the covariate for the prior variance.} |
21 | 21 |
\item{robust}{logical, should the estimation of \code{df.prior} and \code{var.prior} be robustified against outlier sample variances?} |
22 |
- \item{winsor.tail.p}{numeric vector of length 1 or 2, giving left and right tail proportions of \code{x} to Winsorize. Used only when \code{robust=TRUE}.} |
|
22 |
+ \item{winsor.tail.p}{numeric vector of length 1 or 2, giving left and right tail proportions of \code{x} to Winsorize (if \code{robust=TRUE} and \code{legacy=TRUE}).} |
|
23 |
+ \item{legacy}{logical. If \code{FALSE} then the new hyperparameter estimation (introduced in limma 3.61.8) will be used, otherwise the legacy methods will be used. If \code{NULL}, then the new method will be used whenever the residual degrees of freedom are not all equal.} |
|
23 | 24 |
\item{fc}{a minimum fold-change below which changes are not considered scientifically meaningful.} |
24 | 25 |
\item{lfc}{a minimum log2-fold-change below which changes not considered scientifically meaningful. Defaults to \code{log2(fc)}.} |
25 | 26 |
} |
... | ... |
@@ -60,7 +61,7 @@ For each gene (row), the moderated F-statistic tests whether all the contrasts a |
60 | 61 |
The F-statistic is an overall test computed from the set of t-statistics for that probe. |
61 | 62 |
This is exactly analogous the relationship between t-tests and F-statistics in conventional anova, except that the residual mean squares have been moderated between genes. |
62 | 63 |
|
63 |
-The estimates \code{s2.prior} and \code{df.prior} are computed by \code{fitFDist}. |
|
64 |
+The estimates \code{s2.prior} and \code{df.prior} are computed by one of \code{fitFDist}, \code{fitFDistRobustly} or \code{fitFDistUnequalDF1} (depending on settings for \code{robust} and \code{legacy}). |
|
64 | 65 |
\code{s2.post} is the weighted average of \code{s2.prior} and \code{sigma^2} with weights proportional to \code{df.prior} and \code{df.residual} respectively. |
65 | 66 |
The log-odds of differential expression \code{lods} was called the \emph{B-statistic} by Loennstedt and Speed (2002). |
66 | 67 |
The F-statistics \code{F} are computed by \code{classifyTestsF} with \code{fstat.only=TRUE}. |
... | ... |
@@ -102,6 +103,12 @@ limma-trend is recommended for RNA-seq analysis when the library sizes are reaso |
102 | 103 |
If \code{robust=TRUE} then the robust empirical Bayes procedure of Phipson et al (2016) is used. |
103 | 104 |
This is frequently useful to protect the empirical Bayes procedure against hyper-variable or hypo-variable genes, especially when analysing RNA-seq data. |
104 | 105 |
See \code{\link{squeezeVar}} for more details. |
106 |
+ |
|
107 |
+In limma 3.61.8 (August 2024), the new function \code{\link{fitFDistUnequalDF1}} was introduced to improve estimation of the hyperparameters \code{s2.prior} and \code{df.prior}, especially when not all genes have the same residual degrees of freedom. |
|
108 |
+\code{fitFDistUnequalDF1} is a potential replacement for the original functions \code{fitFDist} and \code{fitFDistRobustly} and the argument \code{legacy} is provided to control backward compatibility. |
|
109 |
+The new hyperparameter estimation will be used if \code{legacy=FALSE} and the original methods will be used if \code{legacy=TRUE}. |
|
110 |
+If \code{legacy=NULL}, then the new method will be used if the residual degrees of freedom are unequal and the original methods otherwise. |
|
111 |
+Unequal residual degrees of freedom arise in limma pipelines when the expression matrix includes missing values or from the quasi-likelihood pipeline in edgeR v4. |
|
105 | 112 |
} |
106 | 113 |
|
107 | 114 |
\note{ |
... | ... |
@@ -1,14 +1,17 @@ |
1 | 1 |
\name{fitFDist} |
2 | 2 |
\alias{fitFDist} |
3 | 3 |
\alias{fitFDistRobustly} |
4 |
+\alias{fitFDistUnequalDF1} |
|
5 |
+ |
|
4 | 6 |
\title{Moment Estimation of Scaled F-Distribution} |
5 | 7 |
\description{ |
6 | 8 |
Moment estimation of the parameters of a scaled F-distribution given one of the degrees of freedom. |
7 |
-This function is called internally by \code{eBayes} and \code{squeezeVar} and is not usually called directly by a user. |
|
9 |
+These functions are called internally by \code{eBayes} and \code{squeezeVar} and is not usually called directly by a user. |
|
8 | 10 |
} |
9 | 11 |
\usage{ |
10 |
-fitFDist(x, df1, covariate=NULL) |
|
11 |
-fitFDistRobustly(x, df1, covariate=NULL, winsor.tail.p=c(0.05,0.1), trace=FALSE) |
|
12 |
+fitFDist(x, df1, covariate = NULL) |
|
13 |
+fitFDistRobustly(x, df1, covariate = NULL, winsor.tail.p = c(0.05,0.1), trace = FALSE) |
|
14 |
+fitFDistUnequalDF1(x, df1, covariate = NULL, robust = FALSE, prior.weights = NULL) |
|
12 | 15 |
} |
13 | 16 |
\arguments{ |
14 | 17 |
\item{x}{numeric vector or array of positive values representing a sample from a scaled F-distribution.} |
... | ... |
@@ -16,9 +19,11 @@ fitFDistRobustly(x, df1, covariate=NULL, winsor.tail.p=c(0.05,0.1), trace=FALSE) |
16 | 19 |
\item{covariate}{if non-\code{NULL}, the estimated scale value will depend on this numeric covariate.} |
17 | 20 |
\item{winsor.tail.p}{numeric vector of length 1 or 2, giving left and right tail proportions of \code{x} to Winsorize.} |
18 | 21 |
\item{trace}{logical value indicating whether a trace of the iteration progress should be printed.} |
22 |
+ \item{robust}{logical. Should outlier values of \code{x} be down-weighted with results similar to \code{fitFDistRobustly}?} |
|
23 |
+ \item{prior.weights}{numeric vector of (non-negative) prior weights.} |
|
19 | 24 |
} |
20 | 25 |
\details{ |
21 |
-\code{fitFDist} implements an algorithm proposed by Smyth (2004) and Phipson et al (2016). |
|
26 |
+\code{fitFDist()} implements an algorithm proposed by Smyth (2004) and Phipson et al (2016). |
|
22 | 27 |
It estimates \code{scale} and \code{df2} under the assumption that \code{x} is distributed as \code{scale} times an F-distributed random variable on \code{df1} and \code{df2} degrees of freedom. |
23 | 28 |
The parameters are estimated using the method of moments, specifically from the mean and variance of the \code{x} values on the log-scale. |
24 | 29 |
|
... | ... |
@@ -30,13 +35,27 @@ When \code{covariate} is supplied, a loess trend is estimated for the \code{x} v |
30 | 35 |
The robust method is described by Phipson et al (2016). |
31 | 36 |
|
32 | 37 |
As well as estimating the F-distribution for the bulk of the cases, i.e., with outliers discounted, \code{fitFDistRobustly} also returns an estimated F-distribution with reduced df2 that might be appropriate for each outlier case. |
38 |
+ |
|
39 |
+\code{fitFDistUnequalDF1} was introduced in limma 3.61.8 and gives special attention to the possibility that the degrees of freedom \code{df1} might be unequal and might include non-integer values. |
|
40 |
+The most important innovation of \code{fitFDistUnequalDF1} is downweighting of observations with lower degrees of freedom, to give more precise estimation overall. |
|
41 |
+It also allows the possibility of prior weights, which can be used to downweight unreliable \code{x} values for reasons other than small \code{df1}. |
|
42 |
+\code{fitFDistUnequalDF1} implements a different robust estimation strategy to \code{fitFDistRobustly}. |
|
43 |
+Instead of Winsorizing the \code{x} values, potential outliers are instead downweighted using the prior weights. |
|
44 |
+Whereas \code{fitFDist} and \code{fitFDistRobustly} use unweighted moment estimation for both \code{scale} and \code{df2}, |
|
45 |
+\code{fitFDistUnequalDF1} uses weighted moment estimation for \code{scale} and profile maximum likelihood for \code{df2}. |
|
46 |
+ |
|
47 |
+\code{fitFDistUnequalDF1} gives improved performance over \code{fitFDist} and \code{fitFDistRobustly}, especially when the degrees of freedom are unequal but also to a lesser extent when the degrees of freedom are equal. |
|
48 |
+Unequal residual degrees of freedom arise in limma pipelines when the expression matrix includes missing values or from the quasi-likelihood pipeline in edgeR v4 (Chen et al 2024). |
|
49 |
+The edgeR v4 pipeline produces fractional degrees of freedom including, potentially, degrees of freedom less than 1. |
|
33 | 50 |
} |
51 |
+ |
|
34 | 52 |
\note{ |
35 | 53 |
The algorithm used by \code{fitFDistRobustly} was revised slightly in limma 3.27.6. |
36 | 54 |
The \code{prob.outlier} value, which is the lower bound for \code{df2.shrunk}, may be slightly smaller than previously. |
37 | 55 |
} |
56 |
+ |
|
38 | 57 |
\value{ |
39 |
-\code{fitFDist} produces a list with the following components: |
|
58 |
+\code{fitFDist} or \code{fitFDistUnequalDF1} with \code{robust=FALSE} produces a list with the following components: |
|
40 | 59 |
\item{scale}{scale factor for F-distribution. A vector if \code{covariate} is non-\code{NULL}, otherwise a scalar.} |
41 | 60 |
\item{df2}{the second degrees of freedom of the fitted F-distribution.} |
42 | 61 |
|
... | ... |
@@ -47,19 +66,24 @@ The \code{prob.outlier} value, which is the lower bound for \code{df2.shrunk}, m |
47 | 66 |
\item{df2.shrunk}{numeric vector of values for the second degrees of freedom, with shrunk values for outliers. Most values are equal to \code{df2}, but outliers have reduced values depending on how extreme each case is. All values lie between \code{df2.outlier} and \code{df2}.} |
48 | 67 |
} |
49 | 68 |
|
50 |
-\author{Gordon Smyth and Belinda Phipson} |
|
69 |
+\author{Gordon Smyth, Belinda Phipson (\code{fitFDistRobustly}) and Lizhong Chen (\code{fitFDistUnequalDF1}).} |
|
51 | 70 |
|
52 | 71 |
\references{ |
53 |
-Smyth, G. K. (2004). |
|
72 |
+Smyth GK (2004). |
|
54 | 73 |
Linear models and empirical Bayes methods for assessing differential expression in microarray experiments. |
55 | 74 |
\emph{Statistical Applications in Genetics and Molecular Biology} Volume 3, Issue 1, Article 3. |
56 | 75 |
\doi{10.2202/1544-6115.1027} |
57 | 76 |
\url{https://gksmyth.github.io/pubs/ebayes.pdf} |
58 | 77 |
|
59 |
-Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016). |
|
78 |
+Phipson B, Lee S, Majewski IJ, Alexander WS, Smyth GK (2016). |
|
60 | 79 |
Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression. |
61 | 80 |
\emph{Annals of Applied Statistics} 10, 946-963. |
62 | 81 |
\doi{10.1214/16-AOAS920} |
82 |
+ |
|
83 |
+Chen Y, Chen L, Lun ATL, Baldoni PL, Smyth GK (2024). |
|
84 |
+edgeR 4.0: powerful differential analysis of sequencing data with expanded functionality and improved support for small counts and larger datasets. |
|
85 |
+\emph{bioRxiv} 2024.01.21.576131. |
|
86 |
+\doi{10.1101/2024.01.21.576131} |
|
63 | 87 |
} |
64 | 88 |
|
65 | 89 |
\seealso{ |
... | ... |
@@ -5,7 +5,8 @@ |
5 | 5 |
Normalize the columns of a matrix, cyclicly applying loess normalization to normalize each pair of columns to each other. |
6 | 6 |
} |
7 | 7 |
\usage{ |
8 |
-normalizeCyclicLoess(x, weights = NULL, span = 0.7, adaptive.span = FALSE, iterations = 3, method = "fast") |
|
8 |
+normalizeCyclicLoess(x, weights = NULL, span = 0.7, adaptive.span = FALSE, |
|
9 |
+ iterations = 3, method = "fast") |
|
9 | 10 |
} |
10 | 11 |
\arguments{ |
11 | 12 |
\item{x}{numeric matrix, or object which can be coerced to a numeric matrix, containing log-expression values.} |
... | ... |
@@ -47,7 +48,7 @@ A matrix of the same dimensions as \code{x} containing the normalized values. |
47 | 48 |
\references{ |
48 | 49 |
Bolstad BM, Irizarry RA, Astrand M, Speed TP (2003). |
49 | 50 |
A comparison of normalization methods for high density oligonucleotide array data based on bias and variance. |
50 |
-\emph{Bioinformatics} \bold{19}, 185-193. |
|
51 |
+\emph{Bioinformatics} 19, 185-193. |
|
51 | 52 |
|
52 | 53 |
Ballman KV, Grill DE, Oberg AL, Therneau TM (2004). |
53 | 54 |
Faster cyclic loess: normalizing RNA arrays via linear models. |
... | ... |
@@ -7,15 +7,17 @@ Squeeze a set of sample variances together by computing empirical Bayes posterio |
7 | 7 |
} |
8 | 8 |
|
9 | 9 |
\usage{ |
10 |
-squeezeVar(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0.05,0.1)) |
|
10 |
+squeezeVar(var, df, covariate = NULL, robust = FALSE, winsor.tail.p = c(0.05,0.1), |
|
11 |
+ legacy = NULL) |
|
11 | 12 |
} |
12 | 13 |
|
13 | 14 |
\arguments{ |
14 | 15 |
\item{var}{numeric vector of independent sample variances.} |
15 |
-\item{df}{numeric vector of degrees of freedom for the sample variances.} |
|
16 |
-\item{covariate}{if non-\code{NULL}, \code{var.prior} will depend on this numeric covariate. Otherwise, \code{var.prior} is constant.} |
|
16 |
+\item{df}{numeric vector of degrees of freedom for the sample variances. Can be a unit vector or of same length as \code{var}.} |
|
17 |
+\item{covariate}{numeric covariate of same length as \code{var} for estimating a trended prior variance. If \code{NULL}, then the prior variance \code{var.prior} is constant.} |
|
17 | 18 |
\item{robust}{logical, should the estimation of \code{df.prior} and \code{var.prior} be robustified against outlier sample variances?} |
18 |
-\item{winsor.tail.p}{numeric vector of length 1 or 2, giving left and right tail proportions of \code{x} to Winsorize. Used only when \code{robust=TRUE}.} |
|
19 |
+\item{winsor.tail.p}{numeric vector of length 1 or 2, giving left and right tail proportions of \code{x} to Winsorize when \code{robust=TRUE}.} |
|
20 |
+\item{legacy}{logical. If \code{FALSE} then the new function \code{fitFDistUnequalDF1} will be called internally, otherwise the legacy functions \code{fitFDist} or \code{fitFDistRobustly} will be used. If \code{NULL}, then \code{fitFDistUnequalDF1} will be used whenever the degrees of freedom \code{df} are not all equal.} |
|
19 | 21 |
} |
20 | 22 |
|
21 | 23 |
\details{ |
... | ... |
@@ -32,10 +34,17 @@ The scale and degrees of freedom of this prior distribution are estimated from t |
32 | 34 |
The effect of this function is to squeeze the variances towards a common value, or to a global trend if a \code{covariate} is provided. |
33 | 35 |
The squeezed variances have a smaller expected mean square error to the true variances than do the sample variances themselves. |
34 | 36 |
|
37 |
+The amount of squeezing is controlled by the \code{prior.df}. |
|
38 |
+Both the global trend and the prior df are estimated internally but fitting an F-distribution to the sample variances, using either \code{fitFDist()} or \code{fitFDistRobustly()} or \code{fitFDistUnequalDF1()}. |
|
39 |
+ |
|
35 | 40 |
If \code{covariate} is non-null, then the scale parameter of the prior distribution is assumed to depend on the covariate. |
36 | 41 |
If the covariate is average log-expression, then the effect is an intensity-dependent trend similar to that in Sartor et al (2006). |
37 | 42 |
|
38 |
-\code{robust=TRUE} implements the robust empirical Bayes procedure of Phipson et al (2016) which allows some of the \code{var} values to be outliers. |
|
43 |
+\code{robust=TRUE} implements the robust empirical Bayes procedure of Phipson et al (2016), which allows some of the \code{var} values to be outliers. |
|
44 |
+ |
|
45 |
+The \code{legacy} argument was added in limma version 3.61.8 (August 2024). |
|
46 |
+If \code{legacy=FALSE}, then the new function \code{fitFDistUnequalDF1()} provides improved estimation of the global trend and prior df hyperparameters, especially when the \code{df} values are unequal. |
|
47 |
+\code{legacy=TRUE} provides legacy behavior for backward compatibility. |
|
39 | 48 |
} |
40 | 49 |
|
41 | 50 |
\note{ |
... | ... |
@@ -74,7 +83,7 @@ incorporating corrections to 30 June 2009. |
74 | 83 |
\seealso{ |
75 | 84 |
This function is called by \code{\link{eBayes}}. |
76 | 85 |
|
77 |
-This function calls \code{\link{fitFDist}}. |
|
86 |
+This function calls \code{\link{fitFDist}}, \code{\link{fitFDistRobustly}} or \code{\link{fitFDistUnequalDF1}} . |
|
78 | 87 |
|
79 | 88 |
An overview of linear model functions in limma is given by \link{06.LinearModels}. |
80 | 89 |
} |
... | ... |
@@ -6,7 +6,8 @@ Combine voom observational-level precision weights with sample-specific quality |
6 | 6 |
} |
7 | 7 |
\usage{ |
8 | 8 |
voomWithQualityWeights(counts, design = NULL, lib.size = NULL, normalize.method = "none", |
9 |
- plot = FALSE, span = 0.5, adaptive.span = FALSE, var.design = NULL, var.group = NULL, |
|
9 |
+ plot = FALSE, span = 0.5, adaptive.span = FALSE, |
|
10 |
+ var.design = NULL, var.group = NULL, |
|
10 | 11 |
method = "genebygene", maxiter = 50, tol = 1e-5, trace = FALSE, |
11 | 12 |
col = NULL, \dots) |
12 | 13 |
} |
... | ... |
@@ -12,8 +12,8 @@ Equivalent to calling vooma(), arrayWeights(), duplicateCorrelation() and lmFit( |
12 | 12 |
\usage{ |
13 | 13 |
voomaLmFit(y, design = NULL, prior.weights = NULL, block = NULL, |
14 | 14 |
sample.weights = FALSE, var.design = NULL, var.group = NULL, prior.n = 10, |
15 |
- predictor = NULL, span = NULL, legacy.span = FALSE, plot = FALSE, save.plot = FALSE, |
|
16 |
- keep.EList = TRUE) |
|
15 |
+ predictor = NULL, span = NULL, legacy.span = FALSE, |
|
16 |
+ plot = FALSE, save.plot = FALSE, keep.EList = TRUE) |
|
17 | 17 |
} |
18 | 18 |
|
19 | 19 |
\arguments{ |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
|
2 |
-R version 4.4.0 RC (2024-04-16 r86458 ucrt) -- "Puppy Cup" |
|
2 |
+R version 4.4.1 (2024-06-14 ucrt) -- "Race for Your Life" |
|
3 | 3 |
Copyright (C) 2024 The R Foundation for Statistical Computing |
4 | 4 |
Platform: x86_64-w64-mingw32/x64 |
5 | 5 |
|
... | ... |
@@ -917,52 +917,52 @@ alpha 1 1 0 |
917 | 917 |
beta -1 0 1 |
918 | 918 |
|
919 | 919 |
$df.prior |
920 |
-[1] 9.306153 |
|
920 |
+[1] 8134.845 |
|
921 | 921 |
|
922 | 922 |
$s2.prior |
923 |
-[1] 0.923179 |
|
923 |
+[1] 1.021387 |
|
924 | 924 |
|
925 | 925 |
$var.prior |
926 |
-[1] 17.33142 17.33142 12.26855 |
|
926 |
+[1] 15.664973 11.397823 9.122785 |
|
927 | 927 |
|
928 | 928 |
$proportion |
929 | 929 |
[1] 0.01 |
930 | 930 |
|
931 | 931 |
$s2.post |
932 |
-[1] 1.2677996 0.6459499 1.1251558 0.9097727 0.9124980 |
|
932 |
+[1] 1.021963 1.020793 1.021694 1.021289 1.021321 |
|
933 | 933 |
|
934 | 934 |
$t |
935 | 935 |
alpha-beta mu+alpha mu+beta |
936 |
-[1,] 3.847656 2.580411 -2.860996 |
|
937 |
-[2,] 6.637308 5.113018 -4.273553 |
|
938 |
-[3,] 3.692066 1.720376 -3.500994 |
|
939 |
-[4,] 3.464003 2.926234 -1.972606 |
|
940 |
-[5,] 3.175181 2.566881 -1.814221 |
|
936 |
+[1,] 4.285525 2.874066 -3.186582 |
|
937 |
+[2,] 5.279861 4.067315 -3.399536 |
|
938 |
+[3,] 3.874497 1.805382 -3.673984 |
|
939 |
+[4,] 3.269417 2.761856 -1.861797 |
|
940 |
+[5,] 3.001258 2.426278 -1.714845 |
|
941 | 941 |
|
942 | 942 |
$df.total |
943 |
-[1] 15.30615 15.30615 15.30615 15.30615 13.30615 |
|
943 |
+[1] 28 28 28 28 28 |
|
944 | 944 |
|
945 | 945 |
$p.value |
946 | 946 |
alpha-beta mu+alpha mu+beta |
947 |
-[1,] 1.529450e-03 0.0206493481 0.0117123495 |
|
948 |
-[2,] 7.144893e-06 0.0001195844 0.0006385076 |
|
949 |
-[3,] 2.109270e-03 0.1055117477 0.0031325769 |
|
950 |
-[4,] 3.381970e-03 0.0102514264 0.0668844448 |
|
951 |
-[5,] 7.124839e-03 0.0230888584 0.0922478630 |
|
947 |
+[1,] 1.945874e-04 0.0076518793 0.0035226472 |
|
948 |
+[2,] 1.290875e-05 0.0003507232 0.0020449743 |
|
949 |
+[3,] 5.877177e-04 0.0817788496 0.0009997966 |
|
950 |
+[4,] 2.854794e-03 0.0100337538 0.0731588447 |
|
951 |
+[5,] 5.599946e-03 0.0219470179 0.0974229293 |
|
952 | 952 |
|
953 | 953 |
$lods |
954 |
- alpha-beta mu+alpha mu+beta |
|
955 |
-[1,] -1.013417 -3.702133 -3.0332393 |
|
956 |
-[2,] 3.981496 1.283349 -0.2615911 |
|
957 |
-[3,] -1.315036 -5.168621 -1.7864101 |
|
958 |
-[4,] -1.757103 -3.043209 -4.6191869 |
|
959 |
-[5,] -2.257358 -3.478267 -4.5683738 |
|
954 |
+ alpha-beta mu+alpha mu+beta |
|
955 |
+[1,] 0.7356274 -2.7480831 -1.9651516 |
|
956 |
+[2,] 3.2466921 0.1146964 -1.4669585 |
|
957 |
+[3,] -0.2839280 -4.8267525 -0.8071472 |
|
958 |
+[4,] -1.7300021 -2.9939736 -4.6385674 |
|
959 |
+[5,] -2.1848828 -3.4282344 -4.5756693 |
|
960 | 960 |
|
961 | 961 |
$F |
962 |
-[1] 7.421911 22.203107 7.608327 6.227010 5.060579 |
|
962 |
+[1] 9.207280 14.049948 8.378781 5.547069 4.521367 |
|
963 | 963 |
|
964 | 964 |
$F.p.value |
965 |
-[1] 5.581800e-03 2.988923e-05 5.080726e-03 1.050148e-02 2.320274e-02 |
|
965 |
+[1] 1.013549e-04 8.103854e-07 2.316764e-04 3.913618e-03 1.090148e-02 |
|
966 | 966 |
|
967 | 967 |
> |
968 | 968 |
> ### uniquegenelist |
... | ... |
@@ -1273,4 +1273,4 @@ GO:0043009 chordate embryonic developm... BP 3 0 2 1.0000000 0.025788497 |
1273 | 1273 |
> |
1274 | 1274 |
> proc.time() |
1275 | 1275 |
user system elapsed |
1276 |
- 3.06 0.31 3.36 |
|
1276 |
+ 1.32 0.15 2.62 |