Browse code

limma 3.61.8

- New function fitFDistUnequalDF1(), which improves on fitFDist() and
fitFDistRobustly(), with special attention to contexts where the
df1 argument is unequal between cases and can include small
fractional values, such as those resulting from the edgeR 4.0
quasi-likelihood pipeline.

- New argument `legacy` for squeezeVar(). If `legacy=FALSE`, then it
uses fitFDistUnequalDF1() instead of fitFDist() or
fitFDistRobustly(). If `legacy=NULL` then fitFDistUnequalDF1() will
be used when the degrees of freedom `df` are not all equal.

- New argument `legacy` for eBayes() and treat(). If `legacy=FALSE`,
then the new hyperparameter estimation implemented via
fitFDistUnequalDF1() will be used, otherwise the legacy method will
be used. If `legacy=NULL` then the new method will be used when the
residual degrees of freedom are not all equal.

- Improved checking for the class of the input in backgroundCorrect.R.

Gordon Smyth authored on 02/08/2024 00:34:36
Showing 16 changed files

... ...
@@ -1,9 +1,9 @@
1 1
 Package: limma
2
-Version: 3.61.7
3
-Date: 2024-07-30
2
+Version: 3.61.8
3
+Date: 2024-08-02
4 4
 Title: Linear Models for Microarray Data
5 5
 Description: Data analysis, linear models and differential expression for microarray and RNA-seq data.
6
-Author: Gordon Smyth [cre,aut], Yifang Hu [ctb], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Davis McCarthy [ctb], Di Wu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Aaron Lun [ctb], Natalie Thorne [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Yunshun Chen [ctb], Goknur Giner [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb], Charity Law [ctb], Mengbo Li [ctb]
6
+Author: Gordon Smyth [cre,aut], Yifang Hu [ctb], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Davis McCarthy [ctb], Di Wu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Aaron Lun [ctb], Natalie Thorne [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Yunshun Chen [ctb], Goknur Giner [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb], Charity Law [ctb], Mengbo Li [ctb], Lizhong Chen [ctb]
7 7
 Maintainer: Gordon Smyth <[email protected]>
8 8
 License: GPL (>=2)
9 9
 Depends: R (>= 3.6.0)
... ...
@@ -15,13 +15,13 @@ importFrom("stats", "approx", "approxfun", "as.dendrogram", "as.dist",
15 15
            "dist", "density", "df", "dhyper", "dnorm", "filter", "fitted",
16 16
            "hat", "hclust", "integrate", "isoreg", "lm", "lm.fit", "lm.wfit", "loess",
17 17
            "lowess", "median", "model.matrix", "na.exclude", "na.omit",
18
-           "nlminb", "nls", "nls.control", "optim", "p.adjust",
18
+           "nlminb", "nls", "nls.control", "optim", "optimize", "p.adjust",
19 19
            "pbeta", "pchisq", "pexp", "pf", "pgamma", "phyper",
20 20
            "pnorm", "ppoints", "predict", "pt", "qchisq", "qf",
21 21
            "qnorm", "qt", "quantile", "residuals", "rnorm", "sd",
22 22
            "uniroot", "var", "weighted.mean")
23 23
 importFrom("utils", "getFromNamespace", "head", "read.delim", "read.table", "tail", "write.table")
24
-importFrom("statmod", "logmdigamma", "gauss.quad.prob", "mixedModel2Fit", "remlscore")
24
+importFrom("statmod", "gauss.quad.prob", "logmdigamma", "mixedModel2Fit", "remlscore")
25 25
 if( tools:::.OStype() == "windows" ) importFrom("utils", "winMenuAddItem")
26 26
 
27 27
 S3method("[",MAList)
... ...
@@ -6,18 +6,21 @@ backgroundCorrect <- function(RG, method="auto", offset=0, printer=RG$printer, n
6 6
 {
7 7
 #	Apply background correction to microarray data
8 8
 #	Gordon Smyth
9
-#	12 April 2003.  Last modified 30 August 2010.
9
+#	12 April 2003.  Last modified 1 August 2024.
10 10
 
11 11
 	if(is.null(method)) method <- "auto"
12 12
 
13
+#	Single-channel background correction
13 14
 	if(is(RG,"EListRaw")) {
14 15
 		RG$E <- backgroundCorrect.matrix(RG$E,RG$Eb,method=method,offset=offset,normexp.method=normexp.method,verbose=verbose)
15 16
 		RG$Eb <- NULL
16 17
 		return(RG)
17 18
 	}
18 19
 
19
-	if(class(RG)=="list" && !is.null(RG$R)) RG <- new("RGList",RG)
20
+#	If RG is an unclassed list with a red channel, then convert to RGList
21
+	if(!isS4(RG) && is.list(RG) && !is.null(RG$R)) RG <- new("RGList",RG)
20 22
 
23
+#	Two-color background correction
21 24
 	if(is(RG,"RGList")) {
22 25
 		RG$R <- backgroundCorrect.matrix(RG$R,RG$Rb,method=method,offset=offset,normexp.method=normexp.method,verbose=verbose)
23 26
 		RG$Rb <- NULL
... ...
@@ -26,6 +29,7 @@ backgroundCorrect <- function(RG, method="auto", offset=0, printer=RG$printer, n
26 29
 		return(RG)
27 30
 	}
28 31
 
32
+#	Failing all other classes, coerce RG to a matrix
29 33
 	RG <- as.matrix(RG)
30 34
 	if(!(method %in% c("none","normexp"))) stop(method,"correction requires background intensities")
31 35
 	RG <- backgroundCorrect.matrix(RG,method=method,offset=offset,normexp.method=normexp.method,verbose=verbose)
... ...
@@ -1,14 +1,14 @@
1 1
 #  EMPIRICAL BAYES FUNCTIONS
2 2
 
3
-eBayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robust=FALSE,winsor.tail.p=c(0.05,0.1))
3
+eBayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robust=FALSE,winsor.tail.p=c(0.05,0.1),legacy=NULL)
4 4
 #	Empirical Bayes statistics to select differentially expressed genes.
5 5
 #	Accepts and returns an MArrayLM object.
6 6
 #	Gordon Smyth
7
-#	4 August 2003.  Last modified 29 Nov 2022.
7
+#	Created 4 Aug 2003.  Last modified 2 Aug 2024.
8 8
 {
9 9
 	if(!is.list(fit)) stop("fit is not a valid MArrayLM object")
10 10
 	if(is.logical(trend) && trend && is.null(fit$Amean)) stop("Need Amean component in fit to estimate trend")
11
-	eb <- .ebayes(fit=fit,proportion=proportion,stdev.coef.lim=stdev.coef.lim,trend=trend,robust=robust,winsor.tail.p=winsor.tail.p)
11
+	eb <- .ebayes(fit=fit,proportion=proportion,stdev.coef.lim=stdev.coef.lim,trend=trend,robust=robust,winsor.tail.p=winsor.tail.p,legacy=legacy)
12 12
 	fit$df.prior <- eb$df.prior
13 13
 	fit$s2.prior <- eb$s2.prior
14 14
 	fit$var.prior <- eb$var.prior
... ...
@@ -28,11 +28,10 @@ eBayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robus
28 28
 	fit
29 29
 }
30 30
 
31
-.ebayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robust=FALSE,winsor.tail.p=c(0.05,0.1))
31
+.ebayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robust=FALSE,winsor.tail.p=c(0.05,0.1),legacy=NULL)
32 32
 #	Empirical Bayes statistics to select differentially expressed genes
33 33
 #	Gordon Smyth
34
-#	Created 8 Sept 2002.  Last revised 10 Feb 2022.
35
-#	Made a non-exported function 18 Feb 2018.
34
+#	Created 8 Sep 2002. Made non-exported function 18 Feb 2018. Last revised 2 Aug 2024.
36 35
 {
37 36
 	coefficients <- fit$coefficients
38 37
 	stdev.unscaled <- fit$stdev.unscaled
... ...
@@ -55,7 +54,7 @@ eBayes <- function(fit,proportion=0.01,stdev.coef.lim=c(0.1,4),trend=FALSE,robus
55 54
 	}
56 55
 
57 56
 #	Moderated t-statistic
58
-	out <- squeezeVar(sigma^2, df.residual, covariate=covariate, robust=robust, winsor.tail.p=winsor.tail.p)
57
+	out <- squeezeVar(sigma^2, df.residual, covariate=covariate, robust=robust, winsor.tail.p=winsor.tail.p, legacy=legacy)
59 58
 	out$s2.prior <- out$var.prior
60 59
 	out$s2.post <- out$var.post
61 60
 	out$var.prior <- out$var.post <- NULL
... ...
@@ -1,9 +1,9 @@
1 1
 #	EMPIRICAL BAYES SQUEEZING OF VARIANCES
2 2
 
3
-squeezeVar <- function(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0.05,0.1))
3
+squeezeVar <- function(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0.05,0.1), legacy=NULL)
4 4
 #	Empirical Bayes posterior variances
5 5
 #	Gordon Smyth
6
-#	Created 2 March 2004.  Last modified 29 July 2024.
6
+#	Created 2 March 2004.  Last modified 1 August 2024.
7 7
 {
8 8
 	n <- length(var)
9 9
 
... ...
@@ -14,13 +14,25 @@ squeezeVar <- function(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0.
14 14
 #	When df==0, guard against missing or infinite values in var
15 15
 	if(length(df)>1L) var[df==0] <- 0
16 16
 
17
+#	Choose legacy or new method depending whether df are unequal
18
+	if(is.null(legacy)) {
19
+		dfp <- df[df>0]
20
+		legacy <- identical(min(dfp),max(dfp))
21
+	}
22
+
17 23
 #	Estimate hyperparameters
18
-	if(robust) {
19
-		fit <- fitFDistRobustly(var, df1=df, covariate=covariate, winsor.tail.p=winsor.tail.p)
20
-		df.prior <- fit$df2.shrunk
24
+	if(legacy) {
25
+		if(robust) {
26
+			fit <- fitFDistRobustly(var, df1=df, covariate=covariate, winsor.tail.p=winsor.tail.p)
27
+			df.prior <- fit$df2.shrunk
28
+		} else {
29
+			fit <- fitFDist(var, df1=df, covariate=covariate)
30
+			df.prior <- fit$df2
31
+		}
21 32
 	} else {
22
-		fit <- fitFDist(var, df1=df, covariate=covariate)
23
-		df.prior <- fit$df2
33
+		fit <- fitFDistUnequalDF1(var, df1=df, covariate=covariate, robust=robust)
34
+		df.prior <- fit$df.shrunk
35
+		if(is.null(df.prior)) df.prior <- fit$df2
24 36
 	}
25 37
 	if(anyNA(df.prior)) stop("Could not estimate prior df")
26 38
 
... ...
@@ -34,28 +46,29 @@ squeezeVar <- function(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0.
34 46
 #	Squeeze posterior variances given hyperparameters
35 47
 #	NAs not allowed in df.prior
36 48
 #	Gordon Smyth
37
-#	Created 5 May 2016. Last modified 29 July 2024.
49
+#	Created 5 May 2016. Last modified 1 August 2024.
38 50
 {
39
-	n <- length(var)
40
-	isfin <- is.finite(df.prior)
41
-	if(all(isfin)) return( (df*var + df.prior*var.prior) / (df+df.prior) )
42
-
43
-#	From here, at least some df.prior are infinite
51
+#	If df.prior are all finite, use canonical formula
52
+	m <- max(df.prior)
53
+	if(is.finite(m)) return( (df*var + df.prior*var.prior) / (df+df.prior) )
44 54
 
45
-#	For infinite df.prior, return var.prior
55
+#	Set var.post to var.prior of length n
56
+	n <- length(var)
46 57
 	if(identical(length(var.prior),n)) {
47 58
 		var.post <- var.prior
48 59
 	} else {
49 60
 		var.post <- rep_len(var.prior, length.out=n)
50 61
 	}
51 62
 
52
-#	Maybe some df.prior are finite
53
-	if(any(isfin)) {
54
-		i <- which(isfin)
55
-		if(length(df)>1) df <- df[i]
56
-		df.prior <- df.prior[i]
57
-		var.post[i] <- (df*var[i] + df.prior*var.post[i]) / (df+df.prior)
58
-	}
63
+#	Check if df.prior all Inf
64
+	m <- min(df.prior)
65
+	if(m > 1e100) return(var.post)
66
+
67
+#	Only some df.prior are finite
68
+	i <- which(is.finite(df.prior))
69
+	if(length(df)>1L) df <- df[i]
70
+	df.prior <- df.prior[i]
71
+	var.post[i] <- (df*var[i] + df.prior*var.post[i]) / (df+df.prior)
59 72
 
60 73
 	var.post
61 74
 }
... ...
@@ -1,9 +1,9 @@
1 1
 ###  treat.R
2 2
 
3
-treat <- function(fit, fc=1.2, lfc=NULL, trend=FALSE, robust=FALSE, winsor.tail.p=c(0.05,0.1))
3
+treat <- function(fit, fc=1.2, lfc=NULL, trend=FALSE, robust=FALSE, winsor.tail.p=c(0.05,0.1), legacy=NULL)
4 4
 #	Moderated t-statistics relative to a logFC threshold.
5 5
 #	Davis McCarthy, Gordon Smyth
6
-#	25 July 2008.  Last revised 6 May 2023.
6
+#	25 July 2008.  Last revised 2 Aug 2024.
7 7
 {
8 8
 #	Check fit
9 9
 	if(!is(fit,"MArrayLM")) stop("fit must be an MArrayLM object")
... ...
@@ -28,7 +28,7 @@ treat <- function(fit, fc=1.2, lfc=NULL, trend=FALSE, robust=FALSE, winsor.tail.
28 28
 	} else {
29 29
 		covariate <- NULL
30 30
 	}
31
-	sv <- squeezeVar(sigma^2, df.residual, covariate=covariate, robust=robust, winsor.tail.p=winsor.tail.p)
31
+	sv <- squeezeVar(sigma^2, df.residual, covariate=covariate, robust=robust, winsor.tail.p=winsor.tail.p, legacy=legacy)
32 32
 	fit$df.prior <- sv$df.prior
33 33
 	fit$s2.prior <- sv$var.prior
34 34
 	fit$s2.post <- sv$var.post
... ...
@@ -3,6 +3,24 @@
3 3
 \encoding{UTF-8}
4 4
 
5 5
 
6
+\section{Version 3.61.8}{\itemize{
7
+
8
+  \item
9
+  New function fitFDistUnequalDF1() for improved empirical Bayes
10
+  hyperparameter estimation when the residual degrees of freedom are
11
+  not all equal between genes. The new hyperpameter estimation will be
12
+  used by eBayes(), treat() and squeezeVar() by default whenever the
13
+  residual degrees of freedom are unequal, but the legacy behavior
14
+  can be preserved for backward compatibility by setting
15
+  `legacy=TRUE`.
16
+
17
+  \item
18
+  New argument `keep.EList` for voomaLmFit(). The observation weights
19
+  generated by voomaLmFit() are now stored only if `keep.EList=TRUE`.
20
+
21
+}}
22
+
23
+
6 24
 \section{Version 3.60.0}{\itemize{
7 25
 
8 26
   \item
... ...
@@ -1,3 +1,24 @@
1
+ 1 Aug 2024: limma 3.61.8
2
+
3
+- New function fitFDistUnequalDF1(), which improves on fitFDist() and
4
+  fitFDistRobustly(), with special attention to contexts where the
5
+  df1 argument is unequal between cases and can include small
6
+  fractional values, such as those resulting from the edgeR 4.0
7
+  quasi-likelihood pipeline.
8
+
9
+- New argument `legacy` for squeezeVar(). If `legacy=FALSE`, then it
10
+  uses fitFDistUnequalDF1() instead of fitFDist() or
11
+  fitFDistRobustly(). If `legacy=NULL` then fitFDistUnequalDF1() will
12
+  be used when the degrees of freedom `df` are not all equal.
13
+
14
+- New argument `legacy` for eBayes() and treat(). If `legacy=FALSE`,
15
+  then the new hyperparameter estimation implemented via
16
+  fitFDistUnequalDF1() will be used, otherwise the legacy method will
17
+  be used. If `legacy=NULL` then the new method will be used when the
18
+  residual degrees of freedom are not all equal.
19
+
20
+- Improved checking for the class of the input in backgroundCorrect.R.
21
+
1 22
 30 Jul 2024: limma 3.61.7
2 23
 
3 24
 - Clean up some code in fitFDistRobustly().
... ...
@@ -58,7 +58,7 @@ After fitting a linear model, the standard errors are moderated using a simple e
58 58
 A moderated t-statistic and a log-odds of differential expression is computed for each contrast for each gene.
59 59
 \code{treat} tests whether log-fold-changes are greater than a threshold rather than merely different to zero.
60 60
 
61
-\code{\link{eBayes}} and \code{\link{treat}} use internal functions \code{\link{squeezeVar}}, \code{\link{fitFDist}}, \code{\link{tmixture.matrix}} and \code{\link{tmixture.vector}}.
61
+\code{\link{eBayes}} and \code{\link{treat}} use internal functions \code{\link{squeezeVar}}, \code{\link{fitFDist}}, \code{\link{fitFDistRobustly}}, \code{\link{fitFDistUnequalDF1}}, \code{\link{tmixture.matrix}} and \code{\link{tmixture.vector}}.
62 62
 }
63 63
 
64 64
 \section{Summarizing Model Fits}{
... ...
@@ -104,7 +104,7 @@ Smyth, G. K. (2004). Linear models and empirical Bayes methods for assessing dif
104 104
 \emph{Statistical Applications in Genetics and Molecular Biology}, \bold{3}, No. 1, Article 3.
105 105
 \url{https://gksmyth.github.io/pubs/ebayes.pdf}
106 106
 
107
-Smyth, G. K., Michaud, J., and Scott, H. (2005). The use of within-array replicate spots for assessing differential expression in microarray experiments. Bioinformatics 21(9), 2067-2075.
107
+Smyth, G. K., Michaud, J., and Scott, H. (2005). The use of within-array replicate spots for assessing differential expression in microarray experiments. \emph{Bioinformatics} 21(9), 2067-2075.
108 108
 }
109 109
 
110 110
 \seealso{
... ...
@@ -7,9 +7,9 @@
7 7
 
8 8
 \usage{
9 9
 eBayes(fit, proportion = 0.01, stdev.coef.lim = c(0.1,4),
10
-       trend = FALSE, robust = FALSE, winsor.tail.p = c(0.05,0.1))
10
+       trend = FALSE, robust = FALSE, winsor.tail.p = c(0.05,0.1), legacy = NULL)
11 11
 treat(fit, fc = 1.2, lfc = NULL, trend = FALSE,
12
-      robust = FALSE, winsor.tail.p = c(0.05,0.1))
12
+      robust = FALSE, winsor.tail.p = c(0.05,0.1), legacy = NULL)
13 13
 }
14 14
 
15 15
 \arguments{
... ...
@@ -19,7 +19,8 @@ treat(fit, fc = 1.2, lfc = NULL, trend = FALSE,
19 19
   \item{stdev.coef.lim}{numeric vector of length 2, assumed lower and upper limits for the standard deviation of log2-fold-changes for differentially expressed genes}
20 20
   \item{trend}{logical, should an intensity-dependent trend be allowed for the prior variance? If \code{FALSE} then the prior variance is constant. Alternatively, \code{trend} can be a row-wise numeric vector, which will be used as the covariate for the prior variance.}
21 21
   \item{robust}{logical, should the estimation of \code{df.prior} and \code{var.prior} be robustified against outlier sample variances?}
22
-  \item{winsor.tail.p}{numeric vector of length 1 or 2, giving left and right tail proportions of \code{x} to Winsorize. Used only when \code{robust=TRUE}.}
22
+  \item{winsor.tail.p}{numeric vector of length 1 or 2, giving left and right tail proportions of \code{x} to Winsorize (if \code{robust=TRUE} and \code{legacy=TRUE}).}
23
+  \item{legacy}{logical. If \code{FALSE} then the new hyperparameter estimation (introduced in limma 3.61.8) will be used, otherwise the legacy methods will be used. If \code{NULL}, then the new method will be used whenever the residual degrees of freedom are not all equal.}
23 24
   \item{fc}{a minimum fold-change below which changes are not considered scientifically meaningful.}
24 25
   \item{lfc}{a minimum log2-fold-change below which changes not considered scientifically meaningful. Defaults to \code{log2(fc)}.}
25 26
 }
... ...
@@ -60,7 +61,7 @@ For each gene (row), the moderated F-statistic tests whether all the contrasts a
60 61
 The F-statistic is an overall test computed from the set of t-statistics for that probe.
61 62
 This is exactly analogous the relationship between t-tests and F-statistics in conventional anova, except that the residual mean squares have been moderated between genes.
62 63
 
63
-The estimates \code{s2.prior} and \code{df.prior} are computed by \code{fitFDist}.
64
+The estimates \code{s2.prior} and \code{df.prior} are computed by one of \code{fitFDist}, \code{fitFDistRobustly} or \code{fitFDistUnequalDF1} (depending on settings for \code{robust} and \code{legacy}).
64 65
 \code{s2.post} is the weighted average of \code{s2.prior} and \code{sigma^2} with weights proportional to \code{df.prior} and \code{df.residual} respectively.
65 66
 The log-odds of differential expression \code{lods} was called the \emph{B-statistic} by Loennstedt and Speed (2002).
66 67
 The F-statistics \code{F} are computed by \code{classifyTestsF} with \code{fstat.only=TRUE}.
... ...
@@ -102,6 +103,12 @@ limma-trend is recommended for RNA-seq analysis when the library sizes are reaso
102 103
 If \code{robust=TRUE} then the robust empirical Bayes procedure of Phipson et al (2016) is used.
103 104
 This is frequently useful to protect the empirical Bayes procedure against hyper-variable or hypo-variable genes, especially when analysing RNA-seq data.
104 105
 See \code{\link{squeezeVar}} for more details.
106
+
107
+In limma 3.61.8 (August 2024), the new function \code{\link{fitFDistUnequalDF1}} was introduced to improve estimation of the hyperparameters \code{s2.prior} and \code{df.prior}, especially when not all genes have the same residual degrees of freedom.
108
+\code{fitFDistUnequalDF1} is a potential replacement for the original functions \code{fitFDist} and \code{fitFDistRobustly} and the argument \code{legacy} is provided to control backward compatibility.
109
+The new hyperparameter estimation will be used if \code{legacy=FALSE} and the original methods will be used if \code{legacy=TRUE}.
110
+If \code{legacy=NULL}, then the new method will be used if the residual degrees of freedom are unequal and the original methods otherwise.
111
+Unequal residual degrees of freedom arise in limma pipelines when the expression matrix includes missing values or from the quasi-likelihood pipeline in edgeR v4.
105 112
 }
106 113
 
107 114
 \note{
... ...
@@ -1,14 +1,17 @@
1 1
 \name{fitFDist}
2 2
 \alias{fitFDist}
3 3
 \alias{fitFDistRobustly}
4
+\alias{fitFDistUnequalDF1}
5
+
4 6
 \title{Moment Estimation of Scaled F-Distribution}
5 7
 \description{
6 8
 Moment estimation of the parameters of a scaled F-distribution given one of the degrees of freedom.
7
-This function is called internally by \code{eBayes} and \code{squeezeVar} and is not usually called directly by a user.
9
+These functions are called internally by \code{eBayes} and \code{squeezeVar} and is not usually called directly by a user.
8 10
 }
9 11
 \usage{
10
-fitFDist(x, df1, covariate=NULL)
11
-fitFDistRobustly(x, df1, covariate=NULL, winsor.tail.p=c(0.05,0.1), trace=FALSE)
12
+fitFDist(x, df1, covariate = NULL)
13
+fitFDistRobustly(x, df1, covariate = NULL, winsor.tail.p = c(0.05,0.1), trace = FALSE)
14
+fitFDistUnequalDF1(x, df1, covariate = NULL, robust = FALSE, prior.weights = NULL)
12 15
 }
13 16
 \arguments{
14 17
   \item{x}{numeric vector or array of positive values representing a sample from a scaled F-distribution.}
... ...
@@ -16,9 +19,11 @@ fitFDistRobustly(x, df1, covariate=NULL, winsor.tail.p=c(0.05,0.1), trace=FALSE)
16 19
   \item{covariate}{if non-\code{NULL}, the estimated scale value will depend on this numeric covariate.}
17 20
   \item{winsor.tail.p}{numeric vector of length 1 or 2, giving left and right tail proportions of \code{x} to Winsorize.}
18 21
   \item{trace}{logical value indicating whether a trace of the iteration progress should be printed.}
22
+  \item{robust}{logical. Should outlier values of \code{x} be down-weighted with results similar to \code{fitFDistRobustly}?}
23
+  \item{prior.weights}{numeric vector of (non-negative) prior weights.}
19 24
 }
20 25
 \details{
21
-\code{fitFDist} implements an algorithm proposed by Smyth (2004) and Phipson et al (2016).
26
+\code{fitFDist()} implements an algorithm proposed by Smyth (2004) and Phipson et al (2016).
22 27
 It estimates \code{scale} and \code{df2} under the assumption that \code{x} is distributed as \code{scale} times an F-distributed random variable on \code{df1} and \code{df2} degrees of freedom.
23 28
 The parameters are estimated using the method of moments, specifically from the mean and variance of the \code{x} values on the log-scale.
24 29
 
... ...
@@ -30,13 +35,27 @@ When \code{covariate} is supplied, a loess trend is estimated for the \code{x} v
30 35
 The robust method is described by Phipson et al (2016).
31 36
 
32 37
 As well as estimating the F-distribution for the bulk of the cases, i.e., with outliers discounted, \code{fitFDistRobustly} also returns an estimated F-distribution with reduced df2 that might be appropriate for each outlier case.
38
+
39
+\code{fitFDistUnequalDF1} was introduced in limma 3.61.8 and gives special attention to the possibility that the degrees of freedom \code{df1} might be unequal and might include non-integer values.
40
+The most important innovation of \code{fitFDistUnequalDF1} is downweighting of observations with lower degrees of freedom, to give more precise estimation overall.
41
+It also allows the possibility of prior weights, which can be used to downweight unreliable \code{x} values for reasons other than small \code{df1}.
42
+\code{fitFDistUnequalDF1} implements a different robust estimation strategy to \code{fitFDistRobustly}.
43
+Instead of Winsorizing the \code{x} values, potential outliers are instead downweighted using the prior weights.
44
+Whereas \code{fitFDist} and \code{fitFDistRobustly} use unweighted moment estimation for both \code{scale} and \code{df2},
45
+\code{fitFDistUnequalDF1} uses weighted moment estimation for \code{scale} and profile maximum likelihood for \code{df2}.
46
+
47
+\code{fitFDistUnequalDF1} gives improved performance over \code{fitFDist} and \code{fitFDistRobustly}, especially when the degrees of freedom are unequal but also to a lesser extent when the degrees of freedom are equal.
48
+Unequal residual degrees of freedom arise in limma pipelines when the expression matrix includes missing values or from the quasi-likelihood pipeline in edgeR v4 (Chen et al 2024).
49
+The edgeR v4 pipeline produces fractional degrees of freedom including, potentially, degrees of freedom less than 1.
33 50
 }
51
+
34 52
 \note{
35 53
 The algorithm used by \code{fitFDistRobustly} was revised slightly in limma 3.27.6.
36 54
 The \code{prob.outlier} value, which is the lower bound for \code{df2.shrunk}, may be slightly smaller than previously.
37 55
 }
56
+
38 57
 \value{
39
-\code{fitFDist} produces a list with the following components:
58
+\code{fitFDist} or \code{fitFDistUnequalDF1} with \code{robust=FALSE} produces a list with the following components:
40 59
   \item{scale}{scale factor for F-distribution. A vector if \code{covariate} is non-\code{NULL}, otherwise a scalar.}
41 60
   \item{df2}{the second degrees of freedom of the fitted F-distribution.}
42 61
 
... ...
@@ -47,19 +66,24 @@ The \code{prob.outlier} value, which is the lower bound for \code{df2.shrunk}, m
47 66
   \item{df2.shrunk}{numeric vector of values for the second degrees of freedom, with shrunk values for outliers. Most values are equal to \code{df2}, but outliers have reduced values depending on how extreme each case is. All values lie between \code{df2.outlier} and \code{df2}.}
48 67
 }
49 68
 
50
-\author{Gordon Smyth and Belinda Phipson}
69
+\author{Gordon Smyth, Belinda Phipson (\code{fitFDistRobustly}) and Lizhong Chen (\code{fitFDistUnequalDF1}).}
51 70
 
52 71
 \references{
53
-Smyth, G. K. (2004).
72
+Smyth GK (2004).
54 73
 Linear models and empirical Bayes methods for assessing differential expression in microarray experiments.
55 74
 \emph{Statistical Applications in Genetics and Molecular Biology} Volume 3, Issue 1, Article 3.
56 75
 \doi{10.2202/1544-6115.1027}
57 76
 \url{https://gksmyth.github.io/pubs/ebayes.pdf}
58 77
 
59
-Phipson, B, Lee, S, Majewski, IJ, Alexander, WS, and Smyth, GK (2016).
78
+Phipson B, Lee S, Majewski IJ, Alexander WS, Smyth GK (2016).
60 79
 Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression.
61 80
 \emph{Annals of Applied Statistics} 10, 946-963.
62 81
 \doi{10.1214/16-AOAS920}
82
+
83
+Chen Y, Chen L, Lun ATL, Baldoni PL, Smyth GK (2024).
84
+edgeR 4.0: powerful differential analysis of sequencing data with expanded functionality and improved support for small counts and larger datasets.
85
+\emph{bioRxiv} 2024.01.21.576131.
86
+\doi{10.1101/2024.01.21.576131}
63 87
 }
64 88
 
65 89
 \seealso{
... ...
@@ -5,7 +5,8 @@
5 5
 Normalize the columns of a matrix, cyclicly applying loess normalization to normalize each pair of columns to each other.
6 6
 }
7 7
 \usage{
8
-normalizeCyclicLoess(x, weights = NULL, span = 0.7, adaptive.span = FALSE, iterations = 3, method = "fast")
8
+normalizeCyclicLoess(x, weights = NULL, span = 0.7, adaptive.span = FALSE,
9
+                     iterations = 3, method = "fast")
9 10
 }
10 11
 \arguments{
11 12
   \item{x}{numeric matrix, or object which can be coerced to a numeric matrix, containing log-expression values.}
... ...
@@ -47,7 +48,7 @@ A matrix of the same dimensions as \code{x} containing the normalized values.
47 48
 \references{
48 49
 Bolstad BM, Irizarry RA, Astrand M, Speed TP (2003).
49 50
 A comparison of normalization methods for high density oligonucleotide array data based on bias and variance.
50
-\emph{Bioinformatics} \bold{19}, 185-193.
51
+\emph{Bioinformatics} 19, 185-193.
51 52
 
52 53
 Ballman KV, Grill DE, Oberg AL, Therneau TM (2004).
53 54
 Faster cyclic loess: normalizing RNA arrays via linear models.
... ...
@@ -7,15 +7,17 @@ Squeeze a set of sample variances together by computing empirical Bayes posterio
7 7
 }
8 8
 
9 9
 \usage{
10
-squeezeVar(var, df, covariate=NULL, robust=FALSE, winsor.tail.p=c(0.05,0.1))
10
+squeezeVar(var, df, covariate = NULL, robust = FALSE, winsor.tail.p = c(0.05,0.1),
11
+           legacy = NULL)
11 12
 }
12 13
 
13 14
 \arguments{
14 15
 \item{var}{numeric vector of independent sample variances.}
15
-\item{df}{numeric vector of degrees of freedom for the sample variances.}
16
-\item{covariate}{if non-\code{NULL}, \code{var.prior} will depend on this numeric covariate. Otherwise, \code{var.prior} is constant.}
16
+\item{df}{numeric vector of degrees of freedom for the sample variances. Can be a unit vector or of same length as \code{var}.}
17
+\item{covariate}{numeric covariate of same length as \code{var} for estimating a trended prior variance. If \code{NULL}, then the prior variance \code{var.prior} is constant.}
17 18
 \item{robust}{logical, should the estimation of \code{df.prior} and \code{var.prior} be robustified against outlier sample variances?}
18
-\item{winsor.tail.p}{numeric vector of length 1 or 2, giving left and right tail proportions of \code{x} to Winsorize. Used only when \code{robust=TRUE}.}
19
+\item{winsor.tail.p}{numeric vector of length 1 or 2, giving left and right tail proportions of \code{x} to Winsorize when \code{robust=TRUE}.}
20
+\item{legacy}{logical. If \code{FALSE} then the new function \code{fitFDistUnequalDF1} will be called internally, otherwise the legacy functions \code{fitFDist} or \code{fitFDistRobustly} will be used. If \code{NULL}, then \code{fitFDistUnequalDF1} will be used whenever the degrees of freedom \code{df} are not all equal.}
19 21
 }
20 22
 
21 23
 \details{
... ...
@@ -32,10 +34,17 @@ The scale and degrees of freedom of this prior distribution are estimated from t
32 34
 The effect of this function is to squeeze the variances towards a common value, or to a global trend if a \code{covariate} is provided.
33 35
 The squeezed variances have a smaller expected mean square error to the true variances than do the sample variances themselves.
34 36
 
37
+The amount of squeezing is controlled by the \code{prior.df}.
38
+Both the global trend and the prior df are estimated internally but fitting an F-distribution to the sample variances, using either \code{fitFDist()} or \code{fitFDistRobustly()} or \code{fitFDistUnequalDF1()}.
39
+
35 40
 If \code{covariate} is non-null, then the scale parameter of the prior distribution is assumed to depend on the covariate.
36 41
 If the covariate is average log-expression, then the effect is an intensity-dependent trend similar to that in Sartor et al (2006).
37 42
 
38
-\code{robust=TRUE} implements the robust empirical Bayes procedure of Phipson et al (2016) which allows some of the \code{var} values to be outliers.
43
+\code{robust=TRUE} implements the robust empirical Bayes procedure of Phipson et al (2016), which allows some of the \code{var} values to be outliers.
44
+
45
+The \code{legacy} argument was added in limma version 3.61.8 (August 2024).
46
+If \code{legacy=FALSE}, then the new function \code{fitFDistUnequalDF1()} provides improved estimation of the global trend and prior df hyperparameters, especially when the \code{df} values are unequal.
47
+\code{legacy=TRUE} provides legacy behavior for backward compatibility.
39 48
 }
40 49
 
41 50
 \note{
... ...
@@ -74,7 +83,7 @@ incorporating corrections to 30 June 2009.
74 83
 \seealso{
75 84
 This function is called by \code{\link{eBayes}}.
76 85
 
77
-This function calls \code{\link{fitFDist}}.
86
+This function calls \code{\link{fitFDist}}, \code{\link{fitFDistRobustly}} or \code{\link{fitFDistUnequalDF1}} .
78 87
 
79 88
 An overview of linear model functions in limma is given by \link{06.LinearModels}.
80 89
 }
... ...
@@ -6,7 +6,8 @@ Combine voom observational-level precision weights with sample-specific quality
6 6
 }
7 7
 \usage{
8 8
 voomWithQualityWeights(counts, design = NULL, lib.size = NULL, normalize.method = "none",
9
-             plot = FALSE, span = 0.5, adaptive.span = FALSE, var.design = NULL, var.group = NULL,
9
+             plot = FALSE, span = 0.5, adaptive.span = FALSE,
10
+             var.design = NULL, var.group = NULL,
10 11
              method = "genebygene", maxiter = 50, tol = 1e-5, trace = FALSE,
11 12
              col = NULL, \dots)
12 13
 }
... ...
@@ -12,8 +12,8 @@ Equivalent to calling vooma(), arrayWeights(), duplicateCorrelation() and lmFit(
12 12
 \usage{
13 13
 voomaLmFit(y, design = NULL, prior.weights = NULL, block = NULL,
14 14
            sample.weights = FALSE, var.design = NULL, var.group = NULL, prior.n = 10,
15
-           predictor = NULL, span = NULL, legacy.span = FALSE, plot = FALSE, save.plot = FALSE,
16
-           keep.EList = TRUE)
15
+           predictor = NULL, span = NULL, legacy.span = FALSE,
16
+           plot = FALSE, save.plot = FALSE, keep.EList = TRUE)
17 17
 }
18 18
 
19 19
 \arguments{
... ...
@@ -1,5 +1,5 @@
1 1
 
2
-R version 4.4.0 RC (2024-04-16 r86458 ucrt) -- "Puppy Cup"
2
+R version 4.4.1 (2024-06-14 ucrt) -- "Race for Your Life"
3 3
 Copyright (C) 2024 The R Foundation for Statistical Computing
4 4
 Platform: x86_64-w64-mingw32/x64
5 5
 
... ...
@@ -917,52 +917,52 @@ alpha          1        1       0
917 917
 beta          -1        0       1
918 918
 
919 919
 $df.prior
920
-[1] 9.306153
920
+[1] 8134.845
921 921
 
922 922
 $s2.prior
923
-[1] 0.923179
923
+[1] 1.021387
924 924
 
925 925
 $var.prior
926
-[1] 17.33142 17.33142 12.26855
926
+[1] 15.664973 11.397823  9.122785
927 927
 
928 928
 $proportion
929 929
 [1] 0.01
930 930
 
931 931
 $s2.post
932
-[1] 1.2677996 0.6459499 1.1251558 0.9097727 0.9124980
932
+[1] 1.021963 1.020793 1.021694 1.021289 1.021321
933 933
 
934 934
 $t
935 935
      alpha-beta mu+alpha   mu+beta
936
-[1,]   3.847656 2.580411 -2.860996
937
-[2,]   6.637308 5.113018 -4.273553
938
-[3,]   3.692066 1.720376 -3.500994
939
-[4,]   3.464003 2.926234 -1.972606
940
-[5,]   3.175181 2.566881 -1.814221
936
+[1,]   4.285525 2.874066 -3.186582
937
+[2,]   5.279861 4.067315 -3.399536
938
+[3,]   3.874497 1.805382 -3.673984
939
+[4,]   3.269417 2.761856 -1.861797
940
+[5,]   3.001258 2.426278 -1.714845
941 941
 
942 942
 $df.total
943
-[1] 15.30615 15.30615 15.30615 15.30615 13.30615
943
+[1] 28 28 28 28 28
944 944
 
945 945
 $p.value
946 946
        alpha-beta     mu+alpha      mu+beta
947
-[1,] 1.529450e-03 0.0206493481 0.0117123495
948
-[2,] 7.144893e-06 0.0001195844 0.0006385076
949
-[3,] 2.109270e-03 0.1055117477 0.0031325769
950
-[4,] 3.381970e-03 0.0102514264 0.0668844448
951
-[5,] 7.124839e-03 0.0230888584 0.0922478630
947
+[1,] 1.945874e-04 0.0076518793 0.0035226472
948
+[2,] 1.290875e-05 0.0003507232 0.0020449743
949
+[3,] 5.877177e-04 0.0817788496 0.0009997966
950
+[4,] 2.854794e-03 0.0100337538 0.0731588447
951
+[5,] 5.599946e-03 0.0219470179 0.0974229293
952 952
 
953 953
 $lods
954
-     alpha-beta  mu+alpha    mu+beta
955
-[1,]  -1.013417 -3.702133 -3.0332393
956
-[2,]   3.981496  1.283349 -0.2615911
957
-[3,]  -1.315036 -5.168621 -1.7864101
958
-[4,]  -1.757103 -3.043209 -4.6191869
959
-[5,]  -2.257358 -3.478267 -4.5683738
954
+     alpha-beta   mu+alpha    mu+beta
955
+[1,]  0.7356274 -2.7480831 -1.9651516
956
+[2,]  3.2466921  0.1146964 -1.4669585
957
+[3,] -0.2839280 -4.8267525 -0.8071472
958
+[4,] -1.7300021 -2.9939736 -4.6385674
959
+[5,] -2.1848828 -3.4282344 -4.5756693
960 960
 
961 961
 $F
962
-[1]  7.421911 22.203107  7.608327  6.227010  5.060579
962
+[1]  9.207280 14.049948  8.378781  5.547069  4.521367
963 963
 
964 964
 $F.p.value
965
-[1] 5.581800e-03 2.988923e-05 5.080726e-03 1.050148e-02 2.320274e-02
965
+[1] 1.013549e-04 8.103854e-07 2.316764e-04 3.913618e-03 1.090148e-02
966 966
 
967 967
 > 
968 968
 > ### uniquegenelist
... ...
@@ -1273,4 +1273,4 @@ GO:0043009 chordate embryonic developm...  BP  3  0    2 1.0000000 0.025788497
1273 1273
 > 
1274 1274
 > proc.time()
1275 1275
    user  system elapsed 
1276
-   3.06    0.31    3.36 
1276
+   1.32    0.15    2.62