Browse code

8 April 2015: limma 3.23.13

- new function fry(), which provides a fast version of mroast()
when there is little heteroscedasticity between genes.

- minor change to mroast() output so that FDR values are never
smaller than p-values when midp=TRUE.


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/limma@102075 bc3139a8-67e5-0310-9ffc-ced21a209358

Gordon Smyth authored on 08/04/2015 05:56:02
Showing 7 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: limma
2
-Version: 3.23.12
3
-Date: 2015/04/04
2
+Version: 3.23.13
3
+Date: 2015/04/08
4 4
 Title: Linear Models for Microarray Data
5 5
 Description: Data analysis, linear models and differential expression for microarray data.
6 6
 Author: Gordon Smyth [cre,aut], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Natalie Thorne [ctb], Davis McCarthy [ctb], Di Wu [ctb], Yifang Hu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb]
... ...
@@ -12,6 +12,8 @@ S3method("[",RGList)
12 12
 S3method("[",EList)
13 13
 S3method("[",EListRaw)
14 14
 S3method(anova,MAList)
15
+S3method(as.data.frame,EList)
16
+S3method(as.data.frame,MAList)
15 17
 S3method(as.data.frame,MArrayLM)
16 18
 S3method(as.matrix,MAList)
17 19
 S3method(as.matrix,EList)
... ...
@@ -54,6 +56,7 @@ S3method("dimnames<-",RGList)
54 56
 S3method("dimnames<-",EList)
55 57
 S3method("dimnames<-",EListRaw)
56 58
 S3method(fitted,MArrayLM)
59
+S3method(fry,default)
57 60
 S3method(goana,default)
58 61
 S3method(goana,MArrayLM)
59 62
 S3method(length,MAList)
... ...
@@ -65,6 +68,7 @@ S3method(merge,MAList)
65 68
 S3method(merge,RGList)
66 69
 S3method(merge,EList)
67 70
 S3method(merge,EListRaw)
71
+S3method(mroast,default)
68 72
 S3method(roast,default)
69 73
 S3method(romer,default)
70 74
 S3method(rbind,MAList)
71 75
new file mode 100644
... ...
@@ -0,0 +1,136 @@
1
+fry <- function(y,...) UseMethod("fry")
2
+
3
+fry.default <- function(y,index=NULL,design=NULL,contrast=ncol(design),weights=NULL,sort=TRUE,...)
4
+#	Quick version of roast gene set test assuming equal variances between genes
5
+#	The up and down p-values are equivalent to those from roast with nrot=Inf
6
+#	in the special case of prior.df=Inf.
7
+#	Gordon Smyth and Goknur Giner
8
+#	Created 30 January 2015.  Last modified 8 April 2015
9
+{
10
+#	Issue warning if extra arguments found
11
+	dots <- names(list(...))
12
+	if(length(dots)) warning("Extra arguments disregarded: ",sQuote(dots))
13
+
14
+#	Extract components from y
15
+	y <- getEAWP(y)
16
+	G <- nrow(y$exprs)
17
+	n <- ncol(y$exprs)
18
+
19
+#	Check index
20
+	if(is.null(index)) index <- list(set1=1L:G)
21
+	if(!is.list(index)) index <- list(set1=index)
22
+
23
+#	Check design
24
+	if(is.null(design)) design <- y$design
25
+	if(is.null(design))
26
+		design <- matrix(1,n,1)
27
+	else {
28
+		design <- as.matrix(design)
29
+		if(mode(design) != "numeric") stop("design must be a numeric matrix")
30
+	}
31
+	if(nrow(design) != n) stop("row dimension of design matrix must match column dimension of data")
32
+	ne <- nonEstimable(design)
33
+	if(!is.null(ne)) cat("Coefficients not estimable:",paste(ne,collapse=" "),"\n")
34
+
35
+	p <- ncol(design)
36
+	df.residual <- n-p
37
+	df.camera <- min(df.residual,G-2)
38
+
39
+#	Check weights
40
+	if(is.null(weights)) weights <- y$weights
41
+
42
+#	Reduce to numeric expression matrix
43
+	y <- y$exprs
44
+
45
+#	Check weights
46
+	if(!is.null(weights)) {
47
+		if(any(weights<=0)) stop("weights must be positive")
48
+		if(length(weights)==n) {
49
+			sw <- sqrt(weights)
50
+			y <- t(t(y)*sw)
51
+			design <- design*sw
52
+			weights <- NULL
53
+		}
54
+	}
55
+	if(!is.null(weights)) {
56
+		if(length(weights)==G) weights <- matrix(weights,G,n)
57
+		weights <- as.matrix(weights)
58
+		if(any( dim(weights) != dim(y) )) stop("weights not conformal with y")
59
+	}
60
+
61
+#	Reform design matrix so that contrast of interest is last column
62
+	if(is.character(contrast)) {
63
+		contrast <- which(contrast==colnames(design))
64
+		if(length(contrast)==0) stop("coef ",contrast," not found")
65
+	}
66
+	if(length(contrast)==1) {
67
+		j <- c((1:p)[-contrast], contrast)
68
+		if(contrast<p) design <- design[,j]
69
+	} else {
70
+		QR <- qr(contrast)
71
+		design <- t(qr.qty(QR,t(design)))
72
+		if(sign(QR$qr[1,1]<0)) design[,1] <- -design[,1]
73
+		design <- design[,c(2:p,1)]
74
+	}
75
+
76
+#	Compute effects matrix
77
+	if(is.null(weights)) {
78
+		QR <- qr(design)
79
+		if(QR$rank<p) stop("design matrix is not of full rank")
80
+		effects <- qr.qty(QR,t(y))
81
+		unscaledt <- effects[p,]
82
+		if(QR$qr[p,p]<0) unscaledt <- -unscaledt
83
+	} else {
84
+		effects <- matrix(0,n,G)
85
+		unscaledt <- rep(0,n)
86
+		sw <- sqrt(weights)
87
+		yw <- y*sw
88
+		for (g in 1:G) {
89
+			xw <- design*sw[g,]
90
+			QR <- qr(xw)
91
+			if(QR$rank<p) stop("weighted design matrix not of full rank for gene ",g)
92
+			effects[,g] <- qr.qty(QR,yw[g,])
93
+			unscaledt[g] <- effects[p,g]
94
+			if(QR$qr[p,p]<0) unscaledt[g] <- -unscaledt[g]
95
+		}
96
+	}
97
+
98
+#	Standardized residuals
99
+	U <- effects[-(1:(p-1)),,drop=FALSE]
100
+
101
+#	Global statistics
102
+	nsets <- length(index)
103
+	NGenes <- rep.int(0L,nsets)
104
+	Direction <- rep.int("",nsets)
105
+	PValue.Mixed <- PValue <- rep.int(0,nsets)
106
+	for (i in 1:nsets) {
107
+		iset <- index[[i]]
108
+		USet <- U[,iset,drop=FALSE]
109
+		NGenes[i] <- ncol(USet)
110
+		MeanUSet <- rowMeans(USet)
111
+		t.stat <- MeanUSet[1L] / sqrt(mean(MeanUSet[-1L]^2L))
112
+		if(t.stat>0) Direction[i] <- "Up" else Direction[i] <- "Down"
113
+		PValue[i] <- 2*pt(-abs(t.stat),df=df.residual)
114
+#		MSqUSet <- rowMeans(USet^2)
115
+#		F.stat <- MSqUSet[1L] / mean(MSqUSet[-1L])
116
+#		PValue.Mixed[i] <- pf(F.stat,df1=1L,df2=df.residual,lower.tail=FALSE)
117
+	}
118
+
119
+#	Add FDR
120
+	if(nsets>1) {
121
+		FDR <- p.adjust(PValue,method="BH")
122
+#		FDR.Mixed <- p.adjust(PValue.Mixed,method="BH")
123
+		tab <- data.frame(NGenes=NGenes,Direction=Direction,PValue=PValue,FDR=FDR)
124
+	} else {
125
+		tab <- data.frame(NGenes=NGenes,Direction=Direction,PValue=PValue)
126
+	}
127
+	rownames(tab) <- names(index)
128
+
129
+#	Sort by p-value
130
+	if(sort && nsets>1) {
131
+		o <- order(tab$PValue,-tab$NGenes)
132
+		tab <- tab[o,]
133
+	}
134
+
135
+	tab
136
+}
... ...
@@ -16,7 +16,7 @@ roast <- function(y,...) UseMethod("roast")
16 16
 roast.default <- function(y,index=NULL,design=NULL,contrast=ncol(design),set.statistic="mean",gene.weights=NULL,array.weights=NULL,weights=NULL,block=NULL,correlation,var.prior=NULL,df.prior=NULL,trend.var=FALSE,nrot=999,approx.zscore=TRUE,...)
17 17
 # Rotation gene set testing for linear models
18 18
 # Gordon Smyth and Di Wu
19
-# Created 24 Apr 2008.  Last modified 31 March 2015.
19
+# Created 24 Apr 2008.  Last modified 8 April 2015.
20 20
 {
21 21
 #	Issue warning if extra arguments found
22 22
 	dots <- names(list(...))
... ...
@@ -483,6 +483,11 @@ mroast.default <- function(y,index=NULL,design=NULL,contrast=ncol(design),set.st
483 483
 		stringsAsFactors=FALSE
484 484
 	)
485 485
 
486
+	if(midp) {
487
+		tab$FDR <- pmax(tab$FDR, pv[,"UpOrDown"])
488
+		tab$FDR.Mixed <- pmax(tab$FDR.Mixed, pv[,"Mixed"])
489
+	}
490
+
486 491
 #	Sort by p-value
487 492
 	sort <- match.arg(sort,c("directional","mixed","none"))
488 493
 	if(sort=="none") return(tab)
... ...
@@ -1,3 +1,11 @@
1
+ 8 April 2015: limma 3.23.13
2
+
3
+- new function fry(), which provides a fast version of mroast()
4
+  when there is little heteroscedasticity between genes.
5
+ 
6
+- minor change to mroast() output so that FDR values are never
7
+  smaller than p-values when midp=TRUE.
8
+ 
1 9
  4 April 2015: limma 3.23.12
2 10
 
3 11
 - camera(), roast(), mroast() and romer() now give a warning if
... ...
@@ -5,19 +5,25 @@
5 5
 \alias{mroast.default}
6 6
 \alias{Roast-class}
7 7
 \alias{show,Roast-method}
8
+\alias{fry}
9
+\alias{fry.default}
8 10
 \title{Rotation Gene Set Tests}
9 11
 \description{
10 12
 Rotation gene set testing for linear models.
11 13
 }
12 14
 
13 15
 \usage{
14
-\S3method{roast}{default}(y, index=NULL, design=NULL, contrast=ncol(design), set.statistic="mean",
15
-     gene.weights=NULL, array.weights=NULL, weights=NULL, block=NULL, correlation,
16
-     var.prior=NULL, df.prior=NULL, trend.var=FALSE, nrot=999, approx.zscore=TRUE, \dots)
17
-\S3method{mroast}{default}(y, index=NULL, design=NULL, contrast=ncol(design), set.statistic="mean",
18
-     gene.weights=NULL, array.weights=NULL, weights=NULL, block=NULL, correlation,
19
-     var.prior=NULL, df.prior=NULL, trend.var=FALSE, nrot=999, approx.zscore=TRUE,
20
-     adjust.method="BH", midp=TRUE, sort="directional", \dots)
16
+\S3method{fry}{default}(y, index = NULL, design = NULL, contrast = ncol(design),
17
+      weights = NULL, sort = TRUE, \dots)
18
+\S3method{roast}{default}(y, index = NULL, design = NULL, contrast = ncol(design),
19
+      set.statistic = "mean", gene.weights = NULL, array.weights = NULL, weights = NULL,
20
+      block = NULL, correlation, var.prior = NULL, df.prior = NULL, trend.var = FALSE,
21
+      nrot = 999, approx.zscore = TRUE, \dots)
22
+\S3method{mroast}{default}(y, index = NULL, design = NULL, contrast = ncol(design),
23
+       set.statistic = "mean", gene.weights = NULL, array.weights = NULL, weights = NULL,
24
+       block = NULL, correlation, var.prior = NULL, df.prior = NULL, trend.var = FALSE,
25
+       nrot = 999, approx.zscore = TRUE, adjust.method = "BH", midp = TRUE,
26
+       sort = "directional", \dots)
21 27
 }
22 28
 
23 29
 \arguments{
... ...
@@ -40,7 +46,7 @@ Rotation gene set testing for linear models.
40 46
   \item{nrot}{number of rotations used to estimate the p-values.}
41 47
   \item{adjust.method}{method used to adjust the p-values for multiple testing. See \code{\link{p.adjust}} for possible values.}
42 48
   \item{midp}{logical, should mid-p-values be used in instead of ordinary p-values when adjusting for multiple testing?}
43
-  \item{sort}{character, whether to sort output table by directional p-value (\code{"directional"}), non-directional p-value (\code{"mixed"}), or not at all (\code{"none"}).}
49
+  \item{sort}{character, whether to sort output table by directional p-value (\code{"directional"}), non-directional p-value (\code{"mixed"}), or not at all (\code{"none"}).  For \code{fry} it is logical, whether to sort or not.}
44 50
   \item{approx.zscore}{logical, if \code{TRUE} then a fast approximation is used to convert t-statistics into z-scores prior to computing set statistics. If \code{FALSE}, z-scores will be exact.}
45 51
   \item{\dots}{other arguments not currently used.}
46 52
 }
... ...
@@ -103,6 +109,9 @@ This means that the smallest possible p-value is \code{1/(nrot+1)}.
103 109
 \code{mroast} does roast tests for multiple sets, including adjustment for multiple testing.
104 110
 By default, \code{mroast} reports ordinary p-values but uses mid-p-values (Routledge, 1994) at the multiple testing stage.
105 111
 Mid-p-values are probably a good choice when using false discovery rates (\code{adjust.method="BH"}) but not when controlling the family-wise type I error rate (\code{adjust.method="holm"}).
112
+
113
+\code{fry} is a fast version of \code{mroast} in the special case that \code{df.prior} is large and \code{set.statistic="mean"}.
114
+In this situation, it gives the same result as \code{mroast} with an infinite number of rotations.
106 115
 }
107 116
 
108 117
 \note{
... ...
@@ -1,5 +1,5 @@
1 1
 
2
-R version 3.2.0 alpha (2015-03-28 r68110)
2
+R version 3.2.0 beta (2015-04-02 r68137) -- "Full of Ingredients"
3 3
 Copyright (C) 2015 The R Foundation for Statistical Computing
4 4
 Platform: x86_64-w64-mingw32/x64 (64-bit)
5 5
 
... ...
@@ -1173,21 +1173,21 @@ Up                 1 0.0010005
1173 1173
 UpOrDown           1 0.0020000
1174 1174
 Mixed              1 0.0020000
1175 1175
 > mroast(y=y,list(set1=iset1,set2=iset2),design,contrast=2,gene.weights=runif(100))
1176
-     NGenes PropDown PropUp Direction PValue    FDR PValue.Mixed FDR.Mixed
1177
-set1      5        0      1        Up  0.008 0.0150        0.008    0.0150
1178
-set2      5        0      0        Up  0.959 0.9585        0.687    0.6865
1176
+     NGenes PropDown PropUp Direction PValue   FDR PValue.Mixed FDR.Mixed
1177
+set1      5        0      1        Up  0.008 0.015        0.008     0.015
1178
+set2      5        0      0        Up  0.959 0.959        0.687     0.687
1179 1179
 > mroast(y=y,list(set1=iset1,set2=iset2),design,contrast=2,array.weights=c(0.5,1,0.5,1))
1180
-     NGenes PropDown PropUp Direction PValue    FDR PValue.Mixed FDR.Mixed
1181
-set1      5        0      1        Up  0.004 0.0070        0.004    0.0070
1182
-set2      5        0      0        Up  0.679 0.6785        0.658    0.6575
1180
+     NGenes PropDown PropUp Direction PValue   FDR PValue.Mixed FDR.Mixed
1181
+set1      5        0      1        Up  0.004 0.007        0.004     0.007
1182
+set2      5        0      0        Up  0.679 0.679        0.658     0.658
1183 1183
 > mroast(y=y,list(set1=iset1,set2=iset2),design,contrast=2,weights=w)
1184
-     NGenes PropDown PropUp Direction PValue    FDR PValue.Mixed FDR.Mixed
1185
-set1      5      0.0      1        Up  0.003 0.0050        0.003    0.0050
1186
-set2      5      0.2      0      Down  0.950 0.9495        0.250    0.2495
1184
+     NGenes PropDown PropUp Direction PValue   FDR PValue.Mixed FDR.Mixed
1185
+set1      5      0.0      1        Up  0.003 0.005        0.003     0.005
1186
+set2      5      0.2      0      Down  0.950 0.950        0.250     0.250
1187 1187
 > mroast(y=y,list(set1=iset1,set2=iset2),design,contrast=2,weights=w,array.weights=c(0.5,1,0.5,1))
1188
-     NGenes PropDown PropUp Direction PValue    FDR PValue.Mixed FDR.Mixed
1189
-set1      5        0      1        Up  0.001 0.0010        0.001    0.0010
1190
-set2      5        0      0      Down  0.791 0.7905        0.146    0.1455
1188
+     NGenes PropDown PropUp Direction PValue   FDR PValue.Mixed FDR.Mixed
1189
+set1      5        0      1        Up  0.001 0.001        0.001     0.001
1190
+set2      5        0      0      Down  0.791 0.791        0.146     0.146
1191 1191
 > 
1192 1192
 > ### camera
1193 1193
 > 
... ...
@@ -1376,4 +1376,4 @@ GO:0070062 0.055161144
1376 1376
 > 
1377 1377
 > proc.time()
1378 1378
    user  system elapsed 
1379
-   2.94    0.23    3.16 
1379
+   3.13    0.24    4.36