Browse code

8 Aug 2014: limma 3.21.12

- The definition of the M and A axes for an MA-plot of single channel
data is changed slightly. Previously the A-axis was the average of
all arrays in the dataset -- this has been definition since MA-plots
were introduced for single channel data in April 2003. Now an
artificial array is formed by averaging all arrays other than the
one to be plotted. Then a mean-difference plot is formed from the
specified array and the artificial array. This change ensures the
specified and artificial arrays are computed from independent data,
and ensures the MA-plot will reduce to a correct mean-difference
plot when there are just two arrays in the dataset.


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/limma@93246 bc3139a8-67e5-0310-9ffc-ced21a209358

Gordon Smyth authored on 08/08/2014 05:19:48
Showing 12 changed files

... ...
@@ -1,13 +1,13 @@
1 1
 Package: limma
2
-Version: 3.21.10
3
-Date: 2014/06/27
2
+Version: 3.21.12
3
+Date: 2014/08/08
4 4
 Title: Linear Models for Microarray Data
5 5
 Description: Data analysis, linear models and differential expression for microarray data.
6 6
 Author: Gordon Smyth [cre,aut], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Natalie Thorne [ctb], Davis McCarthy [ctb], Di Wu [ctb], Yifang Hu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb]
7 7
 Maintainer: Gordon Smyth <[email protected]>
8 8
 License: GPL (>=2)
9 9
 Depends: R (>= 2.3.0), methods
10
-Suggests: statmod (>= 1.2.2), splines, locfit, MASS, ellipse, affy, vsn, AnnotationDbi, org.Hs.eg.db, GO.db, illuminaio
10
+Suggests: statmod (>= 1.2.2), splines, locfit, MASS, ellipse, affy, vsn, AnnotationDbi, org.Hs.eg.db, GO.db, illuminaio, BiasedUrn
11 11
 LazyLoad: yes
12 12
 URL: http://bioinf.wehi.edu.au/limma
13 13
 biocViews: ExonArray, GeneExpression, Transcription, AlternativeSplicing, DifferentialExpression, DifferentialSplicing, GeneSetEnrichment, DataImport, Genetics, Bayesian, Clustering, Regression, TimeCourse, Microarray, microRNAArray, mRNAMicroarray, OneChannel, ProprietaryPlatforms, TwoChannel, RNASeq, BatchEffect, MultipleComparison, Normalization, Preprocessing, QualityControl
... ...
@@ -50,6 +50,7 @@ S3method("dimnames<-",RGList)
50 50
 S3method("dimnames<-",EList)
51 51
 S3method("dimnames<-",EListRaw)
52 52
 S3method(fitted,MArrayLM)
53
+S3method(goana,MArrayLM)
53 54
 S3method(length,MAList)
54 55
 S3method(length,MArrayLM)
55 56
 S3method(length,RGList)
... ...
@@ -2,7 +2,7 @@ diffSplice <- function(fit,geneid,exonid=NULL,verbose=TRUE)
2 2
 #	Test for splicing variants between conditions
3 3
 #	using linear model fit of exon data.
4 4
 #	Charity Law and Gordon Smyth
5
-#	Created 13 Dec 2013.  Last modified 18 Feb 2014.
5
+#	Created 13 Dec 2013.  Last modified 7 Aug 2014.
6 6
 {
7 7
 	exon.genes <- fit$genes
8 8
 	if(is.null(exon.genes)) exon.genes <- data.frame(ExonID=1:nrow(fit))
... ...
@@ -106,78 +106,98 @@ diffSplice <- function(fit,geneid,exonid=NULL,verbose=TRUE)
106 106
 	out$gene.F.p.value <- gene.F.p.value
107 107
 
108 108
 #	Which columns of exon.genes contain gene level annotation?
109
-	exon.lastexon <- cumsum(gene.nexons)
110
-	exon.firstexon <- exon.lastexon-gene.nexons+1
109
+	gene.lastexon <- cumsum(gene.nexons)
110
+	gene.firstexon <- gene.lastexon-gene.nexons+1
111 111
 	no <- logical(nrow(exon.genes))
112
-	isdup <- vapply(exon.genes,duplicated,no)[-exon.firstexon,,drop=FALSE]
112
+	isdup <- vapply(exon.genes,duplicated,no)[-gene.firstexon,,drop=FALSE]
113 113
 	isgenelevel <- apply(isdup,2,all)
114
-	out$gene.genes <- exon.genes[exon.lastexon,isgenelevel, drop=FALSE]
114
+	out$gene.genes <- exon.genes[gene.lastexon,isgenelevel, drop=FALSE]
115 115
 	out$gene.genes$NExons <- gene.nexons
116
+	out$gene.firstexon <- gene.firstexon
117
+	out$gene.lastexon <- gene.lastexon
118
+
119
+#	Simes adjustment of exon level p-values
120
+	simes <- function(p,n) {
121
+		p <- p[-which.max(p)]
122
+		min(sort(p)*(n-1)/(1:(n-1)))
123
+	}
124
+	out$gene.simes.p.value <- out$gene.F.p.value
125
+	for (i in 1:ngenes) for (j in 1:ncol(fit)) {
126
+		out$gene.simes.p.value[i,j] <- simes(exon.p.value[gene.firstexon[i]:gene.lastexon[i],j],gene.nexons[i])
127
+	}
116 128
 
117 129
 	out
118 130
 }
119 131
 
120
-topSplice <- function(fit, coef=ncol(fit), level="exon", number=10, FDR=1)
121
-#	Collate voomex results in data.frame, ordered from most significant at top
132
+topSplice <- function(fit, coef=ncol(fit), level="hybrid", number=10, FDR=1)
133
+#	Collate diffSplice results into data.frame, ordered from most significant at top
122 134
 #	Gordon Smyth
123
-#	Created 18 Dec 2013.  Last modified 17 March 2014.
135
+#	Created 18 Dec 2013.  Last modified 7 Aug 2014.
124 136
 {
125 137
 	coef <- coef[1]
126
-	exon <- match.arg(level,c("exon","gene"))
127
-	if(level=="exon") {
138
+	level <- match.arg(level,c("hybrid","exon","gene"))
139
+	switch(level,
140
+	"exon" = {
128 141
 		number <- min(number,nrow(fit$coefficients))
129 142
 		P <- fit$p.value[,coef]
130 143
 		BH <- p.adjust(P, method="BH")
131 144
 		if(FDR<1) number <- min(number,sum(BH<FDR))
132 145
 		o <- order(P)[1:number]
133 146
 		data.frame(fit$genes[o,,drop=FALSE],logFC=fit$coefficients[o,coef],t=fit$t[o,coef],P.Value=P[o],FDR=BH[o])
134
-	} else {
147
+	},
148
+	gene = {
135 149
 		number <- min(number,nrow(fit$gene.F))
136 150
 		P <- fit$gene.F.p.value[,coef]
137 151
 		BH <- p.adjust(P, method="BH")
138 152
 		if(FDR<1) number <- min(number,sum(BH<FDR))
139 153
 		o <- order(P)[1:number]
140 154
 		data.frame(fit$gene.genes[o,,drop=FALSE],F=fit$gene.F[o,coef],P.Value=P[o],FDR=BH[o])
155
+	},
156
+	hybrid = {
157
+		number <- min(number,nrow(fit$gene.F))
158
+		P <- fit$gene.simes.p.value[,coef]
159
+		BH <- p.adjust(P, method="BH")
160
+		if(FDR<1) number <- min(number,sum(BH<FDR))
161
+		o <- order(P)[1:number]
162
+		data.frame(fit$gene.genes[o,,drop=FALSE],P.Value=P[o],FDR=BH[o])
141 163
 	}
164
+	)
142 165
 }
143 166
 
144
-plotSplice <- function(fit, coef=ncol(fit), geneid=NULL, rank=1L, FDR = 0.05)
145
-#	Plot exons of most differentially spliced gene
167
+plotSplice <- function(fit, coef=ncol(fit), geneid=NULL, genecolname=NULL, rank=1L, FDR = 0.05)
168
+#	Plot exons of chosen gene
169
+#	fit is output from diffSplice
146 170
 #	Gordon Smyth and Yifang Hu
147 171
 #	Created 3 Jan 2014.  Last modified 19 March 2014.
148 172
 {
149
-	# Gene labelling including gene symbol
150
-	genecolname <- fit$genecolname
151
-	genelab <- grep(paste0(genecolname,"|Symbol|symbol"), colnames(fit$gene.genes), value = T)
152
-	
173
+	if(is.null(genecolname)) 
174
+		genecolname <- fit$genecolname
175
+	else
176
+		genecolname <- as.character(genecolname)
177
+
153 178
 	if(is.null(geneid)) {
179
+#		Find gene from specified rank 
154 180
 		if(rank==1L)
155 181
 			i <- which.min(fit$gene.F.p.value[,coef])
156 182
 		else
157 183
 			i <- order(fit$gene.F.p.value[,coef])[rank]
158
-
159
-		geneid <- paste(fit$gene.genes[i,genelab], collapse = ".")
160
-
184
+		geneid <- paste(fit$gene.genes[i,genecolname], collapse = ".")
161 185
 	} else {
162
-		i <- which(fit$gene.genes[,fit$genecolname]==geneid)
163
-		
164
-		geneid <- paste(fit$gene.genes[i,genelab], collapse = ".")
165
-
186
+#		Find gene from specified name
187
+		geneid <- as.character(geneid)
188
+		i <- which(fit$gene.genes[,genecolname]==geneid)[1]
166 189
 		if(!length(i)) stop(paste("geneid",geneid,"not found"))
167 190
 	}
168
-	exon.lastexon <- cumsum(fit$gene.genes$NExons[1:i])
169
-	j <- (exon.lastexon[i-1]+1):exon.lastexon[i]
170 191
 
171
-	exoncolname <- fit$exoncolname
192
+#	Row numbers containing exons
193
+	j <- fit$gene.firstexon[i]:fit$gene.lastexon[i]
172 194
 
173
-	if(is.null(exoncolname)){
195
+	exoncolname <- fit$exoncolname
196
+	if(is.null(exoncolname)) {
174 197
 
175 198
 		plot(fit$coefficients[j,coef], xlab = "Exon", ylab = "logFC (this exon vs rest)", main = geneid, type = "b")
176 199
 
177
-	}
178
-
179
-	# Plot exons and mark exon ids on the axis
180
-	if(!is.null(exoncolname)) {
200
+	} else {
181 201
 
182 202
 		exon.id <- fit$genes[j, exoncolname]
183 203
 		xlab <- paste("Exon", exoncolname, sep = " ")
... ...
@@ -186,25 +206,24 @@ plotSplice <- function(fit, coef=ncol(fit), geneid=NULL, rank=1L, FDR = 0.05)
186 206
 		axis(1, at = 1:length(j), labels = exon.id, las = 2, cex.axis = 0.6)
187 207
 		mtext(xlab, side = 1, padj = 5.2)
188 208
 
189
-		# Mark the topSpliced exons
209
+#		Mark the topSpliced exons
190 210
 		top <- topSplice(fit, coef = coef, number = Inf, level = "exon", FDR = FDR)
191 211
 		m <- which(top[,genecolname] %in% fit$gene.genes[i,genecolname])
192
-
193 212
 		if(length(m) > 0){
194
-
195
-			if(length(m) == 1) cex <- 1.5 else{
196
-
213
+			if(length(m) == 1)
214
+				cex <- 1.5
215
+			else {
197 216
 				abs.fdr <- abs(log10(top$FDR[m]))
198 217
 				from <- range(abs.fdr)
199 218
 				to <- c(1,2)
200 219
 				cex <- (abs.fdr - from[1])/diff(from) * diff(to) + to[1]
201
-
202 220
 			}	
203 221
 			mark <- match(top[m, exoncolname], exon.id)
204 222
 			points((1:length(j))[mark], fit$coefficients[j[mark], coef], col = "red", pch = 16, cex = cex)
205 223
 		}
224
+
206 225
 	}
207 226
 
208 227
 	abline(h=0,lty=2)
209
-
210
-}
211 228
\ No newline at end of file
229
+	invisible()
230
+}
... ...
@@ -1,30 +1,81 @@
1
-goana <- function(fit, coef = ncol(fit), geneid = "GeneID", FDR = 0.05, species = "Hs"){
1
+goana <- function(de,...) UseMethod("goana")
2
+
3
+goana.MArrayLM <- function(de, coef = ncol(de), geneid = rownames(de), FDR = 0.05, species = "Hs", trend = FALSE, ...)
2 4
 #  Gene ontology analysis of DE genes from linear model fit
3 5
 #  Gordon Smyth and Yifang Hu
4
-#  Created 20 June 2014. Last modified 24 June 2014.
5
-
6
-	# Check input
7
-	if(!is(fit, "MArrayLM")) stop("fit must be an MArrayLM object.")
8
-	if(is.null(fit$p.value)) stop("p value not found in fit object (from eBayes).")
9
-	if(is.null(fit$coefficients)) stop("coefficient not found in fit object.")
10
-
11
-	if(length(geneid) == nrow(fit)){
6
+#  Created 20 June 2014.  Last modified 23 July 2014.
7
+{
8
+	# Check fit
9
+	if(is.null(de$p.value)) stop("p value not found in fit object (from eBayes).")	
10
+	if(is.null(de$coefficients)) stop("coefficient not found in fit object.")
11
+	if(length(coef) != 1) stop("coef length needs to be 1.")
12
+	ngenes <- nrow(de)
13
+
14
+	# Check geneid
15
+	# Can be either a vector of IDs or a column name
16
+	geneid <- as.character(geneid)
17
+	if(length(geneid) == ngenes) {
18
+		universe <- geneid
19
+	} else
20
+		if(length(geneid) == 1L) {
21
+			universe <- de$genes[[geneid]]
22
+			if(is.null(universe)) stop(paste("Column",geneid,"not found in de$genes"))
23
+		} else
24
+			stop("geneid has incorrect length")
25
+
26
+	# Check trend
27
+	# Can be logical, or a numeric vector of covariate values, or the name of the column containing the covariate values
28
+	if(is.logical(trend)) {
29
+		if(trend) {
30
+			covariate <- de$Amean
31
+			if(is.null(covariate)) stop("Amean not found in fit")
32
+		}
33
+	} else
34
+		if(is.numeric(trend)) {
35
+			if(length(trend) != ngenes) stop("If trend is numeric, then length must equal nrow(de)")
36
+			covariate <- trend
37
+			trend <- TRUE
38
+		} else {
39
+			if(is.character(trend)) {
40
+				if(length(trend) != 1L) stop("If trend is character, then length must be 1")
41
+				covariate <- de$genes[[trend]]
42
+				if(is.null(covariate)) stop(paste("Column",trend,"not found in de$genes"))
43
+				trend <- TRUE
44
+			} else
45
+				stop("trend is neither logical, numeric nor character")
46
+		}
47
+
48
+	# Check FDR
49
+	if(!is.numeric(FDR) | length(FDR) != 1) stop("FDR must be numeric and of length 1.")
50
+	if(FDR < 0 | FDR > 1) stop("FDR should be between 0 and 1.")
12 51
 
13
-		fit$genes$GeneID <- geneid
14
-		EG.col <- "GeneID"
52
+	# Get up and down DE genes
53
+	fdr.coef <- p.adjust(de$p.value[,coef], method = "BH")
54
+	EG.DE.UP <- universe[fdr.coef < FDR & de$coef[,coef] > 0]
55
+	EG.DE.DN <- universe[fdr.coef < FDR & de$coef[,coef] < 0]
56
+	de.gene <- list(Up=EG.DE.UP, Down=EG.DE.DN)
57
+
58
+	# Fit monotonic cubic spline for DE genes vs. gene.weights
59
+	if(trend) {
60
+			PW <- isDE <- rep(0,nrow(de))
61
+			isDE[fdr.coef < FDR] <- 1
62
+			o <- order(covariate)
63
+			PW[o] <- tricubeMovingAverage(isDE[o],span=0.5,full.length=TRUE)
15 64
 	}
65
+	if(!trend) PW <- NULL
16 66
 
17
-	else if(length(geneid) == 1) EG.col <- as.character(geneid)
18
-
19
-	if(is.null(fit$genes)) stop("no annotation (genes) found.")
20
-
21
-	EG.All <- as.character(fit$genes[[EG.col]])
22
-
23
-	# Get up and down DE genes
24
-	fdr.coef <- p.adjust(fit$p.value[,coef], method = "BH")
67
+	NextMethod(de = de.gene, universe = universe, species = species, weights = PW, ...)
68
+}
25 69
 
26
-	EG.DE.UP <- EG.All[fdr.coef < FDR & fit$coef[,coef] > 0]
27
-	EG.DE.DN <- EG.All[fdr.coef < FDR & fit$coef[,coef] < 0]
70
+goana.default <- function(de, universe = NULL, species = "Hs", weights = NULL, ...)
71
+#  Gene ontology analysis of DE genes
72
+#  Gordon Smyth and Yifang Hu
73
+#  Created 20 June 2014.  Last modified 23 July 2014.
74
+{
75
+	# check de
76
+	if(!is.list(de)) de <- list(DE1 = de)
77
+	if(is.null(names(de))) names(de) <- paste0("DE", 1:length(de))
78
+	de <- lapply(de, as.character)
28 79
 
29 80
 	# Select species
30 81
 	species <- match.arg(species, c("Hs", "Mm", "Rn", "Dm"))
... ...
@@ -42,46 +93,102 @@ goana <- function(fit, coef = ncol(fit), geneid = "GeneID", FDR = 0.05, species
42 93
 	d <- duplicated(EG.GO[,c("gene_id", "go_id", "Ontology")])
43 94
 	EG.GO <- EG.GO[!d, ]
44 95
 
96
+	# Check universe
97
+	if(is.null(universe)) universe <- EG.GO$gene_id
98
+	universe <- as.character(universe)
99
+
100
+	# Check weights
101
+	if(!is.null(weights)){
102
+		if(length(weights)!=length(universe)) stop("length(weights) must equal length(universe).")
103
+	}
104
+
45 105
 	# Reduce to universe
46
-	EG.GO <- EG.GO[EG.GO$gene_id %in% EG.All, ]
106
+	EG.GO <- EG.GO[EG.GO$gene_id %in% universe, ]
107
+	if(!length(EG.GO$gene_id)) stop("Universe is empty.")
108
+
47 109
 	Total <- length(unique(EG.GO$gene_id))
48 110
 
49 111
 	# Overlap with DE genes
50
-	isDE.UP <- (EG.GO$gene_id %in% EG.DE.UP)
51
-	isDE.DN <- (EG.GO$gene_id %in% EG.DE.DN)
52
-	TotalDE.UP <- length(unique(EG.GO$gene_id[isDE.UP]))
53
-	TotalDE.DN <- length(unique(EG.GO$gene_id[isDE.DN]))
112
+	isDE <- lapply(de, function(x) EG.GO$gene_id %in% x)
113
+	TotalDE <- lapply(isDE, function(x) length(unique(EG.GO$gene_id[x])))
114
+	nDE <- length(isDE)
115
+
116
+	if(length(weights)) {
117
+		# Probability weight for each gene
118
+		m <- match(EG.GO$gene_id, universe)
119
+		PW2 <- list(weights[m])
120
+		X <- do.call(cbind, c(N=1, isDE, PW=PW2))
121
+	} else
122
+		X <- do.call(cbind, c(N=1, isDE))
54 123
 
55
-	X <- cbind(N=1, Up=isDE.UP, Down=isDE.DN)
56 124
 	group <- paste(EG.GO$go_id, EG.GO$Ontology, sep=".")
57 125
 	S <- rowsum(X, group=group, reorder=FALSE)
58 126
 
59
-	# Fisher's exact test
60
-	p.UP <- phyper(q=S[,"Up"]-0.5,m=TotalDE.UP,n=Total-TotalDE.UP,k=S[,"N"],lower.tail=FALSE)
61
-	p.DN <- phyper(q=S[,"Down"]-0.5,m=TotalDE.DN,n=Total-TotalDE.DN,k=S[,"N"],lower.tail=FALSE)
127
+	P <- matrix(0, nrow = nrow(S), ncol = nDE)
128
+
129
+	if(length(weights)) {
130
+
131
+		# Calculate weight
132
+		require("BiasedUrn", character.only = TRUE)
133
+		PW.ALL <- sum(weights[universe %in% EG.GO$gene_id])
134
+		AVE.PW <- S[,"PW"]/S[,"N"]
135
+		W <- AVE.PW*(Total-S[,"N"])/(PW.ALL-S[,"N"]*AVE.PW)
136
+
137
+		# Wallenius' noncentral hypergeometric test
138
+		for(j in 1:nDE){
139
+
140
+			for(i in 1:nrow(S)){
141
+
142
+				P[i,j] <- pWNCHypergeo(S[i,1+j], S[i,"N"], Total-S[i,"N"], TotalDE[[j]], W[i],lower.tail=FALSE)+
143
+					dWNCHypergeo(S[i,1+j], S[i,"N"], Total-S[i,"N"], TotalDE[[j]], W[i])
144
+			}
145
+
146
+		}
147
+
148
+		S <- S[,-ncol(S)]
149
+
150
+	} else {
151
+
152
+		# Fisher's exact test
153
+		for(j in 1:nDE){
154
+
155
+			P[,j] <- phyper(q=S[,1+j]-0.5,m=TotalDE[[j]],n=Total-TotalDE[[j]], k=S[,"N"],lower.tail=FALSE)
156
+		}
157
+
158
+	}
62 159
 
63 160
 	# Assemble output
64 161
 	g <- strsplit2(rownames(S),split="\\.")
65 162
 	TERM <- select(GO.db,keys=g[,1],columns="TERM")
66
-	Results <- data.frame(Term=TERM[[2]],Ont=g[,2],S,P.Up=p.UP,P.Down=p.DN,stringsAsFactors=FALSE)
163
+	Results <- data.frame(Term = TERM[[2]], Ont = g[,2], S, P, stringsAsFactors=FALSE)
67 164
 	rownames(Results) <- g[,1]
68 165
 
166
+	# Name P value for the DE genes
167
+	iTON <- c(1:3)
168
+	iDE <- 3+c(1:nDE)
169
+	PDE<- paste0("P.", colnames(Results)[iDE])
170
+	colnames(Results)[-c(iTON,iDE)] <- PDE
171
+
69 172
 	Results
70 173
 }
71 174
 
72 175
 topGO <- function(results, ontology = c("BP", "CC", "MF"), sort = "up", number = 20L){
73 176
 #  Extract sorted goana gene ontology test results 
74 177
 #  Gordon Smyth and Yifang Hu
75
-#  Created 20 June 2014. Last modified 24 June 2014.
76
-	
77
-	# Check input
78
-	if(any(! c("Term", "N", "Up", "Down", "Ont", "P.Up", "P.Down") %in% colnames(results))) stop("Results column names don't match GOTest results.")
79
-	
178
+#  Created 20 June 2014. Last modified 21 July 2014.
179
+
180
+	# Check results
181
+	if(!is.data.frame(results)) stop("Expect a dataframe with goana results.")
182
+
183
+	# Check ontology
80 184
 	ontology <- match.arg(ontology, c("BP", "CC", "MF"), several.ok = TRUE)
81
-	
82
-	sort <- match.arg(tolower(sort), c("up", "down"))
83
-	sort <- switch(sort, up = "P.Up", down = "P.Down")
84
-	
185
+
186
+	# Check sort and sort by P value
187
+	if(length(sort) != 1) stop("sort length needs to be 1.")
188
+	sort <- tolower(colnames(results)) %in% tolower(paste0("P.", sort))
189
+	if(sum(sort)!=1) stop("sort not found.")
190
+
191
+	# Check number
85 192
 	if(!is.numeric(number)) stop("Need to input number.")
86 193
 	if(number < 1L) return(data.frame())
87 194
 
... ...
@@ -89,10 +196,9 @@ topGO <- function(results, ontology = c("BP", "CC", "MF"), sort = "up", number =
89 196
 	sel <- results$Ont %in% ontology
90 197
 	results <- results[sel,]
91 198
 
92
-	# Sort by Up or Down p value
199
+	# Sort by p value
93 200
 	o <- order(results[, sort], rownames(results))
94 201
 	results <- results[o,]
95 202
 
96 203
 	if(number >= nrow(results)) results else results[1:number,]
97
-}
98
-
204
+}
99 205
\ No newline at end of file
... ...
@@ -3,7 +3,7 @@
3 3
 
4 4
 plotMA <- function(MA,...) UseMethod("plotMA")
5 5
 
6
-plotMA.RGList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, zero.weights=FALSE, ...)
6
+plotMA.RGList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, zero.weights=FALSE, ...)
7 7
 #	MA-plot with color coding for controls
8 8
 #	Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003.
9 9
 #	Last modified 23 April 2013.
... ...
@@ -12,7 +12,7 @@ plotMA.RGList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[arr
12 12
 	plotMA(MA,array=1,xlab=xlab,ylab=ylab,main=main,xlim=xlim,ylim=ylim,status=status,values=values,pch=pch,col=col,cex=cex,legend=legend,zero.weights=zero.weights,...)
13 13
 }
14 14
 
15
-plotMA.MAList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, zero.weights=FALSE, ...)
15
+plotMA.MAList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, zero.weights=FALSE, ...)
16 16
 #	MA-plot with color coding for controls
17 17
 #	Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003.
18 18
 #	Last modified 23 April 2013.
... ...
@@ -20,7 +20,7 @@ plotMA.MAList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[arr
20 20
 	x <- as.matrix(MA$A)[,array]
21 21
 	y <- as.matrix(MA$M)[,array]
22 22
 	if(is.null(MA$weights)) w <- NULL else w <- as.matrix(MA$weights)[,array]
23
-	if(missing(status)) status <- MA$genes$Status
23
+	if(is.null(status)) status <- MA$genes$Status
24 24
 	if(!is.null(w) && !zero.weights) {
25 25
 		i <- is.na(w) | (w <= 0)
26 26
 		y[i] <- NA
... ...
@@ -28,7 +28,7 @@ plotMA.MAList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[arr
28 28
 	.plotMAxy(x,y,xlab=xlab,ylab=ylab,main=main,xlim=xlim,ylim=ylim,status=status,values=values,pch=pch,col=col,cex=cex,legend=legend, ...)
29 29
 }
30 30
 
31
-plotMA.MArrayLM <- function(MA, coef=ncol(MA), xlab="AveExpr", ylab="logFC", main=colnames(MA)[coef], xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, zero.weights=FALSE, ...)
31
+plotMA.MArrayLM <- function(MA, coef=ncol(MA), xlab="AveExpr", ylab="logFC", main=colnames(MA)[coef], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, zero.weights=FALSE, ...)
32 32
 #	MA-plot with color coding for controls
33 33
 #	Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003.
34 34
 #	Last modified 21 March 2014.
... ...
@@ -37,7 +37,7 @@ plotMA.MArrayLM <- function(MA, coef=ncol(MA), xlab="AveExpr", ylab="logFC", mai
37 37
 	x <- MA$Amean
38 38
 	y <- as.matrix(MA$coef)[,coef]
39 39
 	if(is.null(MA$weights)) w <- NULL else w <- as.matrix(MA$weights)[,coef]
40
-	if(missing(status)) status <- MA$genes$Status
40
+	if(is.null(status)) status <- MA$genes$Status
41 41
 	if(!is.null(w) && !zero.weights) {
42 42
 		i <- is.na(w) | (w <= 0)
43 43
 		y[i] <- NA
... ...
@@ -45,7 +45,7 @@ plotMA.MArrayLM <- function(MA, coef=ncol(MA), xlab="AveExpr", ylab="logFC", mai
45 45
 	.plotMAxy(x,y,xlab=xlab,ylab=ylab,main=main,xlim=xlim,ylim=ylim,status=status,values=values,pch=pch,col=col,cex=cex,legend=legend, ...)
46 46
 }
47 47
 
48
-plotMA.EList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, zero.weights=FALSE, ...)
48
+plotMA.EList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, zero.weights=FALSE, ...)
49 49
 #	MA-plot with color coding for controls
50 50
 #	Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003.
51 51
 #	Last modified 23 April 2013.
... ...
@@ -59,7 +59,7 @@ plotMA.EList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[arra
59 59
 		x <- rowMeans(MA$E,na.rm=TRUE)
60 60
 	y <- MA$E[,array]-x
61 61
 	if(is.null(MA$weights)) w <- NULL else w <- as.matrix(MA$weights)[,array]
62
-	if(missing(status)) status <- MA$genes$Status
62
+	if(is.null(status)) status <- MA$genes$Status
63 63
 
64 64
 	if(!is.null(w) && !zero.weights) {
65 65
 		i <- is.na(w) | (w <= 0)
... ...
@@ -68,34 +68,29 @@ plotMA.EList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[arra
68 68
 	.plotMAxy(x,y,xlab=xlab,ylab=ylab,main=main,xlim=xlim,ylim=ylim,status=status,values=values,pch=pch,col=col,cex=cex,legend=legend, ...)
69 69
 }
70 70
 
71
-plotMA.default <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, zero.weights=FALSE, ...)
71
+plotMA.default <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, ...)
72 72
 #	MA-plot with color coding for controls
73 73
 #	Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003.
74
-#	Last modified 23 April 2013.
74
+#	Last modified 8 August 2014.
75 75
 {
76 76
 #	Data is assumed to be single-channel
77 77
 	MA <- as.matrix(MA)
78 78
 	narrays <- ncol(MA)
79
-	if(narrays < 2) stop("Need at least two arrays")
80
-	if(narrays > 5)
81
-		x <- apply(MA,1,median,na.rm=TRUE)
82
-	else
83
-		x <- rowMeans(MA,na.rm=TRUE)
84
-	y <- MA[,array]-x
85
-	w <- NULL
86
-	if(missing(status)) status <- NULL
79
+	if(narrays<2) stop("Need at least two columns")
80
+	array <- as.integer(array[1L])
81
+	Ave <- rowMeans(MA[,-array,drop=FALSE],na.rm=TRUE)
82
+	y <- MA[,array]-Ave
83
+	x <- (MA[,array]+Ave)/2
87 84
 
88
-	if(!is.null(w) && !zero.weights) {
89
-		i <- is.na(w) | (w <= 0)
90
-		y[i] <- NA
91
-	}
92 85
 	.plotMAxy(x,y,xlab=xlab,ylab=ylab,main=main,xlim=xlim,ylim=ylim,status=status,values=values,pch=pch,col=col,cex=cex,legend=legend, ...)
93 86
 }
94 87
 
95
-.plotMAxy <- function(x, y, xlab="A", ylab="M", main=NULL, xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, ...)
88
+# Call this plotWithHighlights and document?
89
+
90
+.plotMAxy <- function(x, y, xlab="A", ylab="M", main=NULL, xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, pch0=16, col0="black", cex0=0.3, ...)
96 91
 #	MA-plot with color coding for controls
97 92
 #	Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003.
98
-#	Last modified 23 April 2013.
93
+#	Last modified 13 April 2014.
99 94
 {
100 95
 #	Check legend
101 96
 	legend.position <- "topleft"
... ...
@@ -114,65 +109,69 @@ plotMA.default <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[ar
114 109
 
115 110
 #	If no status information, just plot points normally
116 111
 	if(is.null(status) || all(is.na(status))) {
117
-		if(missing(pch)) pch <- 16
118
-		if(missing(cex)) cex <- 0.3
119
-		points(x,y,pch=pch[[1]],cex=cex[1])
112
+		points(x,y,pch=pch0,cex=cex0)
120 113
 		return(invisible())
121 114
 	}
122 115
 
123
-#	From here, status is not NULL and not all missing
116
+#	From here, status is not NULL and not all NA
124 117
 
125 118
 #	Check values
126
-	if(missing(values)) {
127
-		if(is.null(attr(status,"values")))
128
-			values <- names(sort(table(status),decreasing=TRUE))
129
-		else
130
-			values <- attr(status,"values")
119
+#	Default is to set the most frequent status value as background, and to highlight all other status values in order of frequency
120
+	if(is.null(values)) values <- attr(status,"values")
121
+	if(is.null(values)) {
122
+		status.values <- names(sort(table(status),decreasing=TRUE))
123
+		status <- as.character(status)
124
+		values <- status.values[-1]
131 125
 	}
132 126
 	nvalues <- length(values)
127
+	if(nvalues==0L) {
128
+		points(x,y,pch=pch0,cex=cex0)
129
+		return(invisible())
130
+	}
131
+
132
+#	From here, values has positive length
133 133
 
134 134
 #	Plot non-highlighted points
135
-	sel <- !(status %in% values)
136
-	nonhi <- any(sel)
137
-	if(nonhi) points(x[sel],y[sel],pch=16,cex=0.3)
135
+	bg <- !(status %in% values)
136
+	nonhi <- any(bg)
137
+	if(nonhi) points(x[bg],y[bg],pch=pch0,cex=cex0)
138 138
 
139
-	if(missing(pch)) {
140
-		if(is.null(attr(status,"pch")))
141
-			pch <- rep(16,nvalues)
142
-		else
143
-			pch <- attr(status,"pch")
144
-	}
139
+#	Check parameters for plotting highlighted points
145 140
 
146
-	if(missing(cex)) {
147
-		if(is.null(attr(status,"cex"))) {
148
-			cex <- rep(1,nvalues)
149
-			if(!nonhi) cex[1] <- 0.3
150
-		} else
151
-			cex <- attr(status,"cex")
152
-	}
141
+	if(is.null(pch)) pch <- attr(status,"pch")
142
+	if(is.null(pch)) pch <- pch0
143
+	pch <- rep(pch,length=nvalues)
153 144
 
154
-	if(missing(col)) {
155
-		if(is.null(attr(status,"col"))) {
156
-			col <- nonhi + 1:nvalues
157
-		} else
158
-			col <- attr(status,"col")
159
-	}
145
+	if(is.null(cex)) cex <- attr(status,"cex")
146
+	if(is.null(cex)) cex <- 1
147
+	cex <- rep(cex,length=nvalues)
160 148
 
161
-	pch <- rep(pch,length=nvalues)
149
+	if(is.null(col)) col <- attr(status,"col")
150
+	if(is.null(col)) col <- nonhi + 1L:nvalues
162 151
 	col <- rep(col,length=nvalues)
163
-	cex <- rep(cex,length=nvalues)
164 152
 
165
-#	Plot highlighted classes of points
153
+#	Plot highlighted points
166 154
 	for (i in 1:nvalues) {
167 155
 		sel <- status==values[i]
168 156
 		points(x[sel],y[sel],pch=pch[[i]],cex=cex[i],col=col[i])
169 157
 	}
170 158
 
171 159
 	if(legend) {
160
+		if(nonhi) {
161
+#			Include background value in legend
162
+			bg.value <- unique(status[bg])
163
+			if(length(bg.value) > 1) bg.value <- "Other"
164
+			values <- c(bg.value,values)
165
+			pch <- c(pch0,pch)
166
+			col <- c(col0,col)
167
+			cex <- c(cex0,cex)
168
+		}
169
+		h <- cex>0.5
170
+		cex[h] <- 0.5+0.8*(cex[h]-0.5)
172 171
 		if(is.list(pch))
173
-			legend(legend.position,legend=values,fill=col,col=col,cex=0.9)
172
+			legend(legend.position,legend=values,fill=col,col=col,cex=0.9,pt.cex=cex)
174 173
 		else
175
-			legend(legend.position,legend=values,pch=pch,,col=col,cex=0.9)
174
+			legend(legend.position,legend=values,pch=pch,,col=col,cex=0.9,pt.cex=cex)
176 175
 	}
177 176
 	invisible()
178 177
 }
... ...
@@ -3,7 +3,7 @@
3 3
 read.ilmn <- function(files=NULL, ctrlfiles=NULL, path=NULL, ctrlpath=NULL, probeid="Probe", annotation=c("TargetID", "SYMBOL"), expr="AVG_Signal", other.columns="Detection",sep="\t", quote="\"", verbose=TRUE, ...)
4 4
 #	Read one or more files of Illumina BeadStudio output
5 5
 #	Wei Shi and Gordon Smyth.
6
-#	Created 15 July 2009. Last modified 27 November 2013.
6
+#	Created 15 July 2009. Last modified 21 July 2014.
7 7
 {
8 8
 	if(!is.null(files)){
9 9
 		f <- unique(files)
... ...
@@ -34,12 +34,22 @@ read.ilmn <- function(files=NULL, ctrlfiles=NULL, path=NULL, ctrlpath=NULL, prob
34 34
 			else
35 35
 				elist.ctrl <- cbind(elist.ctrl, elist.ctrl1)
36 36
 		}
37
-		elist.ctrl$genes$Status <- elist.ctrl$genes[,ncol(elist.ctrl$genes)]
37
+		
38
+		if(is.null(elist.ctrl$genes)) elist.ctrl$genes <- data.frame(Status = rep("negative", nrow(elist.ctrl)))
39
+		else {
40
+			ctrl.status.negative <- apply(elist.ctrl$genes, 2, function(x) sum(tolower(x) %in% "negative"))
41
+			STATUS.col <- which.max(ctrl.status.negative)
42
+			if(ctrl.status.negative[STATUS.col] > 0) elist.ctrl$genes$Status <- elist.ctrl$genes[[STATUS.col]]
43
+			else elist.ctrl$genes$Status <- "negative"
44
+		}
38 45
 	}
39 46
 	
40 47
 	if(!is.null(files))
41 48
 		if(!is.null(ctrlfiles)){
42
-			colnames(elist.ctrl$genes) <- colnames(elist$genes)
49
+			REG.col <- setdiff(colnames(elist$genes), colnames(elist.ctrl$genes))
50
+			if(length(REG.col)) for(i in REG.col) elist.ctrl$genes[[i]] <- NA
51
+			CTRL.col <- setdiff(colnames(elist.ctrl$genes), colnames(elist$genes))
52
+			if(length(CTRL.col)) for(i in CTRL.col) elist$genes[[i]] <- NA
43 53
 			return(rbind(elist, elist.ctrl))
44 54
 		}
45 55
 		else
... ...
@@ -64,7 +74,7 @@ read.ilmn.targets <- function(targets, ...)
64 74
 .read.oneilmnfile <- function(fname, probeid, annotation, expr, other.columns, sep, quote, verbose, ...)
65 75
 #	Read a single file of Illumina BeadStudio output
66 76
 #	Wei Shi and Gordon Smyth
67
-#	Created 15 July 2009. Last modified 16 June 2014.
77
+#	Created 15 July 2009. Last modified 21 July 2014.
68 78
 {
69 79
 	h <- readGenericHeader(fname,columns=expr,sep=sep)
70 80
 	skip <- h$NHeaderRecords
... ...
@@ -101,7 +111,7 @@ read.ilmn.targets <- function(targets, ...)
101 111
 #	Add probe annotation	
102 112
 	if(length(anncol)) {
103 113
 		elist$genes <- x[,anncol,drop=FALSE]
104
-		if(!any(duplicated(pids))) row.names(elist$genes) <- pids
114
+		if(length(pids) & !any(duplicated(pids))) row.names(elist$genes) <- pids
105 115
 	}
106 116
 
107 117
 #	elist$targets <- data.frame(SampleNames=snames, stringsAsFactors=FALSE)
... ...
@@ -1,3 +1,28 @@
1
+ 8 Aug 2014: limma 3.21.12
2
+
3
+- The definition of the M and A axes for an MA-plot of single channel
4
+  data is changed slightly.  Previously the A-axis was the average of
5
+  all arrays in the dataset -- this has been definition since MA-plots
6
+  were introduced for single channel data in April 2003.  Now an
7
+  artificial array is formed by averaging all arrays other than the
8
+  one to be plotted.  Then a mean-difference plot is formed from the
9
+  specified array and the artificial array.  This change ensures the 
10
+  specified and artificial arrays are computed from independent data,
11
+  and ensures the MA-plot will reduce to a correct mean-difference
12
+  plot when there are just two arrays in the dataset.
13
+
14
+ 7 Aug 2014: limma 3.21.11
15
+
16
+- diffSplice() now includes Simes adjusted p-values
17
+
18
+- topSplice() has a new level="hybrid" argument for ranking genes by
19
+  Simes adjusted p-values.
20
+
21
+- goana() is now an S3 generic function.
22
+
23
+- goana() can now optionally adjust for gene length or abundance
24
+  bias.
25
+
1 26
 27 June 2014: limma 3.21.10
2 27
 
3 28
 - remove col argument from plotMDS(), as it handled by ... as are
... ...
@@ -3448,6 +3473,10 @@ Apr 25 2003: limma 0.9.7
3448 3473
 The smawehi package was renamed to limma, with the title "Linear Models
3449 3474
 for Microarray Data" and became part of the Bioconductor project.
3450 3475
 
3476
+Apr 7, 2003
3477
+
3478
+MA plots added for both two-color and single channel data.
3479
+
3451 3480
 11 November 2002: smawehi 0.1
3452 3481
 
3453 3482
 smawehi package made publicly available for the first time, through
... ...
@@ -30,10 +30,10 @@ The method is explained briefly in Majewski et al (2010) and in full detail in P
30 30
 The \code{subset} argument specifies whether and how the fit object should be subsetted.
31 31
 Ideally, only genes that are truly differentially expressed for one or both of the contrasts should be used estimate the biological correlation.
32 32
 The default is \code{"all"}, which uses all genes in the fit object to estimate the biological correlation.
33
-The option \code{"Fpval"} chooses genes based on how many F-test p-values are estimated to be truly significant using the method \code{propNotDE}.
33
+The option \code{"Fpval"} chooses genes based on how many F-test p-values are estimated to be truly significant using the function \code{propTrueNull}.
34 34
 This should capture genes that display any evidence of differential expression in either of the two contrasts.
35 35
 The options \code{"p.union"} and \code{"p.int"} are based on the moderated t p-values from both contrasts.
36
-From the \code{propNotDE} method an estimate of the number of p-values truly significant in either of the two contrasts can be obtained.
36
+From the \code{propTrueNull} function an estimate of the number of p-values truly significant in either of the two contrasts can be obtained.
37 37
 "p.union" takes the union of these genes and \code{"p.int"} takes the intersection of these genes.
38 38
 The other options, \code{"logFC"} and \code{"predFC"} subsets on genes that attain a logFC or predFC at least as large as the 90th percentile of the log fold changes or predictive log fold changes on the absolute scale. 
39 39
 
... ...
@@ -1,48 +1,97 @@
1 1
 \name{goana}
2 2
 \alias{goana}
3
+\alias{goana.default}
4
+\alias{goana.MArrayLM}
3 5
 \title{Gene Ontology Analysis of Differentially Expressed Genes}
4 6
 \description{
5
-Hypergeometric tests on up and down differentially expressed genes for over-representation of GO Terms.
7
+Fisher's exact test or Wallenius' noncentral hypergeometric test on differentially expressed genes for over-representation of gene ontology (GO) terms.
6 8
 }
7 9
 \usage{
8
-goana(fit, coef = ncol(fit), geneid = "GeneID", FDR = 0.05, species = "Hs")
10
+\method{goana}{default}(de, universe = NULL, species = "Hs", weights = NULL, \dots)
11
+\method{goana}{MArrayLM}(de, coef = ncol(de), geneid = rownames(de), FDR = 0.05, 
12
+      species = "Hs", trend = FALSE, \dots)
9 13
 }
10 14
 \arguments{
11
-  \item{fit}{should be an object of class \code{MArrayLM} as produced by \code{lmFit} and \code{eBayes}.}
15
+  \item{de}{Differentially expressed genes.  Can be a \code{MArrayLM} fit object, or a vector or list of Entrez Gene IDs of differentially expressed genes.}
16
+  \item{universe}{vector specifying the Entrez Gene identifiers of the universe. If \code{NULL}, all Entrez Gene IDs with any gene ontology annotation will be the universe.}
17
+  \item{species}{character string specifying the species. Possible values are \code{"Hs"}, \code{"Mm"}, \code{"Rn"} or \code{"Dm"}.}
18
+  \item{weights}{probability weighting of the universe.}
12 19
   \item{coef}{column number or column name specifying which coefficient or contrast of the linear model is of interest.}
13
-  \item{geneid}{Entrez gene identifiers. Either a vector of length nrow(fit) or the name of the column of fit$genes containing the Entrez Gene IDs.}
20
+  \item{geneid}{Entrez Gene identifiers. Either a vector of length \code{nrow(de)} or the name of the column of \code{de$genes} containing the Entrez Gene IDs.}
14 21
   \item{FDR}{numeric. False discovery rate of differentially expressed genes less than this cutoff.}
15
-  \item{species}{character string specifying the species. Possible values are \code{"Hs"}, \code{"Mm"}, \code{"Rn"} or \code{"Dm"}.}
22
+  \item{trend}{adjust analysis for gene length or abundance?
23
+  Can be logical, or a numeric vector of covariate values, or the name of the column of \code{de$genes} containing the covariate values.
24
+  If \code{TRUE}, then \code{de$Amean} is used as the covariate.}
25
+  \item{\dots}{further arguments.}
16 26
 }
17 27
 \details{
18
-This function is used in conjunction with \code{\link{lmFit}}, \code{\link{eBayes}} and \code{\link{topGO}}.
28
+Performs a Gene Ontology enrichment analysis for one for more gene lists using the appropriate Bioconductor organism package.
29
+The gene lists must be supplied as Entrez Gene IDs.
30
+
31
+If applied to a linear model fit, then the differentially expressed genes are extracted automatically from the fit object, and analyses are done separately for the up and down significant genes.
32
+
33
+If \code{trend=FALSE}, the function computes one-sided hypergeometric tests.
34
+If \code{trend=TRUE} or a covariate is supplied, then a trend is fitted to the differential expression results and the method of Young et al (2010) is used to adjust for this trend.
35
+The adjusted test uses Wallenius' noncentral hypergeometric distribution.
19 36
 }
20 37
 \value{
21
-  A dataframe with rows for GO IDs and the following columns:
38
+  A dataframe with rows for GO IDs and the following possible columns:
22 39
   \item{Term}{GO terms.}
23 40
   \item{Ont}{GO term ontology from BP, CC and MF.}
24 41
   \item{N}{Number of genes in the GO term.}
25 42
   \item{Up}{Number of up regulated differentially expressed genes for testing.}
26 43
   \item{Down}{Number of down regulated differentially expressed genes for testing.}
27
-  \item{P.Up}{P value from hypergeometric test of up regulated differentially expressed genes for over representation of GO terms.}
28
-  \item{P.Down}{P value from hypergeometric test of down regulated differentially expressed genes for over representation of GO terms.}
44
+  \item{DE1}{Number of differentially expressed genes for testing.}
45
+  \item{P.Up}{P value from hypergeometric test or Wallenius' noncentral hypergeometric test of up regulated differentially expressed genes for over representation of GO terms.}
46
+  \item{P.Down}{P value from hypergeometric test or Wallenius' noncentral hypergeometric test of down regulated differentially expressed genes for over representation of GO terms.}
47
+  \item{P.DE1}{P value from hypergeometric test or Wallenius' noncentral hypergeometric test of differentially expressed genes for over representation of GO terms.}
48
+}
49
+
50
+\references{
51
+  Young, M. D., Wakefield, M. J., Smyth, G. K., Oshlack, A. (2010).
52
+  Gene ontology analysis for RNA-seq: accounting for selection bias.
53
+  \emph{Genome Biology} 11, R14.
54
+  \url{http://genomebiology.com/2010/11/2/R14}
29 55
 }
30 56
 
31 57
 \seealso{
32 58
 \code{\link{topGO}}
33 59
 
34
-The gostats package also does GO analyses with some extra options.
60
+The goseq package implements a similar GO analysis.
61
+The goseq version will work with a variety of gene identifiers, not only Entrez Gene as here, and includes a database of gene length information for various species.
62
+
63
+The gostats package also does GO analyses with some different options.
35 64
 }
36 65
 \author{Gordon Smyth and Yifang Hu}
37 66
 \examples{
38 67
 
39 68
 \dontrun{
40 69
 
41
-fit <- lmFit(y,design)
70
+fit <- lmFit(y, design)
42 71
 fit <- eBayes(fit)
43
-go.results <- goana(fit)
44
-topGO(go.results, sort = "up")
45
-topGO(go.results, sort = "down")
72
+
73
+# goana without adjusting for gene length bias
74
+go.fisher <- goana(fit)
75
+topGO(go.fisher, sort = "up")
76
+topGO(go.fisher, sort = "down")
77
+
78
+# goana adjusting for gene length bias 
79
+go.len <- goana(fit, geneid = "GeneID", trend = "Length")
80
+topGO(go.len, sort = "up")
81
+topGO(go.len, sort = "down")
82
+
83
+# goana adjusting for gene abundance
84
+go.abund <- goana(fit, geneid = "GeneID", trend = TRUE)
85
+topGO(go.abund, sort = "up")
86
+topGO(go.abund, sort = "down")
87
+
88
+# goana.default
89
+go.de <- goana(list(DE1 = EG.DE1, DE2 = EG.DE2, DE3 = EG.DE3))
90
+topGO(go.de, sort = "DE1")
91
+topGO(go.de, sort = "DE2")
92
+topGO(go.de, ontology = "BP", sort = "DE3")
93
+topGO(go.de, ontology = "CC", sort = "DE3")
94
+topGO(go.de, ontology = "MF", sort = "DE3")
46 95
 
47 96
 }
48 97
 
... ...
@@ -1,22 +1,24 @@
1
-\title{Plot exons on differentially spliced gene}
1
+\title{Differential splicing plot}
2 2
 \name{plotSplice}
3 3
 \alias{plotSplice}
4 4
 \description{
5
-Plot exons of differentially spliced gene.
5
+Plot relative log-fold changes by exons for the specified gene.
6 6
 }
7 7
 \usage{
8
-plotSplice(fit, coef=ncol(fit), geneid=NULL, rank=1L, FDR = 0.05)
8
+plotSplice(fit, coef=ncol(fit), geneid=NULL, genecolname=NULL, rank=1L, FDR = 0.05)
9 9
 }
10 10
 \arguments{
11 11
   \item{fit}{\code{MArrayLM} fit object produced by \code{diffSplice}.}
12 12
   \item{coef}{the coefficient (column) of fit for which differentially splicing is assessed.}
13 13
   \item{geneid}{character string, ID of the gene to plot.}
14
+  \item{genecolname}{column name of \code{fit$genes} containing gene IDs. Defaults to \code{fit$genecolname}.}
14 15
   \item{rank}{integer, if \code{geneid=NULL} then this ranked gene will be plotted.}
15 16
   \item{FDR}{numeric, mark exons with false discovery rate less than this cutoff.}
16 17
 }
17 18
 
18 19
 \details{
19
-Plots interaction log2-fold-change by exon for the specified gene.
20
+Plot relative log2-fold-changes by exon for the specified gene.
21
+The relative logFC is the difference between the exon's logFC and the overall logFC for the gene, as computed by \code{diffSplice}.
20 22
 }
21 23
 
22 24
 \value{A plot is created on the current graphics device.}
... ...
@@ -27,3 +29,4 @@ Plots interaction log2-fold-change by exon for the specified gene.
27 29
 An overview of diagnostic functions available in LIMMA is given in \link{09.Diagnostics}.
28 30
 }
29 31
 \examples{# See diffSplice}
32
+\keyword{rna-seq}
... ...
@@ -10,16 +10,15 @@
10 10
 Creates an MA-plot with color coding for control spots.
11 11
 }
12 12
 \usage{
13
-\method{plotMA}{default}(MA, array = 1, xlab = "A", ylab = "M", main = colnames(MA)[array],
14
-       xlim = NULL, ylim = NULL, status, values, pch, col, cex, legend = TRUE,
15
-       zero.weights = FALSE, ...)
16
-\method{plotMA}{MArrayLM}(MA, coef = ncol(MA), xlab = "AveExpr", ylab = "logFC",
17
-       main = colnames(MA)[coef], xlim = NULL, ylim = NULL, status, values, pch, col,
18
-       cex, legend = TRUE, zero.weights = FALSE, ...)
13
+\method{plotMA}{default}(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL,
14
+       ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, \dots)
15
+\method{plotMA}{MArrayLM}(MA, coef=ncol(MA), xlab="AveExpr", ylab="logFC",
16
+       main=colnames(MA)[coef], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL,
17
+       col=NULL, cex=NULL, legend=TRUE, zero.weights=FALSE, \dots)
19 18
 }
20 19
 \arguments{
21
-  \item{MA}{an \code{RGList}, \code{MAList}, \code{EList} or \code{MArrayLM} object.
22
-  Alternatively a \code{matrix} or \code{ExpressionSet} object.}
20
+  \item{MA}{an \code{RGList}, \code{MAList}, \code{EList}, \code{ExpressionSet} or \code{MArrayLM} object.
21
+  Alternatively a numeric \code{matrix}.}
23 22
   \item{array}{integer giving the array to be plotted.}
24 23
   \item{coef}{integer giving the linear model coefficient to be plotted.}
25 24
   \item{xlab}{character string giving label for x-axis}
... ...
@@ -28,7 +27,7 @@ Creates an MA-plot with color coding for control spots.
28 27
   \item{xlim}{numeric vector of length 2 giving limits for x-axis, defaults to min and max of the data}
29 28
   \item{ylim}{numeric vector of length 2 giving limits for y-axis, defaults to min and max of the data}
30 29
   \item{status}{character vector giving the control status of each spot on the array, of same length as the number of rows of \code{MA$M}.
31
-  If omitted, all points are plotted in the default color, symbol and size.}
30
+  If omitted, all points are plotted in the default color (\code{"black"}), symbol (\code{pch=16}) and size (\code{cex=0.3}).}
32 31
   \item{values}{character vector giving values of \code{status} to be highlighted on the plot. Defaults to unique values of \code{status}.
33 32
   Ignored if there is no \code{status} vector.}
34 33
   \item{pch}{vector or list of plotting characters. Default is integer code 16 which gives a solid circle.
... ...
@@ -36,25 +35,27 @@ Creates an MA-plot with color coding for control spots.
36 35
   \item{col}{numeric or character vector of colors, of the same length as \code{values}. Defaults to \code{1:length(values)}.
37 36
   Ignored if there is no \code{status} vector.}
38 37
   \item{cex}{numeric vector of plot symbol expansions, of the the same length as \code{values}. 
39
-  Defaults to 0.3 for the most common status value and 1 for the others.
38
+  Defaults is 1.
40 39
   Ignored if there is no \code{status} vector.}
41 40
   \item{legend}{logical, should a legend of plotting symbols and colors be included. Can also be a character string giving position to place legend. Ignored if there is no \code{status} vector.}
42 41
   \item{zero.weights}{logical, should spots with zero or negative weights be plotted?}
43
-  \item{...}{any other arguments are passed to \code{plot}}
42
+  \item{\dots}{any other arguments are passed to \code{plot}}
44 43
 }
45 44
 
46 45
 \details{
47 46
 An MA-plot is a plot of log-intensity ratios (M-values) versus log-intensity averages (A-values).
48
-If \code{MA} is an \code{RGList} or \code{MAList} then this function produces an ordinary within-array MA-plot.
49
-If \code{MA} is an \code{MArrayLM} object, then the plot is an fitted model MA-plot in which the estimated coefficient is on the y-axis and the average A-value is on the x-axis.
47
+For two color data objects, a within-array MA-plot is produced with the M and A values computed from the two channels for the specified array.
48
+This is the same as a mean-difference plot (\code{\link{mdplot}}) with the red and green log2-intensities of the array providing the two columns.
49
+
50
+For single channel data objects, then a between-array MA-plot is produced.
51
+An articifial array is produced by averaging all the arrays other than the array specified.
52
+A mean-difference plot is then producing from the specified array and the artificial array.
53
+Note that this procedure reduces to an ordinary mean-difference plot when there are just two arrays total.
50 54
 
51
-If \code{MA} is a \code{matrix} or \code{ExpressionSet} object, then this function produces a between-array MA-plot.
52
-In this case the A-values in the plot are the average log-intensities across the arrays and the M-values are the deviations of the log-intensities for the specified array from the average.
53
-If there are more than five arays, then the average is computed robustly using medians.
54
-With five or fewer arrays, it is computed by means.
55
+If \code{MA} is an \code{MArrayLM} object, then the plot is an fitted model MA-plot in which the estimated coefficient is on the y-axis and the average A-value is on the x-axis.
55 56
 
56 57
 The \code{status} vector is intended to specify the control status of each spot, for example "gene", "ratio control", "house keeping gene", "buffer" and so on.
57
-The vector is usually computed using the function \code{\link{controlStatus}} and a spot-types file.
58
+The vector is often computed using the function \code{\link{controlStatus}} and a spot-types file.
58 59
 However the function may be used to highlight any subset of spots.
59 60
 
60 61
 The \code{status} can be included as the component \code{MA$genes$Status} instead of being passed as an argument to \code{plotMA}.
... ...
@@ -77,8 +78,7 @@ status[4:6] <- "M=3"
77 78
 MA$M[4:6] <- 3
78 79
 status[7:9] <- "M=-3"
79 80
 MA$M[7:9] <- -3
80
-plotMA(MA,main="MA-Plot with Simulated Data",
81
-       status=status,values=c("M=0","M=3","M=-3"),col=c("blue","red","green"))
81
+plotMA(MA,main="MA-Plot with Simulated Data",status=status,values=c("M=0","M=3","M=-3"),col=c("blue","red","green"))
82 82
 
83 83
 #  Same as above
84 84
 attr(status,"values") <- c("M=0","M=3","M=-3")
... ...
@@ -5,18 +5,27 @@
5 5
 Top table ranking the most differentially spliced genes or exons.
6 6
 }
7 7
 \usage{
8
-topSplice(fit, coef=ncol(fit), level="exon", number=10, FDR=1)
8
+topSplice(fit, coef=ncol(fit), level="hybrid", number=10, FDR=1)
9 9
 }
10 10
 \arguments{
11 11
   \item{fit}{\code{MArrayLM} fit object produced by \code{diffSplice}.}
12 12
   \item{coef}{the coefficient (column) of fit for which differentially splicing is assessed.}
13
-  \item{level}{character string, should the table be by \code{"exon"} or by \code{"gene"}.}
13
+  \item{level}{character string, possible values are \code{"gene"}, \code{"exon} or \code{"hybrid}.
14
+    \code{"gene"} gives F-tests for each gene.
15
+    \code{"exon"} gives t-tests for each exon.
16
+    \code{"hybrid"} gives gene level results with p-values derived from the exon-level tests.}
14 17
   \item{number}{integer, maximum number of rows to output.}
15 18
   \item{FDR}{numeric, only show exons or genes with false discovery rate less than this cutoff.}
16 19
 }
17 20
 
18 21
 \details{
19
-Ranks genes by the Plots interaction log2-fold-change by exon for the specified gene.
22
+Ranks genes or exons by evidence for differential splicing.
23
+The gene-level test is an F-test for any differences in exon usage between experimental conditions.
24
+The exon-level tests are t-tests for differences between each exon and all other exons for the same gene.
25
+
26
+The hybrid testing method processes the exon-level p-values to give an overall call of differential splicing for each gene.
27
+It returns the minimum Simes-adjusted p-values for each gene.
28
+It is likely to be more powerful than the genewise F-tests if only a minority of exons for a gene are differentially spliced.
20 29
 }
21 30
 
22 31
 \value{A data.frame with any annotation columns found in \code{fit} plus the following columns