- The definition of the M and A axes for an MA-plot of single channel
data is changed slightly. Previously the A-axis was the average of
all arrays in the dataset -- this has been definition since MA-plots
were introduced for single channel data in April 2003. Now an
artificial array is formed by averaging all arrays other than the
one to be plotted. Then a mean-difference plot is formed from the
specified array and the artificial array. This change ensures the
specified and artificial arrays are computed from independent data,
and ensures the MA-plot will reduce to a correct mean-difference
plot when there are just two arrays in the dataset.
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/limma@93246 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,13 +1,13 @@ |
1 | 1 |
Package: limma |
2 |
-Version: 3.21.10 |
|
3 |
-Date: 2014/06/27 |
|
2 |
+Version: 3.21.12 |
|
3 |
+Date: 2014/08/08 |
|
4 | 4 |
Title: Linear Models for Microarray Data |
5 | 5 |
Description: Data analysis, linear models and differential expression for microarray data. |
6 | 6 |
Author: Gordon Smyth [cre,aut], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Natalie Thorne [ctb], Davis McCarthy [ctb], Di Wu [ctb], Yifang Hu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb] |
7 | 7 |
Maintainer: Gordon Smyth <[email protected]> |
8 | 8 |
License: GPL (>=2) |
9 | 9 |
Depends: R (>= 2.3.0), methods |
10 |
-Suggests: statmod (>= 1.2.2), splines, locfit, MASS, ellipse, affy, vsn, AnnotationDbi, org.Hs.eg.db, GO.db, illuminaio |
|
10 |
+Suggests: statmod (>= 1.2.2), splines, locfit, MASS, ellipse, affy, vsn, AnnotationDbi, org.Hs.eg.db, GO.db, illuminaio, BiasedUrn |
|
11 | 11 |
LazyLoad: yes |
12 | 12 |
URL: http://bioinf.wehi.edu.au/limma |
13 | 13 |
biocViews: ExonArray, GeneExpression, Transcription, AlternativeSplicing, DifferentialExpression, DifferentialSplicing, GeneSetEnrichment, DataImport, Genetics, Bayesian, Clustering, Regression, TimeCourse, Microarray, microRNAArray, mRNAMicroarray, OneChannel, ProprietaryPlatforms, TwoChannel, RNASeq, BatchEffect, MultipleComparison, Normalization, Preprocessing, QualityControl |
... | ... |
@@ -2,7 +2,7 @@ diffSplice <- function(fit,geneid,exonid=NULL,verbose=TRUE) |
2 | 2 |
# Test for splicing variants between conditions |
3 | 3 |
# using linear model fit of exon data. |
4 | 4 |
# Charity Law and Gordon Smyth |
5 |
-# Created 13 Dec 2013. Last modified 18 Feb 2014. |
|
5 |
+# Created 13 Dec 2013. Last modified 7 Aug 2014. |
|
6 | 6 |
{ |
7 | 7 |
exon.genes <- fit$genes |
8 | 8 |
if(is.null(exon.genes)) exon.genes <- data.frame(ExonID=1:nrow(fit)) |
... | ... |
@@ -106,78 +106,98 @@ diffSplice <- function(fit,geneid,exonid=NULL,verbose=TRUE) |
106 | 106 |
out$gene.F.p.value <- gene.F.p.value |
107 | 107 |
|
108 | 108 |
# Which columns of exon.genes contain gene level annotation? |
109 |
- exon.lastexon <- cumsum(gene.nexons) |
|
110 |
- exon.firstexon <- exon.lastexon-gene.nexons+1 |
|
109 |
+ gene.lastexon <- cumsum(gene.nexons) |
|
110 |
+ gene.firstexon <- gene.lastexon-gene.nexons+1 |
|
111 | 111 |
no <- logical(nrow(exon.genes)) |
112 |
- isdup <- vapply(exon.genes,duplicated,no)[-exon.firstexon,,drop=FALSE] |
|
112 |
+ isdup <- vapply(exon.genes,duplicated,no)[-gene.firstexon,,drop=FALSE] |
|
113 | 113 |
isgenelevel <- apply(isdup,2,all) |
114 |
- out$gene.genes <- exon.genes[exon.lastexon,isgenelevel, drop=FALSE] |
|
114 |
+ out$gene.genes <- exon.genes[gene.lastexon,isgenelevel, drop=FALSE] |
|
115 | 115 |
out$gene.genes$NExons <- gene.nexons |
116 |
+ out$gene.firstexon <- gene.firstexon |
|
117 |
+ out$gene.lastexon <- gene.lastexon |
|
118 |
+ |
|
119 |
+# Simes adjustment of exon level p-values |
|
120 |
+ simes <- function(p,n) { |
|
121 |
+ p <- p[-which.max(p)] |
|
122 |
+ min(sort(p)*(n-1)/(1:(n-1))) |
|
123 |
+ } |
|
124 |
+ out$gene.simes.p.value <- out$gene.F.p.value |
|
125 |
+ for (i in 1:ngenes) for (j in 1:ncol(fit)) { |
|
126 |
+ out$gene.simes.p.value[i,j] <- simes(exon.p.value[gene.firstexon[i]:gene.lastexon[i],j],gene.nexons[i]) |
|
127 |
+ } |
|
116 | 128 |
|
117 | 129 |
out |
118 | 130 |
} |
119 | 131 |
|
120 |
-topSplice <- function(fit, coef=ncol(fit), level="exon", number=10, FDR=1) |
|
121 |
-# Collate voomex results in data.frame, ordered from most significant at top |
|
132 |
+topSplice <- function(fit, coef=ncol(fit), level="hybrid", number=10, FDR=1) |
|
133 |
+# Collate diffSplice results into data.frame, ordered from most significant at top |
|
122 | 134 |
# Gordon Smyth |
123 |
-# Created 18 Dec 2013. Last modified 17 March 2014. |
|
135 |
+# Created 18 Dec 2013. Last modified 7 Aug 2014. |
|
124 | 136 |
{ |
125 | 137 |
coef <- coef[1] |
126 |
- exon <- match.arg(level,c("exon","gene")) |
|
127 |
- if(level=="exon") { |
|
138 |
+ level <- match.arg(level,c("hybrid","exon","gene")) |
|
139 |
+ switch(level, |
|
140 |
+ "exon" = { |
|
128 | 141 |
number <- min(number,nrow(fit$coefficients)) |
129 | 142 |
P <- fit$p.value[,coef] |
130 | 143 |
BH <- p.adjust(P, method="BH") |
131 | 144 |
if(FDR<1) number <- min(number,sum(BH<FDR)) |
132 | 145 |
o <- order(P)[1:number] |
133 | 146 |
data.frame(fit$genes[o,,drop=FALSE],logFC=fit$coefficients[o,coef],t=fit$t[o,coef],P.Value=P[o],FDR=BH[o]) |
134 |
- } else { |
|
147 |
+ }, |
|
148 |
+ gene = { |
|
135 | 149 |
number <- min(number,nrow(fit$gene.F)) |
136 | 150 |
P <- fit$gene.F.p.value[,coef] |
137 | 151 |
BH <- p.adjust(P, method="BH") |
138 | 152 |
if(FDR<1) number <- min(number,sum(BH<FDR)) |
139 | 153 |
o <- order(P)[1:number] |
140 | 154 |
data.frame(fit$gene.genes[o,,drop=FALSE],F=fit$gene.F[o,coef],P.Value=P[o],FDR=BH[o]) |
155 |
+ }, |
|
156 |
+ hybrid = { |
|
157 |
+ number <- min(number,nrow(fit$gene.F)) |
|
158 |
+ P <- fit$gene.simes.p.value[,coef] |
|
159 |
+ BH <- p.adjust(P, method="BH") |
|
160 |
+ if(FDR<1) number <- min(number,sum(BH<FDR)) |
|
161 |
+ o <- order(P)[1:number] |
|
162 |
+ data.frame(fit$gene.genes[o,,drop=FALSE],P.Value=P[o],FDR=BH[o]) |
|
141 | 163 |
} |
164 |
+ ) |
|
142 | 165 |
} |
143 | 166 |
|
144 |
-plotSplice <- function(fit, coef=ncol(fit), geneid=NULL, rank=1L, FDR = 0.05) |
|
145 |
-# Plot exons of most differentially spliced gene |
|
167 |
+plotSplice <- function(fit, coef=ncol(fit), geneid=NULL, genecolname=NULL, rank=1L, FDR = 0.05) |
|
168 |
+# Plot exons of chosen gene |
|
169 |
+# fit is output from diffSplice |
|
146 | 170 |
# Gordon Smyth and Yifang Hu |
147 | 171 |
# Created 3 Jan 2014. Last modified 19 March 2014. |
148 | 172 |
{ |
149 |
- # Gene labelling including gene symbol |
|
150 |
- genecolname <- fit$genecolname |
|
151 |
- genelab <- grep(paste0(genecolname,"|Symbol|symbol"), colnames(fit$gene.genes), value = T) |
|
152 |
- |
|
173 |
+ if(is.null(genecolname)) |
|
174 |
+ genecolname <- fit$genecolname |
|
175 |
+ else |
|
176 |
+ genecolname <- as.character(genecolname) |
|
177 |
+ |
|
153 | 178 |
if(is.null(geneid)) { |
179 |
+# Find gene from specified rank |
|
154 | 180 |
if(rank==1L) |
155 | 181 |
i <- which.min(fit$gene.F.p.value[,coef]) |
156 | 182 |
else |
157 | 183 |
i <- order(fit$gene.F.p.value[,coef])[rank] |
158 |
- |
|
159 |
- geneid <- paste(fit$gene.genes[i,genelab], collapse = ".") |
|
160 |
- |
|
184 |
+ geneid <- paste(fit$gene.genes[i,genecolname], collapse = ".") |
|
161 | 185 |
} else { |
162 |
- i <- which(fit$gene.genes[,fit$genecolname]==geneid) |
|
163 |
- |
|
164 |
- geneid <- paste(fit$gene.genes[i,genelab], collapse = ".") |
|
165 |
- |
|
186 |
+# Find gene from specified name |
|
187 |
+ geneid <- as.character(geneid) |
|
188 |
+ i <- which(fit$gene.genes[,genecolname]==geneid)[1] |
|
166 | 189 |
if(!length(i)) stop(paste("geneid",geneid,"not found")) |
167 | 190 |
} |
168 |
- exon.lastexon <- cumsum(fit$gene.genes$NExons[1:i]) |
|
169 |
- j <- (exon.lastexon[i-1]+1):exon.lastexon[i] |
|
170 | 191 |
|
171 |
- exoncolname <- fit$exoncolname |
|
192 |
+# Row numbers containing exons |
|
193 |
+ j <- fit$gene.firstexon[i]:fit$gene.lastexon[i] |
|
172 | 194 |
|
173 |
- if(is.null(exoncolname)){ |
|
195 |
+ exoncolname <- fit$exoncolname |
|
196 |
+ if(is.null(exoncolname)) { |
|
174 | 197 |
|
175 | 198 |
plot(fit$coefficients[j,coef], xlab = "Exon", ylab = "logFC (this exon vs rest)", main = geneid, type = "b") |
176 | 199 |
|
177 |
- } |
|
178 |
- |
|
179 |
- # Plot exons and mark exon ids on the axis |
|
180 |
- if(!is.null(exoncolname)) { |
|
200 |
+ } else { |
|
181 | 201 |
|
182 | 202 |
exon.id <- fit$genes[j, exoncolname] |
183 | 203 |
xlab <- paste("Exon", exoncolname, sep = " ") |
... | ... |
@@ -186,25 +206,24 @@ plotSplice <- function(fit, coef=ncol(fit), geneid=NULL, rank=1L, FDR = 0.05) |
186 | 206 |
axis(1, at = 1:length(j), labels = exon.id, las = 2, cex.axis = 0.6) |
187 | 207 |
mtext(xlab, side = 1, padj = 5.2) |
188 | 208 |
|
189 |
- # Mark the topSpliced exons |
|
209 |
+# Mark the topSpliced exons |
|
190 | 210 |
top <- topSplice(fit, coef = coef, number = Inf, level = "exon", FDR = FDR) |
191 | 211 |
m <- which(top[,genecolname] %in% fit$gene.genes[i,genecolname]) |
192 |
- |
|
193 | 212 |
if(length(m) > 0){ |
194 |
- |
|
195 |
- if(length(m) == 1) cex <- 1.5 else{ |
|
196 |
- |
|
213 |
+ if(length(m) == 1) |
|
214 |
+ cex <- 1.5 |
|
215 |
+ else { |
|
197 | 216 |
abs.fdr <- abs(log10(top$FDR[m])) |
198 | 217 |
from <- range(abs.fdr) |
199 | 218 |
to <- c(1,2) |
200 | 219 |
cex <- (abs.fdr - from[1])/diff(from) * diff(to) + to[1] |
201 |
- |
|
202 | 220 |
} |
203 | 221 |
mark <- match(top[m, exoncolname], exon.id) |
204 | 222 |
points((1:length(j))[mark], fit$coefficients[j[mark], coef], col = "red", pch = 16, cex = cex) |
205 | 223 |
} |
224 |
+ |
|
206 | 225 |
} |
207 | 226 |
|
208 | 227 |
abline(h=0,lty=2) |
209 |
- |
|
210 |
-} |
|
211 | 228 |
\ No newline at end of file |
229 |
+ invisible() |
|
230 |
+} |
... | ... |
@@ -1,30 +1,81 @@ |
1 |
-goana <- function(fit, coef = ncol(fit), geneid = "GeneID", FDR = 0.05, species = "Hs"){ |
|
1 |
+goana <- function(de,...) UseMethod("goana") |
|
2 |
+ |
|
3 |
+goana.MArrayLM <- function(de, coef = ncol(de), geneid = rownames(de), FDR = 0.05, species = "Hs", trend = FALSE, ...) |
|
2 | 4 |
# Gene ontology analysis of DE genes from linear model fit |
3 | 5 |
# Gordon Smyth and Yifang Hu |
4 |
-# Created 20 June 2014. Last modified 24 June 2014. |
|
5 |
- |
|
6 |
- # Check input |
|
7 |
- if(!is(fit, "MArrayLM")) stop("fit must be an MArrayLM object.") |
|
8 |
- if(is.null(fit$p.value)) stop("p value not found in fit object (from eBayes).") |
|
9 |
- if(is.null(fit$coefficients)) stop("coefficient not found in fit object.") |
|
10 |
- |
|
11 |
- if(length(geneid) == nrow(fit)){ |
|
6 |
+# Created 20 June 2014. Last modified 23 July 2014. |
|
7 |
+{ |
|
8 |
+ # Check fit |
|
9 |
+ if(is.null(de$p.value)) stop("p value not found in fit object (from eBayes).") |
|
10 |
+ if(is.null(de$coefficients)) stop("coefficient not found in fit object.") |
|
11 |
+ if(length(coef) != 1) stop("coef length needs to be 1.") |
|
12 |
+ ngenes <- nrow(de) |
|
13 |
+ |
|
14 |
+ # Check geneid |
|
15 |
+ # Can be either a vector of IDs or a column name |
|
16 |
+ geneid <- as.character(geneid) |
|
17 |
+ if(length(geneid) == ngenes) { |
|
18 |
+ universe <- geneid |
|
19 |
+ } else |
|
20 |
+ if(length(geneid) == 1L) { |
|
21 |
+ universe <- de$genes[[geneid]] |
|
22 |
+ if(is.null(universe)) stop(paste("Column",geneid,"not found in de$genes")) |
|
23 |
+ } else |
|
24 |
+ stop("geneid has incorrect length") |
|
25 |
+ |
|
26 |
+ # Check trend |
|
27 |
+ # Can be logical, or a numeric vector of covariate values, or the name of the column containing the covariate values |
|
28 |
+ if(is.logical(trend)) { |
|
29 |
+ if(trend) { |
|
30 |
+ covariate <- de$Amean |
|
31 |
+ if(is.null(covariate)) stop("Amean not found in fit") |
|
32 |
+ } |
|
33 |
+ } else |
|
34 |
+ if(is.numeric(trend)) { |
|
35 |
+ if(length(trend) != ngenes) stop("If trend is numeric, then length must equal nrow(de)") |
|
36 |
+ covariate <- trend |
|
37 |
+ trend <- TRUE |
|
38 |
+ } else { |
|
39 |
+ if(is.character(trend)) { |
|
40 |
+ if(length(trend) != 1L) stop("If trend is character, then length must be 1") |
|
41 |
+ covariate <- de$genes[[trend]] |
|
42 |
+ if(is.null(covariate)) stop(paste("Column",trend,"not found in de$genes")) |
|
43 |
+ trend <- TRUE |
|
44 |
+ } else |
|
45 |
+ stop("trend is neither logical, numeric nor character") |
|
46 |
+ } |
|
47 |
+ |
|
48 |
+ # Check FDR |
|
49 |
+ if(!is.numeric(FDR) | length(FDR) != 1) stop("FDR must be numeric and of length 1.") |
|
50 |
+ if(FDR < 0 | FDR > 1) stop("FDR should be between 0 and 1.") |
|
12 | 51 |
|
13 |
- fit$genes$GeneID <- geneid |
|
14 |
- EG.col <- "GeneID" |
|
52 |
+ # Get up and down DE genes |
|
53 |
+ fdr.coef <- p.adjust(de$p.value[,coef], method = "BH") |
|
54 |
+ EG.DE.UP <- universe[fdr.coef < FDR & de$coef[,coef] > 0] |
|
55 |
+ EG.DE.DN <- universe[fdr.coef < FDR & de$coef[,coef] < 0] |
|
56 |
+ de.gene <- list(Up=EG.DE.UP, Down=EG.DE.DN) |
|
57 |
+ |
|
58 |
+ # Fit monotonic cubic spline for DE genes vs. gene.weights |
|
59 |
+ if(trend) { |
|
60 |
+ PW <- isDE <- rep(0,nrow(de)) |
|
61 |
+ isDE[fdr.coef < FDR] <- 1 |
|
62 |
+ o <- order(covariate) |
|
63 |
+ PW[o] <- tricubeMovingAverage(isDE[o],span=0.5,full.length=TRUE) |
|
15 | 64 |
} |
65 |
+ if(!trend) PW <- NULL |
|
16 | 66 |
|
17 |
- else if(length(geneid) == 1) EG.col <- as.character(geneid) |
|
18 |
- |
|
19 |
- if(is.null(fit$genes)) stop("no annotation (genes) found.") |
|
20 |
- |
|
21 |
- EG.All <- as.character(fit$genes[[EG.col]]) |
|
22 |
- |
|
23 |
- # Get up and down DE genes |
|
24 |
- fdr.coef <- p.adjust(fit$p.value[,coef], method = "BH") |
|
67 |
+ NextMethod(de = de.gene, universe = universe, species = species, weights = PW, ...) |
|
68 |
+} |
|
25 | 69 |
|
26 |
- EG.DE.UP <- EG.All[fdr.coef < FDR & fit$coef[,coef] > 0] |
|
27 |
- EG.DE.DN <- EG.All[fdr.coef < FDR & fit$coef[,coef] < 0] |
|
70 |
+goana.default <- function(de, universe = NULL, species = "Hs", weights = NULL, ...) |
|
71 |
+# Gene ontology analysis of DE genes |
|
72 |
+# Gordon Smyth and Yifang Hu |
|
73 |
+# Created 20 June 2014. Last modified 23 July 2014. |
|
74 |
+{ |
|
75 |
+ # check de |
|
76 |
+ if(!is.list(de)) de <- list(DE1 = de) |
|
77 |
+ if(is.null(names(de))) names(de) <- paste0("DE", 1:length(de)) |
|
78 |
+ de <- lapply(de, as.character) |
|
28 | 79 |
|
29 | 80 |
# Select species |
30 | 81 |
species <- match.arg(species, c("Hs", "Mm", "Rn", "Dm")) |
... | ... |
@@ -42,46 +93,102 @@ goana <- function(fit, coef = ncol(fit), geneid = "GeneID", FDR = 0.05, species |
42 | 93 |
d <- duplicated(EG.GO[,c("gene_id", "go_id", "Ontology")]) |
43 | 94 |
EG.GO <- EG.GO[!d, ] |
44 | 95 |
|
96 |
+ # Check universe |
|
97 |
+ if(is.null(universe)) universe <- EG.GO$gene_id |
|
98 |
+ universe <- as.character(universe) |
|
99 |
+ |
|
100 |
+ # Check weights |
|
101 |
+ if(!is.null(weights)){ |
|
102 |
+ if(length(weights)!=length(universe)) stop("length(weights) must equal length(universe).") |
|
103 |
+ } |
|
104 |
+ |
|
45 | 105 |
# Reduce to universe |
46 |
- EG.GO <- EG.GO[EG.GO$gene_id %in% EG.All, ] |
|
106 |
+ EG.GO <- EG.GO[EG.GO$gene_id %in% universe, ] |
|
107 |
+ if(!length(EG.GO$gene_id)) stop("Universe is empty.") |
|
108 |
+ |
|
47 | 109 |
Total <- length(unique(EG.GO$gene_id)) |
48 | 110 |
|
49 | 111 |
# Overlap with DE genes |
50 |
- isDE.UP <- (EG.GO$gene_id %in% EG.DE.UP) |
|
51 |
- isDE.DN <- (EG.GO$gene_id %in% EG.DE.DN) |
|
52 |
- TotalDE.UP <- length(unique(EG.GO$gene_id[isDE.UP])) |
|
53 |
- TotalDE.DN <- length(unique(EG.GO$gene_id[isDE.DN])) |
|
112 |
+ isDE <- lapply(de, function(x) EG.GO$gene_id %in% x) |
|
113 |
+ TotalDE <- lapply(isDE, function(x) length(unique(EG.GO$gene_id[x]))) |
|
114 |
+ nDE <- length(isDE) |
|
115 |
+ |
|
116 |
+ if(length(weights)) { |
|
117 |
+ # Probability weight for each gene |
|
118 |
+ m <- match(EG.GO$gene_id, universe) |
|
119 |
+ PW2 <- list(weights[m]) |
|
120 |
+ X <- do.call(cbind, c(N=1, isDE, PW=PW2)) |
|
121 |
+ } else |
|
122 |
+ X <- do.call(cbind, c(N=1, isDE)) |
|
54 | 123 |
|
55 |
- X <- cbind(N=1, Up=isDE.UP, Down=isDE.DN) |
|
56 | 124 |
group <- paste(EG.GO$go_id, EG.GO$Ontology, sep=".") |
57 | 125 |
S <- rowsum(X, group=group, reorder=FALSE) |
58 | 126 |
|
59 |
- # Fisher's exact test |
|
60 |
- p.UP <- phyper(q=S[,"Up"]-0.5,m=TotalDE.UP,n=Total-TotalDE.UP,k=S[,"N"],lower.tail=FALSE) |
|
61 |
- p.DN <- phyper(q=S[,"Down"]-0.5,m=TotalDE.DN,n=Total-TotalDE.DN,k=S[,"N"],lower.tail=FALSE) |
|
127 |
+ P <- matrix(0, nrow = nrow(S), ncol = nDE) |
|
128 |
+ |
|
129 |
+ if(length(weights)) { |
|
130 |
+ |
|
131 |
+ # Calculate weight |
|
132 |
+ require("BiasedUrn", character.only = TRUE) |
|
133 |
+ PW.ALL <- sum(weights[universe %in% EG.GO$gene_id]) |
|
134 |
+ AVE.PW <- S[,"PW"]/S[,"N"] |
|
135 |
+ W <- AVE.PW*(Total-S[,"N"])/(PW.ALL-S[,"N"]*AVE.PW) |
|
136 |
+ |
|
137 |
+ # Wallenius' noncentral hypergeometric test |
|
138 |
+ for(j in 1:nDE){ |
|
139 |
+ |
|
140 |
+ for(i in 1:nrow(S)){ |
|
141 |
+ |
|
142 |
+ P[i,j] <- pWNCHypergeo(S[i,1+j], S[i,"N"], Total-S[i,"N"], TotalDE[[j]], W[i],lower.tail=FALSE)+ |
|
143 |
+ dWNCHypergeo(S[i,1+j], S[i,"N"], Total-S[i,"N"], TotalDE[[j]], W[i]) |
|
144 |
+ } |
|
145 |
+ |
|
146 |
+ } |
|
147 |
+ |
|
148 |
+ S <- S[,-ncol(S)] |
|
149 |
+ |
|
150 |
+ } else { |
|
151 |
+ |
|
152 |
+ # Fisher's exact test |
|
153 |
+ for(j in 1:nDE){ |
|
154 |
+ |
|
155 |
+ P[,j] <- phyper(q=S[,1+j]-0.5,m=TotalDE[[j]],n=Total-TotalDE[[j]], k=S[,"N"],lower.tail=FALSE) |
|
156 |
+ } |
|
157 |
+ |
|
158 |
+ } |
|
62 | 159 |
|
63 | 160 |
# Assemble output |
64 | 161 |
g <- strsplit2(rownames(S),split="\\.") |
65 | 162 |
TERM <- select(GO.db,keys=g[,1],columns="TERM") |
66 |
- Results <- data.frame(Term=TERM[[2]],Ont=g[,2],S,P.Up=p.UP,P.Down=p.DN,stringsAsFactors=FALSE) |
|
163 |
+ Results <- data.frame(Term = TERM[[2]], Ont = g[,2], S, P, stringsAsFactors=FALSE) |
|
67 | 164 |
rownames(Results) <- g[,1] |
68 | 165 |
|
166 |
+ # Name P value for the DE genes |
|
167 |
+ iTON <- c(1:3) |
|
168 |
+ iDE <- 3+c(1:nDE) |
|
169 |
+ PDE<- paste0("P.", colnames(Results)[iDE]) |
|
170 |
+ colnames(Results)[-c(iTON,iDE)] <- PDE |
|
171 |
+ |
|
69 | 172 |
Results |
70 | 173 |
} |
71 | 174 |
|
72 | 175 |
topGO <- function(results, ontology = c("BP", "CC", "MF"), sort = "up", number = 20L){ |
73 | 176 |
# Extract sorted goana gene ontology test results |
74 | 177 |
# Gordon Smyth and Yifang Hu |
75 |
-# Created 20 June 2014. Last modified 24 June 2014. |
|
76 |
- |
|
77 |
- # Check input |
|
78 |
- if(any(! c("Term", "N", "Up", "Down", "Ont", "P.Up", "P.Down") %in% colnames(results))) stop("Results column names don't match GOTest results.") |
|
79 |
- |
|
178 |
+# Created 20 June 2014. Last modified 21 July 2014. |
|
179 |
+ |
|
180 |
+ # Check results |
|
181 |
+ if(!is.data.frame(results)) stop("Expect a dataframe with goana results.") |
|
182 |
+ |
|
183 |
+ # Check ontology |
|
80 | 184 |
ontology <- match.arg(ontology, c("BP", "CC", "MF"), several.ok = TRUE) |
81 |
- |
|
82 |
- sort <- match.arg(tolower(sort), c("up", "down")) |
|
83 |
- sort <- switch(sort, up = "P.Up", down = "P.Down") |
|
84 |
- |
|
185 |
+ |
|
186 |
+ # Check sort and sort by P value |
|
187 |
+ if(length(sort) != 1) stop("sort length needs to be 1.") |
|
188 |
+ sort <- tolower(colnames(results)) %in% tolower(paste0("P.", sort)) |
|
189 |
+ if(sum(sort)!=1) stop("sort not found.") |
|
190 |
+ |
|
191 |
+ # Check number |
|
85 | 192 |
if(!is.numeric(number)) stop("Need to input number.") |
86 | 193 |
if(number < 1L) return(data.frame()) |
87 | 194 |
|
... | ... |
@@ -89,10 +196,9 @@ topGO <- function(results, ontology = c("BP", "CC", "MF"), sort = "up", number = |
89 | 196 |
sel <- results$Ont %in% ontology |
90 | 197 |
results <- results[sel,] |
91 | 198 |
|
92 |
- # Sort by Up or Down p value |
|
199 |
+ # Sort by p value |
|
93 | 200 |
o <- order(results[, sort], rownames(results)) |
94 | 201 |
results <- results[o,] |
95 | 202 |
|
96 | 203 |
if(number >= nrow(results)) results else results[1:number,] |
97 |
-} |
|
98 |
- |
|
204 |
+} |
|
99 | 205 |
\ No newline at end of file |
... | ... |
@@ -3,7 +3,7 @@ |
3 | 3 |
|
4 | 4 |
plotMA <- function(MA,...) UseMethod("plotMA") |
5 | 5 |
|
6 |
-plotMA.RGList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, zero.weights=FALSE, ...) |
|
6 |
+plotMA.RGList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, zero.weights=FALSE, ...) |
|
7 | 7 |
# MA-plot with color coding for controls |
8 | 8 |
# Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003. |
9 | 9 |
# Last modified 23 April 2013. |
... | ... |
@@ -12,7 +12,7 @@ plotMA.RGList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[arr |
12 | 12 |
plotMA(MA,array=1,xlab=xlab,ylab=ylab,main=main,xlim=xlim,ylim=ylim,status=status,values=values,pch=pch,col=col,cex=cex,legend=legend,zero.weights=zero.weights,...) |
13 | 13 |
} |
14 | 14 |
|
15 |
-plotMA.MAList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, zero.weights=FALSE, ...) |
|
15 |
+plotMA.MAList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, zero.weights=FALSE, ...) |
|
16 | 16 |
# MA-plot with color coding for controls |
17 | 17 |
# Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003. |
18 | 18 |
# Last modified 23 April 2013. |
... | ... |
@@ -20,7 +20,7 @@ plotMA.MAList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[arr |
20 | 20 |
x <- as.matrix(MA$A)[,array] |
21 | 21 |
y <- as.matrix(MA$M)[,array] |
22 | 22 |
if(is.null(MA$weights)) w <- NULL else w <- as.matrix(MA$weights)[,array] |
23 |
- if(missing(status)) status <- MA$genes$Status |
|
23 |
+ if(is.null(status)) status <- MA$genes$Status |
|
24 | 24 |
if(!is.null(w) && !zero.weights) { |
25 | 25 |
i <- is.na(w) | (w <= 0) |
26 | 26 |
y[i] <- NA |
... | ... |
@@ -28,7 +28,7 @@ plotMA.MAList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[arr |
28 | 28 |
.plotMAxy(x,y,xlab=xlab,ylab=ylab,main=main,xlim=xlim,ylim=ylim,status=status,values=values,pch=pch,col=col,cex=cex,legend=legend, ...) |
29 | 29 |
} |
30 | 30 |
|
31 |
-plotMA.MArrayLM <- function(MA, coef=ncol(MA), xlab="AveExpr", ylab="logFC", main=colnames(MA)[coef], xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, zero.weights=FALSE, ...) |
|
31 |
+plotMA.MArrayLM <- function(MA, coef=ncol(MA), xlab="AveExpr", ylab="logFC", main=colnames(MA)[coef], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, zero.weights=FALSE, ...) |
|
32 | 32 |
# MA-plot with color coding for controls |
33 | 33 |
# Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003. |
34 | 34 |
# Last modified 21 March 2014. |
... | ... |
@@ -37,7 +37,7 @@ plotMA.MArrayLM <- function(MA, coef=ncol(MA), xlab="AveExpr", ylab="logFC", mai |
37 | 37 |
x <- MA$Amean |
38 | 38 |
y <- as.matrix(MA$coef)[,coef] |
39 | 39 |
if(is.null(MA$weights)) w <- NULL else w <- as.matrix(MA$weights)[,coef] |
40 |
- if(missing(status)) status <- MA$genes$Status |
|
40 |
+ if(is.null(status)) status <- MA$genes$Status |
|
41 | 41 |
if(!is.null(w) && !zero.weights) { |
42 | 42 |
i <- is.na(w) | (w <= 0) |
43 | 43 |
y[i] <- NA |
... | ... |
@@ -45,7 +45,7 @@ plotMA.MArrayLM <- function(MA, coef=ncol(MA), xlab="AveExpr", ylab="logFC", mai |
45 | 45 |
.plotMAxy(x,y,xlab=xlab,ylab=ylab,main=main,xlim=xlim,ylim=ylim,status=status,values=values,pch=pch,col=col,cex=cex,legend=legend, ...) |
46 | 46 |
} |
47 | 47 |
|
48 |
-plotMA.EList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, zero.weights=FALSE, ...) |
|
48 |
+plotMA.EList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, zero.weights=FALSE, ...) |
|
49 | 49 |
# MA-plot with color coding for controls |
50 | 50 |
# Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003. |
51 | 51 |
# Last modified 23 April 2013. |
... | ... |
@@ -59,7 +59,7 @@ plotMA.EList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[arra |
59 | 59 |
x <- rowMeans(MA$E,na.rm=TRUE) |
60 | 60 |
y <- MA$E[,array]-x |
61 | 61 |
if(is.null(MA$weights)) w <- NULL else w <- as.matrix(MA$weights)[,array] |
62 |
- if(missing(status)) status <- MA$genes$Status |
|
62 |
+ if(is.null(status)) status <- MA$genes$Status |
|
63 | 63 |
|
64 | 64 |
if(!is.null(w) && !zero.weights) { |
65 | 65 |
i <- is.na(w) | (w <= 0) |
... | ... |
@@ -68,34 +68,29 @@ plotMA.EList <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[arra |
68 | 68 |
.plotMAxy(x,y,xlab=xlab,ylab=ylab,main=main,xlim=xlim,ylim=ylim,status=status,values=values,pch=pch,col=col,cex=cex,legend=legend, ...) |
69 | 69 |
} |
70 | 70 |
|
71 |
-plotMA.default <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, zero.weights=FALSE, ...) |
|
71 |
+plotMA.default <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, ...) |
|
72 | 72 |
# MA-plot with color coding for controls |
73 | 73 |
# Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003. |
74 |
-# Last modified 23 April 2013. |
|
74 |
+# Last modified 8 August 2014. |
|
75 | 75 |
{ |
76 | 76 |
# Data is assumed to be single-channel |
77 | 77 |
MA <- as.matrix(MA) |
78 | 78 |
narrays <- ncol(MA) |
79 |
- if(narrays < 2) stop("Need at least two arrays") |
|
80 |
- if(narrays > 5) |
|
81 |
- x <- apply(MA,1,median,na.rm=TRUE) |
|
82 |
- else |
|
83 |
- x <- rowMeans(MA,na.rm=TRUE) |
|
84 |
- y <- MA[,array]-x |
|
85 |
- w <- NULL |
|
86 |
- if(missing(status)) status <- NULL |
|
79 |
+ if(narrays<2) stop("Need at least two columns") |
|
80 |
+ array <- as.integer(array[1L]) |
|
81 |
+ Ave <- rowMeans(MA[,-array,drop=FALSE],na.rm=TRUE) |
|
82 |
+ y <- MA[,array]-Ave |
|
83 |
+ x <- (MA[,array]+Ave)/2 |
|
87 | 84 |
|
88 |
- if(!is.null(w) && !zero.weights) { |
|
89 |
- i <- is.na(w) | (w <= 0) |
|
90 |
- y[i] <- NA |
|
91 |
- } |
|
92 | 85 |
.plotMAxy(x,y,xlab=xlab,ylab=ylab,main=main,xlim=xlim,ylim=ylim,status=status,values=values,pch=pch,col=col,cex=cex,legend=legend, ...) |
93 | 86 |
} |
94 | 87 |
|
95 |
-.plotMAxy <- function(x, y, xlab="A", ylab="M", main=NULL, xlim=NULL, ylim=NULL, status, values, pch, col, cex, legend=TRUE, ...) |
|
88 |
+# Call this plotWithHighlights and document? |
|
89 |
+ |
|
90 |
+.plotMAxy <- function(x, y, xlab="A", ylab="M", main=NULL, xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, pch0=16, col0="black", cex0=0.3, ...) |
|
96 | 91 |
# MA-plot with color coding for controls |
97 | 92 |
# Gordon Smyth 7 April 2003, James Wettenhall 27 June 2003. |
98 |
-# Last modified 23 April 2013. |
|
93 |
+# Last modified 13 April 2014. |
|
99 | 94 |
{ |
100 | 95 |
# Check legend |
101 | 96 |
legend.position <- "topleft" |
... | ... |
@@ -114,65 +109,69 @@ plotMA.default <- function(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[ar |
114 | 109 |
|
115 | 110 |
# If no status information, just plot points normally |
116 | 111 |
if(is.null(status) || all(is.na(status))) { |
117 |
- if(missing(pch)) pch <- 16 |
|
118 |
- if(missing(cex)) cex <- 0.3 |
|
119 |
- points(x,y,pch=pch[[1]],cex=cex[1]) |
|
112 |
+ points(x,y,pch=pch0,cex=cex0) |
|
120 | 113 |
return(invisible()) |
121 | 114 |
} |
122 | 115 |
|
123 |
-# From here, status is not NULL and not all missing |
|
116 |
+# From here, status is not NULL and not all NA |
|
124 | 117 |
|
125 | 118 |
# Check values |
126 |
- if(missing(values)) { |
|
127 |
- if(is.null(attr(status,"values"))) |
|
128 |
- values <- names(sort(table(status),decreasing=TRUE)) |
|
129 |
- else |
|
130 |
- values <- attr(status,"values") |
|
119 |
+# Default is to set the most frequent status value as background, and to highlight all other status values in order of frequency |
|
120 |
+ if(is.null(values)) values <- attr(status,"values") |
|
121 |
+ if(is.null(values)) { |
|
122 |
+ status.values <- names(sort(table(status),decreasing=TRUE)) |
|
123 |
+ status <- as.character(status) |
|
124 |
+ values <- status.values[-1] |
|
131 | 125 |
} |
132 | 126 |
nvalues <- length(values) |
127 |
+ if(nvalues==0L) { |
|
128 |
+ points(x,y,pch=pch0,cex=cex0) |
|
129 |
+ return(invisible()) |
|
130 |
+ } |
|
131 |
+ |
|
132 |
+# From here, values has positive length |
|
133 | 133 |
|
134 | 134 |
# Plot non-highlighted points |
135 |
- sel <- !(status %in% values) |
|
136 |
- nonhi <- any(sel) |
|
137 |
- if(nonhi) points(x[sel],y[sel],pch=16,cex=0.3) |
|
135 |
+ bg <- !(status %in% values) |
|
136 |
+ nonhi <- any(bg) |
|
137 |
+ if(nonhi) points(x[bg],y[bg],pch=pch0,cex=cex0) |
|
138 | 138 |
|
139 |
- if(missing(pch)) { |
|
140 |
- if(is.null(attr(status,"pch"))) |
|
141 |
- pch <- rep(16,nvalues) |
|
142 |
- else |
|
143 |
- pch <- attr(status,"pch") |
|
144 |
- } |
|
139 |
+# Check parameters for plotting highlighted points |
|
145 | 140 |
|
146 |
- if(missing(cex)) { |
|
147 |
- if(is.null(attr(status,"cex"))) { |
|
148 |
- cex <- rep(1,nvalues) |
|
149 |
- if(!nonhi) cex[1] <- 0.3 |
|
150 |
- } else |
|
151 |
- cex <- attr(status,"cex") |
|
152 |
- } |
|
141 |
+ if(is.null(pch)) pch <- attr(status,"pch") |
|
142 |
+ if(is.null(pch)) pch <- pch0 |
|
143 |
+ pch <- rep(pch,length=nvalues) |
|
153 | 144 |
|
154 |
- if(missing(col)) { |
|
155 |
- if(is.null(attr(status,"col"))) { |
|
156 |
- col <- nonhi + 1:nvalues |
|
157 |
- } else |
|
158 |
- col <- attr(status,"col") |
|
159 |
- } |
|
145 |
+ if(is.null(cex)) cex <- attr(status,"cex") |
|
146 |
+ if(is.null(cex)) cex <- 1 |
|
147 |
+ cex <- rep(cex,length=nvalues) |
|
160 | 148 |
|
161 |
- pch <- rep(pch,length=nvalues) |
|
149 |
+ if(is.null(col)) col <- attr(status,"col") |
|
150 |
+ if(is.null(col)) col <- nonhi + 1L:nvalues |
|
162 | 151 |
col <- rep(col,length=nvalues) |
163 |
- cex <- rep(cex,length=nvalues) |
|
164 | 152 |
|
165 |
-# Plot highlighted classes of points |
|
153 |
+# Plot highlighted points |
|
166 | 154 |
for (i in 1:nvalues) { |
167 | 155 |
sel <- status==values[i] |
168 | 156 |
points(x[sel],y[sel],pch=pch[[i]],cex=cex[i],col=col[i]) |
169 | 157 |
} |
170 | 158 |
|
171 | 159 |
if(legend) { |
160 |
+ if(nonhi) { |
|
161 |
+# Include background value in legend |
|
162 |
+ bg.value <- unique(status[bg]) |
|
163 |
+ if(length(bg.value) > 1) bg.value <- "Other" |
|
164 |
+ values <- c(bg.value,values) |
|
165 |
+ pch <- c(pch0,pch) |
|
166 |
+ col <- c(col0,col) |
|
167 |
+ cex <- c(cex0,cex) |
|
168 |
+ } |
|
169 |
+ h <- cex>0.5 |
|
170 |
+ cex[h] <- 0.5+0.8*(cex[h]-0.5) |
|
172 | 171 |
if(is.list(pch)) |
173 |
- legend(legend.position,legend=values,fill=col,col=col,cex=0.9) |
|
172 |
+ legend(legend.position,legend=values,fill=col,col=col,cex=0.9,pt.cex=cex) |
|
174 | 173 |
else |
175 |
- legend(legend.position,legend=values,pch=pch,,col=col,cex=0.9) |
|
174 |
+ legend(legend.position,legend=values,pch=pch,,col=col,cex=0.9,pt.cex=cex) |
|
176 | 175 |
} |
177 | 176 |
invisible() |
178 | 177 |
} |
... | ... |
@@ -3,7 +3,7 @@ |
3 | 3 |
read.ilmn <- function(files=NULL, ctrlfiles=NULL, path=NULL, ctrlpath=NULL, probeid="Probe", annotation=c("TargetID", "SYMBOL"), expr="AVG_Signal", other.columns="Detection",sep="\t", quote="\"", verbose=TRUE, ...) |
4 | 4 |
# Read one or more files of Illumina BeadStudio output |
5 | 5 |
# Wei Shi and Gordon Smyth. |
6 |
-# Created 15 July 2009. Last modified 27 November 2013. |
|
6 |
+# Created 15 July 2009. Last modified 21 July 2014. |
|
7 | 7 |
{ |
8 | 8 |
if(!is.null(files)){ |
9 | 9 |
f <- unique(files) |
... | ... |
@@ -34,12 +34,22 @@ read.ilmn <- function(files=NULL, ctrlfiles=NULL, path=NULL, ctrlpath=NULL, prob |
34 | 34 |
else |
35 | 35 |
elist.ctrl <- cbind(elist.ctrl, elist.ctrl1) |
36 | 36 |
} |
37 |
- elist.ctrl$genes$Status <- elist.ctrl$genes[,ncol(elist.ctrl$genes)] |
|
37 |
+ |
|
38 |
+ if(is.null(elist.ctrl$genes)) elist.ctrl$genes <- data.frame(Status = rep("negative", nrow(elist.ctrl))) |
|
39 |
+ else { |
|
40 |
+ ctrl.status.negative <- apply(elist.ctrl$genes, 2, function(x) sum(tolower(x) %in% "negative")) |
|
41 |
+ STATUS.col <- which.max(ctrl.status.negative) |
|
42 |
+ if(ctrl.status.negative[STATUS.col] > 0) elist.ctrl$genes$Status <- elist.ctrl$genes[[STATUS.col]] |
|
43 |
+ else elist.ctrl$genes$Status <- "negative" |
|
44 |
+ } |
|
38 | 45 |
} |
39 | 46 |
|
40 | 47 |
if(!is.null(files)) |
41 | 48 |
if(!is.null(ctrlfiles)){ |
42 |
- colnames(elist.ctrl$genes) <- colnames(elist$genes) |
|
49 |
+ REG.col <- setdiff(colnames(elist$genes), colnames(elist.ctrl$genes)) |
|
50 |
+ if(length(REG.col)) for(i in REG.col) elist.ctrl$genes[[i]] <- NA |
|
51 |
+ CTRL.col <- setdiff(colnames(elist.ctrl$genes), colnames(elist$genes)) |
|
52 |
+ if(length(CTRL.col)) for(i in CTRL.col) elist$genes[[i]] <- NA |
|
43 | 53 |
return(rbind(elist, elist.ctrl)) |
44 | 54 |
} |
45 | 55 |
else |
... | ... |
@@ -64,7 +74,7 @@ read.ilmn.targets <- function(targets, ...) |
64 | 74 |
.read.oneilmnfile <- function(fname, probeid, annotation, expr, other.columns, sep, quote, verbose, ...) |
65 | 75 |
# Read a single file of Illumina BeadStudio output |
66 | 76 |
# Wei Shi and Gordon Smyth |
67 |
-# Created 15 July 2009. Last modified 16 June 2014. |
|
77 |
+# Created 15 July 2009. Last modified 21 July 2014. |
|
68 | 78 |
{ |
69 | 79 |
h <- readGenericHeader(fname,columns=expr,sep=sep) |
70 | 80 |
skip <- h$NHeaderRecords |
... | ... |
@@ -101,7 +111,7 @@ read.ilmn.targets <- function(targets, ...) |
101 | 111 |
# Add probe annotation |
102 | 112 |
if(length(anncol)) { |
103 | 113 |
elist$genes <- x[,anncol,drop=FALSE] |
104 |
- if(!any(duplicated(pids))) row.names(elist$genes) <- pids |
|
114 |
+ if(length(pids) & !any(duplicated(pids))) row.names(elist$genes) <- pids |
|
105 | 115 |
} |
106 | 116 |
|
107 | 117 |
# elist$targets <- data.frame(SampleNames=snames, stringsAsFactors=FALSE) |
... | ... |
@@ -1,3 +1,28 @@ |
1 |
+ 8 Aug 2014: limma 3.21.12 |
|
2 |
+ |
|
3 |
+- The definition of the M and A axes for an MA-plot of single channel |
|
4 |
+ data is changed slightly. Previously the A-axis was the average of |
|
5 |
+ all arrays in the dataset -- this has been definition since MA-plots |
|
6 |
+ were introduced for single channel data in April 2003. Now an |
|
7 |
+ artificial array is formed by averaging all arrays other than the |
|
8 |
+ one to be plotted. Then a mean-difference plot is formed from the |
|
9 |
+ specified array and the artificial array. This change ensures the |
|
10 |
+ specified and artificial arrays are computed from independent data, |
|
11 |
+ and ensures the MA-plot will reduce to a correct mean-difference |
|
12 |
+ plot when there are just two arrays in the dataset. |
|
13 |
+ |
|
14 |
+ 7 Aug 2014: limma 3.21.11 |
|
15 |
+ |
|
16 |
+- diffSplice() now includes Simes adjusted p-values |
|
17 |
+ |
|
18 |
+- topSplice() has a new level="hybrid" argument for ranking genes by |
|
19 |
+ Simes adjusted p-values. |
|
20 |
+ |
|
21 |
+- goana() is now an S3 generic function. |
|
22 |
+ |
|
23 |
+- goana() can now optionally adjust for gene length or abundance |
|
24 |
+ bias. |
|
25 |
+ |
|
1 | 26 |
27 June 2014: limma 3.21.10 |
2 | 27 |
|
3 | 28 |
- remove col argument from plotMDS(), as it handled by ... as are |
... | ... |
@@ -3448,6 +3473,10 @@ Apr 25 2003: limma 0.9.7 |
3448 | 3473 |
The smawehi package was renamed to limma, with the title "Linear Models |
3449 | 3474 |
for Microarray Data" and became part of the Bioconductor project. |
3450 | 3475 |
|
3476 |
+Apr 7, 2003 |
|
3477 |
+ |
|
3478 |
+MA plots added for both two-color and single channel data. |
|
3479 |
+ |
|
3451 | 3480 |
11 November 2002: smawehi 0.1 |
3452 | 3481 |
|
3453 | 3482 |
smawehi package made publicly available for the first time, through |
... | ... |
@@ -30,10 +30,10 @@ The method is explained briefly in Majewski et al (2010) and in full detail in P |
30 | 30 |
The \code{subset} argument specifies whether and how the fit object should be subsetted. |
31 | 31 |
Ideally, only genes that are truly differentially expressed for one or both of the contrasts should be used estimate the biological correlation. |
32 | 32 |
The default is \code{"all"}, which uses all genes in the fit object to estimate the biological correlation. |
33 |
-The option \code{"Fpval"} chooses genes based on how many F-test p-values are estimated to be truly significant using the method \code{propNotDE}. |
|
33 |
+The option \code{"Fpval"} chooses genes based on how many F-test p-values are estimated to be truly significant using the function \code{propTrueNull}. |
|
34 | 34 |
This should capture genes that display any evidence of differential expression in either of the two contrasts. |
35 | 35 |
The options \code{"p.union"} and \code{"p.int"} are based on the moderated t p-values from both contrasts. |
36 |
-From the \code{propNotDE} method an estimate of the number of p-values truly significant in either of the two contrasts can be obtained. |
|
36 |
+From the \code{propTrueNull} function an estimate of the number of p-values truly significant in either of the two contrasts can be obtained. |
|
37 | 37 |
"p.union" takes the union of these genes and \code{"p.int"} takes the intersection of these genes. |
38 | 38 |
The other options, \code{"logFC"} and \code{"predFC"} subsets on genes that attain a logFC or predFC at least as large as the 90th percentile of the log fold changes or predictive log fold changes on the absolute scale. |
39 | 39 |
|
... | ... |
@@ -1,48 +1,97 @@ |
1 | 1 |
\name{goana} |
2 | 2 |
\alias{goana} |
3 |
+\alias{goana.default} |
|
4 |
+\alias{goana.MArrayLM} |
|
3 | 5 |
\title{Gene Ontology Analysis of Differentially Expressed Genes} |
4 | 6 |
\description{ |
5 |
-Hypergeometric tests on up and down differentially expressed genes for over-representation of GO Terms. |
|
7 |
+Fisher's exact test or Wallenius' noncentral hypergeometric test on differentially expressed genes for over-representation of gene ontology (GO) terms. |
|
6 | 8 |
} |
7 | 9 |
\usage{ |
8 |
-goana(fit, coef = ncol(fit), geneid = "GeneID", FDR = 0.05, species = "Hs") |
|
10 |
+\method{goana}{default}(de, universe = NULL, species = "Hs", weights = NULL, \dots) |
|
11 |
+\method{goana}{MArrayLM}(de, coef = ncol(de), geneid = rownames(de), FDR = 0.05, |
|
12 |
+ species = "Hs", trend = FALSE, \dots) |
|
9 | 13 |
} |
10 | 14 |
\arguments{ |
11 |
- \item{fit}{should be an object of class \code{MArrayLM} as produced by \code{lmFit} and \code{eBayes}.} |
|
15 |
+ \item{de}{Differentially expressed genes. Can be a \code{MArrayLM} fit object, or a vector or list of Entrez Gene IDs of differentially expressed genes.} |
|
16 |
+ \item{universe}{vector specifying the Entrez Gene identifiers of the universe. If \code{NULL}, all Entrez Gene IDs with any gene ontology annotation will be the universe.} |
|
17 |
+ \item{species}{character string specifying the species. Possible values are \code{"Hs"}, \code{"Mm"}, \code{"Rn"} or \code{"Dm"}.} |
|
18 |
+ \item{weights}{probability weighting of the universe.} |
|
12 | 19 |
\item{coef}{column number or column name specifying which coefficient or contrast of the linear model is of interest.} |
13 |
- \item{geneid}{Entrez gene identifiers. Either a vector of length nrow(fit) or the name of the column of fit$genes containing the Entrez Gene IDs.} |
|
20 |
+ \item{geneid}{Entrez Gene identifiers. Either a vector of length \code{nrow(de)} or the name of the column of \code{de$genes} containing the Entrez Gene IDs.} |
|
14 | 21 |
\item{FDR}{numeric. False discovery rate of differentially expressed genes less than this cutoff.} |
15 |
- \item{species}{character string specifying the species. Possible values are \code{"Hs"}, \code{"Mm"}, \code{"Rn"} or \code{"Dm"}.} |
|
22 |
+ \item{trend}{adjust analysis for gene length or abundance? |
|
23 |
+ Can be logical, or a numeric vector of covariate values, or the name of the column of \code{de$genes} containing the covariate values. |
|
24 |
+ If \code{TRUE}, then \code{de$Amean} is used as the covariate.} |
|
25 |
+ \item{\dots}{further arguments.} |
|
16 | 26 |
} |
17 | 27 |
\details{ |
18 |
-This function is used in conjunction with \code{\link{lmFit}}, \code{\link{eBayes}} and \code{\link{topGO}}. |
|
28 |
+Performs a Gene Ontology enrichment analysis for one for more gene lists using the appropriate Bioconductor organism package. |
|
29 |
+The gene lists must be supplied as Entrez Gene IDs. |
|
30 |
+ |
|
31 |
+If applied to a linear model fit, then the differentially expressed genes are extracted automatically from the fit object, and analyses are done separately for the up and down significant genes. |
|
32 |
+ |
|
33 |
+If \code{trend=FALSE}, the function computes one-sided hypergeometric tests. |
|
34 |
+If \code{trend=TRUE} or a covariate is supplied, then a trend is fitted to the differential expression results and the method of Young et al (2010) is used to adjust for this trend. |
|
35 |
+The adjusted test uses Wallenius' noncentral hypergeometric distribution. |
|
19 | 36 |
} |
20 | 37 |
\value{ |
21 |
- A dataframe with rows for GO IDs and the following columns: |
|
38 |
+ A dataframe with rows for GO IDs and the following possible columns: |
|
22 | 39 |
\item{Term}{GO terms.} |
23 | 40 |
\item{Ont}{GO term ontology from BP, CC and MF.} |
24 | 41 |
\item{N}{Number of genes in the GO term.} |
25 | 42 |
\item{Up}{Number of up regulated differentially expressed genes for testing.} |
26 | 43 |
\item{Down}{Number of down regulated differentially expressed genes for testing.} |
27 |
- \item{P.Up}{P value from hypergeometric test of up regulated differentially expressed genes for over representation of GO terms.} |
|
28 |
- \item{P.Down}{P value from hypergeometric test of down regulated differentially expressed genes for over representation of GO terms.} |
|
44 |
+ \item{DE1}{Number of differentially expressed genes for testing.} |
|
45 |
+ \item{P.Up}{P value from hypergeometric test or Wallenius' noncentral hypergeometric test of up regulated differentially expressed genes for over representation of GO terms.} |
|
46 |
+ \item{P.Down}{P value from hypergeometric test or Wallenius' noncentral hypergeometric test of down regulated differentially expressed genes for over representation of GO terms.} |
|
47 |
+ \item{P.DE1}{P value from hypergeometric test or Wallenius' noncentral hypergeometric test of differentially expressed genes for over representation of GO terms.} |
|
48 |
+} |
|
49 |
+ |
|
50 |
+\references{ |
|
51 |
+ Young, M. D., Wakefield, M. J., Smyth, G. K., Oshlack, A. (2010). |
|
52 |
+ Gene ontology analysis for RNA-seq: accounting for selection bias. |
|
53 |
+ \emph{Genome Biology} 11, R14. |
|
54 |
+ \url{http://genomebiology.com/2010/11/2/R14} |
|
29 | 55 |
} |
30 | 56 |
|
31 | 57 |
\seealso{ |
32 | 58 |
\code{\link{topGO}} |
33 | 59 |
|
34 |
-The gostats package also does GO analyses with some extra options. |
|
60 |
+The goseq package implements a similar GO analysis. |
|
61 |
+The goseq version will work with a variety of gene identifiers, not only Entrez Gene as here, and includes a database of gene length information for various species. |
|
62 |
+ |
|
63 |
+The gostats package also does GO analyses with some different options. |
|
35 | 64 |
} |
36 | 65 |
\author{Gordon Smyth and Yifang Hu} |
37 | 66 |
\examples{ |
38 | 67 |
|
39 | 68 |
\dontrun{ |
40 | 69 |
|
41 |
-fit <- lmFit(y,design) |
|
70 |
+fit <- lmFit(y, design) |
|
42 | 71 |
fit <- eBayes(fit) |
43 |
-go.results <- goana(fit) |
|
44 |
-topGO(go.results, sort = "up") |
|
45 |
-topGO(go.results, sort = "down") |
|
72 |
+ |
|
73 |
+# goana without adjusting for gene length bias |
|
74 |
+go.fisher <- goana(fit) |
|
75 |
+topGO(go.fisher, sort = "up") |
|
76 |
+topGO(go.fisher, sort = "down") |
|
77 |
+ |
|
78 |
+# goana adjusting for gene length bias |
|
79 |
+go.len <- goana(fit, geneid = "GeneID", trend = "Length") |
|
80 |
+topGO(go.len, sort = "up") |
|
81 |
+topGO(go.len, sort = "down") |
|
82 |
+ |
|
83 |
+# goana adjusting for gene abundance |
|
84 |
+go.abund <- goana(fit, geneid = "GeneID", trend = TRUE) |
|
85 |
+topGO(go.abund, sort = "up") |
|
86 |
+topGO(go.abund, sort = "down") |
|
87 |
+ |
|
88 |
+# goana.default |
|
89 |
+go.de <- goana(list(DE1 = EG.DE1, DE2 = EG.DE2, DE3 = EG.DE3)) |
|
90 |
+topGO(go.de, sort = "DE1") |
|
91 |
+topGO(go.de, sort = "DE2") |
|
92 |
+topGO(go.de, ontology = "BP", sort = "DE3") |
|
93 |
+topGO(go.de, ontology = "CC", sort = "DE3") |
|
94 |
+topGO(go.de, ontology = "MF", sort = "DE3") |
|
46 | 95 |
|
47 | 96 |
} |
48 | 97 |
|
... | ... |
@@ -1,22 +1,24 @@ |
1 |
-\title{Plot exons on differentially spliced gene} |
|
1 |
+\title{Differential splicing plot} |
|
2 | 2 |
\name{plotSplice} |
3 | 3 |
\alias{plotSplice} |
4 | 4 |
\description{ |
5 |
-Plot exons of differentially spliced gene. |
|
5 |
+Plot relative log-fold changes by exons for the specified gene. |
|
6 | 6 |
} |
7 | 7 |
\usage{ |
8 |
-plotSplice(fit, coef=ncol(fit), geneid=NULL, rank=1L, FDR = 0.05) |
|
8 |
+plotSplice(fit, coef=ncol(fit), geneid=NULL, genecolname=NULL, rank=1L, FDR = 0.05) |
|
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 | 11 |
\item{fit}{\code{MArrayLM} fit object produced by \code{diffSplice}.} |
12 | 12 |
\item{coef}{the coefficient (column) of fit for which differentially splicing is assessed.} |
13 | 13 |
\item{geneid}{character string, ID of the gene to plot.} |
14 |
+ \item{genecolname}{column name of \code{fit$genes} containing gene IDs. Defaults to \code{fit$genecolname}.} |
|
14 | 15 |
\item{rank}{integer, if \code{geneid=NULL} then this ranked gene will be plotted.} |
15 | 16 |
\item{FDR}{numeric, mark exons with false discovery rate less than this cutoff.} |
16 | 17 |
} |
17 | 18 |
|
18 | 19 |
\details{ |
19 |
-Plots interaction log2-fold-change by exon for the specified gene. |
|
20 |
+Plot relative log2-fold-changes by exon for the specified gene. |
|
21 |
+The relative logFC is the difference between the exon's logFC and the overall logFC for the gene, as computed by \code{diffSplice}. |
|
20 | 22 |
} |
21 | 23 |
|
22 | 24 |
\value{A plot is created on the current graphics device.} |
... | ... |
@@ -27,3 +29,4 @@ Plots interaction log2-fold-change by exon for the specified gene. |
27 | 29 |
An overview of diagnostic functions available in LIMMA is given in \link{09.Diagnostics}. |
28 | 30 |
} |
29 | 31 |
\examples{# See diffSplice} |
32 |
+\keyword{rna-seq} |
... | ... |
@@ -10,16 +10,15 @@ |
10 | 10 |
Creates an MA-plot with color coding for control spots. |
11 | 11 |
} |
12 | 12 |
\usage{ |
13 |
-\method{plotMA}{default}(MA, array = 1, xlab = "A", ylab = "M", main = colnames(MA)[array], |
|
14 |
- xlim = NULL, ylim = NULL, status, values, pch, col, cex, legend = TRUE, |
|
15 |
- zero.weights = FALSE, ...) |
|
16 |
-\method{plotMA}{MArrayLM}(MA, coef = ncol(MA), xlab = "AveExpr", ylab = "logFC", |
|
17 |
- main = colnames(MA)[coef], xlim = NULL, ylim = NULL, status, values, pch, col, |
|
18 |
- cex, legend = TRUE, zero.weights = FALSE, ...) |
|
13 |
+\method{plotMA}{default}(MA, array=1, xlab="A", ylab="M", main=colnames(MA)[array], xlim=NULL, |
|
14 |
+ ylim=NULL, status=NULL, values=NULL, pch=NULL, col=NULL, cex=NULL, legend=TRUE, \dots) |
|
15 |
+\method{plotMA}{MArrayLM}(MA, coef=ncol(MA), xlab="AveExpr", ylab="logFC", |
|
16 |
+ main=colnames(MA)[coef], xlim=NULL, ylim=NULL, status=NULL, values=NULL, pch=NULL, |
|
17 |
+ col=NULL, cex=NULL, legend=TRUE, zero.weights=FALSE, \dots) |
|
19 | 18 |
} |
20 | 19 |
\arguments{ |
21 |
- \item{MA}{an \code{RGList}, \code{MAList}, \code{EList} or \code{MArrayLM} object. |
|
22 |
- Alternatively a \code{matrix} or \code{ExpressionSet} object.} |
|
20 |
+ \item{MA}{an \code{RGList}, \code{MAList}, \code{EList}, \code{ExpressionSet} or \code{MArrayLM} object. |
|
21 |
+ Alternatively a numeric \code{matrix}.} |
|
23 | 22 |
\item{array}{integer giving the array to be plotted.} |
24 | 23 |
\item{coef}{integer giving the linear model coefficient to be plotted.} |
25 | 24 |
\item{xlab}{character string giving label for x-axis} |
... | ... |
@@ -28,7 +27,7 @@ Creates an MA-plot with color coding for control spots. |
28 | 27 |
\item{xlim}{numeric vector of length 2 giving limits for x-axis, defaults to min and max of the data} |
29 | 28 |
\item{ylim}{numeric vector of length 2 giving limits for y-axis, defaults to min and max of the data} |
30 | 29 |
\item{status}{character vector giving the control status of each spot on the array, of same length as the number of rows of \code{MA$M}. |
31 |
- If omitted, all points are plotted in the default color, symbol and size.} |
|
30 |
+ If omitted, all points are plotted in the default color (\code{"black"}), symbol (\code{pch=16}) and size (\code{cex=0.3}).} |
|
32 | 31 |
\item{values}{character vector giving values of \code{status} to be highlighted on the plot. Defaults to unique values of \code{status}. |
33 | 32 |
Ignored if there is no \code{status} vector.} |
34 | 33 |
\item{pch}{vector or list of plotting characters. Default is integer code 16 which gives a solid circle. |
... | ... |
@@ -36,25 +35,27 @@ Creates an MA-plot with color coding for control spots. |
36 | 35 |
\item{col}{numeric or character vector of colors, of the same length as \code{values}. Defaults to \code{1:length(values)}. |
37 | 36 |
Ignored if there is no \code{status} vector.} |
38 | 37 |
\item{cex}{numeric vector of plot symbol expansions, of the the same length as \code{values}. |
39 |
- Defaults to 0.3 for the most common status value and 1 for the others. |
|
38 |
+ Defaults is 1. |
|
40 | 39 |
Ignored if there is no \code{status} vector.} |
41 | 40 |
\item{legend}{logical, should a legend of plotting symbols and colors be included. Can also be a character string giving position to place legend. Ignored if there is no \code{status} vector.} |
42 | 41 |
\item{zero.weights}{logical, should spots with zero or negative weights be plotted?} |
43 |
- \item{...}{any other arguments are passed to \code{plot}} |
|
42 |
+ \item{\dots}{any other arguments are passed to \code{plot}} |
|
44 | 43 |
} |
45 | 44 |
|
46 | 45 |
\details{ |
47 | 46 |
An MA-plot is a plot of log-intensity ratios (M-values) versus log-intensity averages (A-values). |
48 |
-If \code{MA} is an \code{RGList} or \code{MAList} then this function produces an ordinary within-array MA-plot. |
|
49 |
-If \code{MA} is an \code{MArrayLM} object, then the plot is an fitted model MA-plot in which the estimated coefficient is on the y-axis and the average A-value is on the x-axis. |
|
47 |
+For two color data objects, a within-array MA-plot is produced with the M and A values computed from the two channels for the specified array. |
|
48 |
+This is the same as a mean-difference plot (\code{\link{mdplot}}) with the red and green log2-intensities of the array providing the two columns. |
|
49 |
+ |
|
50 |
+For single channel data objects, then a between-array MA-plot is produced. |
|
51 |
+An articifial array is produced by averaging all the arrays other than the array specified. |
|
52 |
+A mean-difference plot is then producing from the specified array and the artificial array. |
|
53 |
+Note that this procedure reduces to an ordinary mean-difference plot when there are just two arrays total. |
|
50 | 54 |
|
51 |
-If \code{MA} is a \code{matrix} or \code{ExpressionSet} object, then this function produces a between-array MA-plot. |
|
52 |
-In this case the A-values in the plot are the average log-intensities across the arrays and the M-values are the deviations of the log-intensities for the specified array from the average. |
|
53 |
-If there are more than five arays, then the average is computed robustly using medians. |
|
54 |
-With five or fewer arrays, it is computed by means. |
|
55 |
+If \code{MA} is an \code{MArrayLM} object, then the plot is an fitted model MA-plot in which the estimated coefficient is on the y-axis and the average A-value is on the x-axis. |
|
55 | 56 |
|
56 | 57 |
The \code{status} vector is intended to specify the control status of each spot, for example "gene", "ratio control", "house keeping gene", "buffer" and so on. |
57 |
-The vector is usually computed using the function \code{\link{controlStatus}} and a spot-types file. |
|
58 |
+The vector is often computed using the function \code{\link{controlStatus}} and a spot-types file. |
|
58 | 59 |
However the function may be used to highlight any subset of spots. |
59 | 60 |
|
60 | 61 |
The \code{status} can be included as the component \code{MA$genes$Status} instead of being passed as an argument to \code{plotMA}. |
... | ... |
@@ -77,8 +78,7 @@ status[4:6] <- "M=3" |
77 | 78 |
MA$M[4:6] <- 3 |
78 | 79 |
status[7:9] <- "M=-3" |
79 | 80 |
MA$M[7:9] <- -3 |
80 |
-plotMA(MA,main="MA-Plot with Simulated Data", |
|
81 |
- status=status,values=c("M=0","M=3","M=-3"),col=c("blue","red","green")) |
|
81 |
+plotMA(MA,main="MA-Plot with Simulated Data",status=status,values=c("M=0","M=3","M=-3"),col=c("blue","red","green")) |
|
82 | 82 |
|
83 | 83 |
# Same as above |
84 | 84 |
attr(status,"values") <- c("M=0","M=3","M=-3") |
... | ... |
@@ -5,18 +5,27 @@ |
5 | 5 |
Top table ranking the most differentially spliced genes or exons. |
6 | 6 |
} |
7 | 7 |
\usage{ |
8 |
-topSplice(fit, coef=ncol(fit), level="exon", number=10, FDR=1) |
|
8 |
+topSplice(fit, coef=ncol(fit), level="hybrid", number=10, FDR=1) |
|
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 | 11 |
\item{fit}{\code{MArrayLM} fit object produced by \code{diffSplice}.} |
12 | 12 |
\item{coef}{the coefficient (column) of fit for which differentially splicing is assessed.} |
13 |
- \item{level}{character string, should the table be by \code{"exon"} or by \code{"gene"}.} |
|
13 |
+ \item{level}{character string, possible values are \code{"gene"}, \code{"exon} or \code{"hybrid}. |
|
14 |
+ \code{"gene"} gives F-tests for each gene. |
|
15 |
+ \code{"exon"} gives t-tests for each exon. |
|
16 |
+ \code{"hybrid"} gives gene level results with p-values derived from the exon-level tests.} |
|
14 | 17 |
\item{number}{integer, maximum number of rows to output.} |
15 | 18 |
\item{FDR}{numeric, only show exons or genes with false discovery rate less than this cutoff.} |
16 | 19 |
} |
17 | 20 |
|
18 | 21 |
\details{ |
19 |
-Ranks genes by the Plots interaction log2-fold-change by exon for the specified gene. |
|
22 |
+Ranks genes or exons by evidence for differential splicing. |
|
23 |
+The gene-level test is an F-test for any differences in exon usage between experimental conditions. |
|
24 |
+The exon-level tests are t-tests for differences between each exon and all other exons for the same gene. |
|
25 |
+ |
|
26 |
+The hybrid testing method processes the exon-level p-values to give an overall call of differential splicing for each gene. |
|
27 |
+It returns the minimum Simes-adjusted p-values for each gene. |
|
28 |
+It is likely to be more powerful than the genewise F-tests if only a minority of exons for a gene are differentially spliced. |
|
20 | 29 |
} |
21 | 30 |
|
22 | 31 |
\value{A data.frame with any annotation columns found in \code{fit} plus the following columns |