Browse code

29 Oct 2018: limma 3.37.11

- New function plotExonJunc() to plot results from diffSplice() when
exon-exon junctions as well as exons are included in the count
matrix.

- Update NEWS.Rd for Bioconductor 3.8 Release.

Gordon Smyth authored on 29/10/2018 09:06:35
Showing 6 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: limma
2
-Version: 3.37.10
3
-Date: 2018-10-15
2
+Version: 3.37.11
3
+Date: 2018-10-29
4 4
 Title: Linear Models for Microarray Data
5 5
 Description: Data analysis, linear models and differential expression for microarray data.
6 6
 Author: Gordon Smyth [cre,aut], Yifang Hu [ctb], Matthew Ritchie [ctb], Jeremy Silver [ctb], James Wettenhall [ctb], Davis McCarthy [ctb], Di Wu [ctb], Wei Shi [ctb], Belinda Phipson [ctb], Aaron Lun [ctb], Natalie Thorne [ctb], Alicia Oshlack [ctb], Carolyn de Graaf [ctb], Yunshun Chen [ctb], Mette Langaas [ctb], Egil Ferkingstad [ctb], Marcus Davy [ctb], Francois Pepin [ctb], Dongseok Choi [ctb]
... ...
@@ -8,7 +8,7 @@ import(methods)
8 8
 importFrom("grDevices", "col2rgb", "dev.cur", "dev.off", "png", "rgb")
9 9
 importFrom("graphics", "abline", "axis", "barplot", "coplot", "image",
10 10
            "legend", "lines", "matplot", "mtext", "panel.smooth",
11
-           "par", "plot", "plot.new", "points", "polygon", "rect",
11
+           "par", "plot", "plot.new", "plot.window", "points", "polygon", "rect",
12 12
            "segments", "text", "title")
13 13
 importFrom("stats", "approx", "approxfun", "as.dendrogram", "as.dist", "cmdscale",
14 14
            "coef", "contr.sum", "contrasts<-", "cov", "cov2cor",
15 15
new file mode 100644
... ...
@@ -0,0 +1,173 @@
1
+plotExonJunc <- function(fit, coef=ncol(fit), geneid, genecolname=NULL, FDR=0.05, annotation=NULL)
2
+#	Assuming the data for diffSplice analysis contains both exons and junctions,
3
+#	'fit' is a MArrayLM object produced by diffSplice().
4
+#	To distinguish between exons and junctions, 'fit$genes$Length' are set to 1 for all the junctions.
5
+#	Since the diffSplice analysis is usually performed after filtering,
6
+#	the full annotation (e.g. the inbuilt annotation used by featureCounts)
7
+#	is required for producing the plot.
8
+#	Yunshun Chen and Gordon Smyth.
9
+#	Created 9 March 2018. Last modified 29 Oct 2018.
10
+{
11
+	if(is.null(genecolname)) 
12
+		genecolname <- fit$genecolname
13
+	else
14
+		genecolname <- as.character(genecolname)
15
+
16
+	geneid <- as.character(geneid)
17
+	i <- fit$genes[, genecolname]==geneid
18
+	if(!any(i)) stop(paste0(geneid, " not found."))
19
+
20
+#	Subsetting
21
+	fdr <- p.adjust(fit$p.value[,coef], method="BH")
22
+	fdr <- fdr[i]
23
+	genes <- fit$genes[i,]
24
+	strand <- genes$Strand[1]
25
+	p.value <- fit$p.value[i,coef]
26
+	coefficients <- fit$coefficients[i,coef]
27
+
28
+#	Sorting	exons and junctions by their start positions
29
+	o <- order(genes$Start, genes$End)
30
+	genes <- genes[o,]
31
+	p.value <- p.value[o]
32
+	coefficients <- coefficients[o]
33
+	
34
+#	Which ones are exons? (Junctions are assigned length of 1 prior to the diffSplice analysis)
35
+	IsExon <- genes$Length > 1L
36
+	genes.e <- genes[IsExon, ]
37
+
38
+#	Check the format of the annotation file.
39
+	if(!is.null(annotation)){
40
+		if(is.null(annotation$GeneID)) stop("Annotation file must contain Entrez gene ids.")
41
+		if(is.null(annotation$Start) | is.null(annotation$End)) stop("Annotation file must contain start-end positions of exons.")
42
+		if(is.null(annotation$Length)) annotation$Length <- abs(annotation$End - annotation$Start) + 1L
43
+
44
+#		Retrieve annotation information for the exons that have been filtered prior to the diffSplice analysis
45
+		sel <- annotation$GeneID == genes$GeneID[1]
46
+		genes.e2 <- annotation[sel, ]
47
+		genes.e2 <- genes.e2[order(genes.e2$Start), ]
48
+		m <- match(genes.e$Start, genes.e2$Start)
49
+		genes.e <- genes.e2
50
+	}
51
+
52
+#	Get the start-end positions and the length for all the introns
53
+	Start.i <- genes.e$End[-nrow(genes.e)] + 1L
54
+	End.i <- genes.e$Start[-1] - 1L
55
+	genes.i <- genes.e[-1,]
56
+	genes.i$Start <- Start.i
57
+	genes.i$End <- End.i
58
+	genes.i$Length <- genes.i$End - genes.i$Start + 1L
59
+
60
+#	Get the start-end positions for all the junctions
61
+	if(any(!IsExon)){
62
+		genes.j <- genes[!IsExon, ]
63
+		
64
+#		Extend the plotting range for the gene in case there are junctions outside of the gene range.
65
+		if(min(genes.j$Start) < min(genes.e$Start)){
66
+			intron <- genes.i[1,,drop=FALSE]
67
+			intron$Start <- min(genes.j$Start)
68
+			intron$End <- min(genes.e$Start) - 1L
69
+			intron$Length <- intron$End - intron$Start + 1L
70
+			genes.i <- rbind(intron, genes.i)
71
+		}
72
+		if(max(genes.j$End) > max(genes.e$End)){
73
+			intron <- genes.i[1,,drop=FALSE]
74
+			intron$Start <- max(genes.e$End) + 1L
75
+			intron$End <- max(genes.j$End)
76
+			intron$Length <- intron$End - intron$Start + 1L
77
+			genes.i <- rbind(genes.i, intron)
78
+		}
79
+	}
80
+
81
+#	Combine introns and exons for plotting
82
+	genes.ie <- rbind(cbind(genes.e, Flag="Exon"), cbind(genes.i, Flag="Intron"))
83
+	genes.ie <- genes.ie[order(genes.ie$Start), ]
84
+
85
+#	Scale the length of intron/exon segments for better visualization
86
+	pseudo.length <- (genes.ie$Length)^.5
87
+	pseudo.pos <- cumsum((genes.ie$Length)^.5)
88
+	pseudo.start <- c(0, pseudo.pos[-nrow(genes.ie)])
89
+	pseudo.end <- pseudo.pos
90
+	genes.ie <- cbind(genes.ie, pseudo.start=pseudo.start, pseudo.end=pseudo.end, pseudo.length=pseudo.length)
91
+
92
+#	Update start-end postions for junctions on the pseudo scale
93
+	if(any(!IsExon)){
94
+		genes.j <- cbind(genes.j, pseudo.start=0, pseudo.end=0)
95
+		for(j in 1:nrow(genes.j)){
96
+			k <- which(genes.j$Start[j] <= genes.ie$End)[1]
97
+			genes.j$pseudo.start[j] <- genes.ie$pseudo.end[k] - (genes.ie$End[k] - genes.j$Start[j]) / genes.ie$Length[k] * genes.ie$pseudo.length[k]
98
+			k <- which(genes.j$End[j] <= genes.ie$End)[1]
99
+			genes.j$pseudo.end[j] <- genes.ie$pseudo.end[k] - (genes.ie$End[k] - genes.j$End[j]) / genes.ie$Length[k] * genes.ie$pseudo.length[k]
100
+		}
101
+	}
102
+
103
+#	Setup the plot
104
+	GeneStart <- min(genes.ie$pseudo.start)
105
+	GeneEnd <- max(genes.ie$pseudo.end)
106
+	gene.length <- GeneEnd - GeneStart
107
+	plot.new()
108
+	plot.window(xlim=c(GeneStart, GeneEnd), ylim=c(-0.7, 0.7))
109
+	title(main=paste0(geneid, " (", genes$Strand[1], ")"))
110
+
111
+#	Plot gene range
112
+	rect(xleft=GeneStart, xright=GeneEnd, ybottom=-0.02, ytop=0.02, col="gray", border="gray")
113
+	if(strand=="+"){
114
+		tx.left <- "5'"
115
+		tx.right <- "3'"
116
+	} else {
117
+		tx.left <- "3'"
118
+		tx.right <- "5'"
119
+	}
120
+	text(x=-0.02*gene.length, y=0.1, labels=tx.left)
121
+	text(x=1.02*gene.length, y=0.1, labels=tx.right)
122
+	
123
+#	Direction and significance of the diffSplice results
124
+	up <- coefficients > 0
125
+	down <- coefficients < 0
126
+	IsSig <- fdr < FDR
127
+	#IsSig <- p.adjust(p.value, method="holm") < FDR
128
+	IsSig.j <- IsSig[!IsExon]
129
+	down.j <- down[!IsExon]
130
+	
131
+#	Colouring
132
+	col <- rep("black", sum(i))
133
+	col[up & IsSig] <- "red"
134
+	col[down & IsSig] <- "dodgerblue"
135
+	col.e <- col[IsExon]
136
+	col.j <- col[!IsExon]
137
+	
138
+#	Filtered exons are coloured in grey
139
+	if(!is.null(annotation)){
140
+		col.e2 <- rep("grey", nrow(genes.e))
141
+		col.e2[m] <- col.e
142
+		col.e <- col.e2
143
+	}
144
+
145
+#	Plot exons
146
+	ex <- genes.ie$Flag=="Exon"
147
+	rect(xleft=genes.ie$pseudo.start[ex], xright=genes.ie$pseudo.end[ex], ybottom=-0.1,ytop=0.1, col=col.e, border=col.e)
148
+
149
+#	Plot junctions
150
+	if(any(!IsExon)){
151
+		MidPoint <- (genes.j$pseudo.start + genes.j$pseudo.end)/2
152
+		y0 <- rep(0.11, sum(!IsExon))
153
+		y1 <- rep(0.4, sum(!IsExon))
154
+		y1[IsSig.j] <- 0.6
155
+		y0[down.j] <- -y0[down.j]
156
+		y1[down.j] <- -y1[down.j]
157
+		segments(x0=genes.j$pseudo.start, x1=MidPoint, y0=y0, y1=y1, col=col.j, lwd=2)
158
+		segments(x0=MidPoint, x1=genes.j$pseudo.end, y0=y1, y1=y0, col=col.j, lwd=2)
159
+	}
160
+
161
+#	Label axis
162
+	if(genes$Strand[1]=="+")
163
+		labels <- paste0("Exon.", 1:length(col.e))
164
+	else
165
+		labels <- paste0("Exon.", length(col.e):1)
166
+	axis(side=1, at=(genes.ie$pseudo.start[ex]+genes.ie$pseudo.end[ex])/2, las=2, labels=labels)
167
+
168
+	invisible()
169
+}
170
+
171
+
172
+
173
+
... ...
@@ -3,6 +3,116 @@
3 3
 \encoding{UTF-8}
4 4
 
5 5
 
6
+\section{Version 3.38.0}{\itemize{
7
+
8
+\item
9
+New function plotExonJunc() to plot results from diffSplice().
10
+
11
+\item
12
+New function logsumexp().
13
+
14
+\item
15
+New argument hl.col for volcanoplot(), allowing users to specify
16
+  the color for the gene names when highlight > 0.
17
+
18
+\item
19
+barcodeplot() no longer assumes that 'statistic'
20
+  has unique names. Previously it returned an error if
21
+  names(statistic) contained any duplicated values.
22
+
23
+\item
24
+The colors "blue", "red" and "yellow" used by coolmap() changed to
25
+  "blue2", "red2" and "yellow2" when used in a color panel with white.
26
+
27
+\item
28
+goana.Rd now explains more explicitly that p-values are unadjusted
29
+  for multiple testing.
30
+
31
+\item
32
+arrayWeights.Rd now mentions minimum dimensions for expression
33
+  object.
34
+
35
+\item
36
+More advice on how to choose 'lfc' added to the treat() help page.
37
+
38
+\item
39
+Minor bug fix to the mixed p-value from roast() and mroast() when
40
+  set.statistic="floormean".
41
+
42
+\item
43
+Bug fix for cumOverlap(), which was under-counting overlaps in some
44
+  cases.
45
+
46
+}}
47
+
48
+
49
+\section{Version 3.36.0}{\itemize{
50
+
51
+\item
52
+New arguments 'quote' and 'row.names' for write.fit(). Write.fit()
53
+  now outputs row.names by default, which previously were suppressed.
54
+
55
+\item
56
+coolmap() will now accept an arbitrary vector of colors. A new
57
+  preset "whitered" panel is now also supported.
58
+
59
+\item
60
+In the past, contrasts.fit() always returned NA contrasts for genes
61
+  with any NA fitted coefficients. contrasts.fit() will now ignore an
62
+  NA coefficient if the contrast multiplier is zero for that
63
+  coefficient.
64
+
65
+\item
66
+The treat() default for 'lfc' changed from lfc=0 to lfc=log2(1.2).
67
+
68
+\item
69
+kegga() and goana() now check whether a data.frame has been input
70
+  by mistake and generates an error. Previously a data.frame value
71
+  for 'de' was interpreted as a list of gene sets without any error.
72
+
73
+\item
74
+voomWithQualityWeights() now returns 'sample.weights' as a column
75
+  of the 'targets' data.frame instead of as a separate vector.
76
+
77
+\item
78
+"TestResults" objects now include a 'labels' attribute, defaulting
79
+  to c("Down","NotSig","Up").
80
+
81
+\item
82
+Functions ebayes() and toptable(), long ago replaced by eBayes() and
83
+topTable(), are now formally deprecated.
84
+
85
+\item
86
+Update the User's Guide case study that analyses Lrp- E. Coli
87
+  samples and profiled by Affymetrix arrays.
88
+
89
+\item
90
+Update the Agilent single-channel case study in Section 17.4 of the
91
+  Users' Guide to use the Agilent gIsWellAboveBG detection column.
92
+
93
+\item
94
+Comments added to voom.Rd and eBayes.Rd to clarify the relationship
95
+  between limma-trend and voom.
96
+
97
+\item
98
+In the kegga help page, clarify default gene ID system used by
99
+kegga() when species="dme".
100
+
101
+\item
102
+Update wsva() help page to describe correct number of columns for output.
103
+
104
+\item
105
+bug fix for goana() when 'covariate' is specified and some of the
106
+  'universe' genes don't have GO annotation.
107
+
108
+\item
109
+Fix to how roast(), mroast() and fry() handle weights. The 'weight' argument
110
+can now be a matrix, or a vector of length nrow(y), or a vector of length ncol(y).
111
+This allows the functions to accept precision weights, or gene weights, or sample weights.
112
+
113
+}}
114
+
115
+
6 116
 \section{Version 3.34.0}{\itemize{
7 117
 
8 118
 \item 
... ...
@@ -62,6 +172,7 @@ plotWithHighlights() checks whether 'status' is a factor and
62 172
 Bug fixes for beadCountWeights(). Default is now set correct for
63 173
   'design' and the function now works correctly when 'y' is an EList
64 174
   object contain bead standard errors but not standard deviations.
175
+
65 176
 }}
66 177
 
67 178
 
... ...
@@ -1,3 +1,11 @@
1
+29 Oct 2018: limma 3.37.11
2
+
3
+- New function plotExonJunc() to plot results from diffSplice() when
4
+  exon-exon junctions as well as exons are included in the count
5
+  matrix.
6
+
7
+- Update NEWS.Rd for Bioconductor 3.8 Release.
8
+
1 9
 14 Oct 2018: limma 3.37.10
2 10
 
3 11
 - Edit to barcodeplot() so that it no longer assumes that 'statistic'
4 12
new file mode 100644
... ...
@@ -0,0 +1,51 @@
1
+\title{Differential splicing plot with junctions}
2
+\name{plotExonJunc}
3
+\alias{plotExonJunc}
4
+\description{
5
+Plot differential usage results by exons and junctions for the specified gene and highlight the significantly spliced exons and junctions.
6
+}
7
+\usage{
8
+plotExonJunc(fit, coef=ncol(fit), geneid, genecolname=NULL, FDR=0.05, annotation=NULL)
9
+}
10
+\arguments{
11
+  \item{fit}{\code{MArrayLM} fit object produced by \code{diffSplice}. Must have the Entrez gene ids for all the exons and junctions stored in \code{fit$genes$GeneID}, length information for all the exons and junctions stored in \code{fit$genes$Length} and the strand information stored in \code{fit$genes$Strand}. To distinguish between exons and junctions, \code{fit$genes$Length} has to be set to 1 for all the junctions.}
12
+  \item{coef}{the coefficient (column) of fit for which differentially splicing is assessed.}
13
+  \item{geneid}{character string, ID of the gene to plot.}
14
+  \item{genecolname}{column name of \code{fit$genes} containing \code{geneid}.}
15
+  \item{FDR}{numeric, highlight exons and junctions with false discovery rate less than this cutoff. Red indicates up-regulation whereas blue indicates down-regulation. The FDR of the individual exon/junction is calculated based on the exon-level t-statistics test for differences between each exon/junction and all other exons/junctions for the same gene.}
16
+  \item{annotation}{data frame containing the full exon annotation of the corresponding species. Must have the Entrez gene ids for all the exons stored in the \code{GeneID} column, start and end positions for all the exons stored in the \code{Start} and \code{End} columns, respectively.}
17
+}
18
+
19
+\details{
20
+Plot differential usage results by exons and junctions for the specified gene.
21
+The significantly spliced individual exons are highlighted as red blocks if up-regulated and blue blocks if down-regulated. 
22
+All other exons are displayed as black blocks.
23
+The significantly spliced individual junctions are highlighted as red lines if up-regulated and blue lines if down-regulated.
24
+All other junctions are displayed as black lines.
25
+
26
+Since the \code{diffSplice} analysis is usually performed after filtering, the full annotation (e.g. the inbuilt annotation in \code{featureCounts}) is highly recommended for producing the plot. When \code{annotation} is provided, the filtered exons are displayed as grey blocks.
27
+}
28
+
29
+\value{A plot is created on the current graphics device.}
30
+\author{Yunshun Chen and Gordon Smyth}
31
+\seealso{
32
+\code{\link{diffSplice}}, \code{\link{topSplice}}
33
+}
34
+\examples{
35
+\dontrun{
36
+# diffSplice analysis
37
+v <- voom(dge, design)
38
+fit <- lmFit(v, design)
39
+ex <- diffSplice(fit, geneid="GeneID")
40
+
41
+# Get full annotation from Rsubread
42
+library(Rsubread)
43
+annotation.full <- getInBuiltAnnotation("mm10")
44
+
45
+# Make a plot
46
+plotExonJunc(ex, geneid="Foxp1", genecolname="Symbol", annotation=annotation.full)
47
+}
48
+}
49
+
50
+\keyword{hplot}
51
+\keyword{rna-seq}