git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@75014 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: SplicingGraphs |
2 | 2 |
Title: Tools for creating splicing graphs from annotations and RNA-Seq data |
3 |
-Version: 0.8.1 |
|
3 |
+Version: 0.8.2 |
|
4 | 4 |
Author: D. Bindreither, M. Carlson, M. Morgan, H. Pages |
5 | 5 |
License: Artistic-2.0 |
6 | 6 |
Description: This package provides tools for creating splicing graphs based on |
... | ... |
@@ -21,6 +21,7 @@ Collate: utils.R |
21 | 21 |
igraph-utils.R |
22 | 22 |
SplicingGraphs-class.R |
23 | 23 |
sgedgesByTranscript-methods.R |
24 |
+ txpath-methods.R |
|
24 | 25 |
sgedges-methods.R |
25 | 26 |
sgraph-methods.R |
26 | 27 |
bubbles-methods.R |
... | ... |
@@ -80,9 +80,11 @@ export( |
80 | 80 |
## sgedgesByTranscript-methods.R: |
81 | 81 |
sgedgesByTranscript, |
82 | 82 |
|
83 |
- ## sgedges-methods.R: |
|
84 |
- txpaths, |
|
83 |
+ ## txpath-methods.R: |
|
84 |
+ txpath, |
|
85 | 85 |
UATXHcount, |
86 |
+ |
|
87 |
+ ## sgedges-methods.R: |
|
86 | 88 |
sgedges, |
87 | 89 |
sgnodes, |
88 | 90 |
outdeg, indeg, |
... | ... |
@@ -100,7 +102,7 @@ exportMethods( |
100 | 102 |
plotTranscripts, |
101 | 103 |
SplicingGraphs, |
102 | 104 |
sgedgesByTranscript, |
103 |
- txpaths, |
|
105 |
+ txpath, |
|
104 | 106 |
UATXHcount, |
105 | 107 |
sgedges, |
106 | 108 |
sgnodes, |
... | ... |
@@ -369,19 +369,19 @@ setMethod("plotTranscripts", "SplicingGraphs", |
369 | 369 |
if (!is.null(tx_id)) |
370 | 370 |
gene_mcols$tx_id <- tx_id |
371 | 371 |
|
372 |
- ## Set "txpaths" metadata col. |
|
373 |
- if ("txpaths" %in% colnames(gene_mcols)) |
|
374 |
- stop("'gene' already has metadata column txpaths") |
|
372 |
+ ## Set "txpath" metadata col. |
|
373 |
+ if ("txpath" %in% colnames(gene_mcols)) |
|
374 |
+ stop("'gene' already has metadata column txpath") |
|
375 | 375 |
if (on.minus.strand) { |
376 |
- txpaths <- rbind(SSids$end_SSid, SSids$start_SSid) |
|
376 |
+ txpath <- rbind(SSids$end_SSid, SSids$start_SSid) |
|
377 | 377 |
} else { |
378 |
- txpaths <- rbind(SSids$start_SSid, SSids$end_SSid) |
|
378 |
+ txpath <- rbind(SSids$start_SSid, SSids$end_SSid) |
|
379 | 379 |
} |
380 |
- txpaths_partitioning_end <- end(PartitioningByEnd(gene)) * 2L |
|
381 |
- txpaths_partitioning <- PartitioningByEnd(txpaths_partitioning_end) |
|
382 |
- names(txpaths_partitioning) <- tx_id |
|
383 |
- txpaths <- splitAsList(as.vector(txpaths), txpaths_partitioning) |
|
384 |
- gene_mcols$txpaths <- txpaths |
|
380 |
+ txpath_partitioning_end <- end(PartitioningByEnd(gene)) * 2L |
|
381 |
+ txpath_partitioning <- PartitioningByEnd(txpath_partitioning_end) |
|
382 |
+ names(txpath_partitioning) <- tx_id |
|
383 |
+ txpath <- splitAsList(as.vector(txpath), txpath_partitioning) |
|
384 |
+ gene_mcols$txpath <- txpath |
|
385 | 385 |
|
386 | 386 |
mcols(gene) <- gene_mcols |
387 | 387 |
gene |
... | ... |
@@ -236,15 +236,15 @@ setMethod("bubbles", "SplicingGraphs", |
236 | 236 |
setMethod("bubbles", "ANY", |
237 | 237 |
function(x) |
238 | 238 |
{ |
239 |
- txpaths <- txpaths(x) |
|
240 |
- bubbles(txpaths) |
|
239 |
+ txpath <- txpath(x) |
|
240 |
+ bubbles(txpath) |
|
241 | 241 |
} |
242 | 242 |
) |
243 | 243 |
|
244 | 244 |
setMethod("bubbles", "IntegerList", |
245 | 245 |
function(x) |
246 | 246 |
{ |
247 |
- txpathmat <- make_matrix_from_txpaths(x) |
|
247 |
+ txpathmat <- make_matrix_from_txpath(x) |
|
248 | 248 |
#outdeg <- outdeg(x) |
249 | 249 |
#indeg <- indeg(x) |
250 | 250 |
#.extract_bubbles_from_txpathmat(txpathmat, outdeg, indeg) |
... | ... |
@@ -3,12 +3,6 @@ |
3 | 3 |
### ------------------------------------------------------------------------- |
4 | 4 |
|
5 | 5 |
|
6 |
-.get_sgnodes_from_txpaths <- function(txpaths) |
|
7 |
-{ |
|
8 |
- SSids <- unique(unlist(txpaths, use.names=FALSE)) |
|
9 |
- c("R", sort(SSids), "L") |
|
10 |
-} |
|
11 |
- |
|
12 | 6 |
.get_sgnodes_from_sgedges <- function(sgedges) |
13 | 7 |
{ |
14 | 8 |
from <- sgedges[ , "from"] |
... | ... |
@@ -17,68 +11,6 @@ |
17 | 11 |
c("R", sort(SSids), "L") |
18 | 12 |
} |
19 | 13 |
|
20 |
-make_matrix_from_txpaths <- function(txpaths) |
|
21 |
-{ |
|
22 |
- sgnodes <- .get_sgnodes_from_txpaths(txpaths) |
|
23 |
- ans_nrow <- length(txpaths) |
|
24 |
- ans_ncol <- length(sgnodes) |
|
25 |
- ans_dimnames <- list(names(txpaths), sgnodes) |
|
26 |
- ans <- matrix(FALSE , nrow=ans_nrow, ncol=ans_ncol, dimnames=ans_dimnames) |
|
27 |
- ans[ , 1L] <- ans[ , ans_ncol] <- TRUE |
|
28 |
- i <- cbind(rep.int(seq_along(txpaths), elementLengths(txpaths)), |
|
29 |
- unlist(txpaths, use.names=FALSE) + 1L) |
|
30 |
- ans[i] <- TRUE |
|
31 |
- ans |
|
32 |
-} |
|
33 |
- |
|
34 |
- |
|
35 |
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
36 |
-### txpaths() accessor |
|
37 |
-### |
|
38 |
-### Gets the splicing paths. |
|
39 |
-### Returns them in a named IntegerList with 1 top-level element per |
|
40 |
-### transcript in the specified gene. Each top-level element 'txpaths[[i]]' |
|
41 |
-### contains the splicing site ids for the i-th transcript. |
|
42 |
-### |
|
43 |
- |
|
44 |
-setGeneric("txpaths", signature="x", |
|
45 |
- function(x, as.matrix=FALSE) standardGeneric("txpaths") |
|
46 |
-) |
|
47 |
- |
|
48 |
-### Should return a CompressedIntegerList. |
|
49 |
-setMethod("txpaths", "SplicingGraphs", |
|
50 |
- function(x, as.matrix=FALSE) |
|
51 |
- { |
|
52 |
- if (length(x) != 1L) |
|
53 |
- stop("'x' must be a SplicingGraphs object of length 1") |
|
54 |
- if (!isTRUEorFALSE(as.matrix)) |
|
55 |
- stop("'as.matrix' must be TRUE or FALSE") |
|
56 |
- ans <- mcols(unlist(x, use.names=FALSE))[ , "txpaths"] |
|
57 |
- if (as.matrix) |
|
58 |
- ans <- make_matrix_from_txpaths(ans) |
|
59 |
- ans |
|
60 |
- } |
|
61 |
-) |
|
62 |
- |
|
63 |
- |
|
64 |
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
65 |
-### UATXHcount() accessor |
|
66 |
-### |
|
67 |
- |
|
68 |
-setGeneric("UATXHcount", signature="x", |
|
69 |
- function(x) standardGeneric("UATXHcount") |
|
70 |
-) |
|
71 |
- |
|
72 |
-### Should return an integer vector or a NULL. |
|
73 |
-setMethod("UATXHcount", "SplicingGraphs", |
|
74 |
- function(x) |
|
75 |
- { |
|
76 |
- if (length(x) != 1L) |
|
77 |
- stop("'x' must be a SplicingGraphs object of length 1") |
|
78 |
- mcols(unlist(x, use.names=FALSE))[["UATXHcount"]] |
|
79 |
- } |
|
80 |
-) |
|
81 |
- |
|
82 | 14 |
|
83 | 15 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
84 | 16 |
### sgedges() extractor |
... | ... |
@@ -86,29 +18,29 @@ setMethod("UATXHcount", "SplicingGraphs", |
86 | 18 |
### Returns the splicing graph in a DataFrame with 1 row per edge. |
87 | 19 |
### |
88 | 20 |
|
89 |
-### 'txpaths' must be an IntegerList containing all the splicing paths (1 per |
|
90 |
-### transcript) for a given gene. Should have been obtained thru the txpaths() |
|
21 |
+### 'txpath' must be an IntegerList containing all the splicing paths (1 per |
|
22 |
+### transcript) for a given gene. Should have been obtained thru the txpath() |
|
91 | 23 |
### accessor. Returns a 4-col (or 5-col if 'UATXHcount' is supplied) data.frame |
92 | 24 |
### representing the splicing graph. |
93 |
-.make_sgedges0_from_txpaths <- function(txpaths, UATXHcount=NULL) |
|
25 |
+.make_sgedges0_from_txpath <- function(txpath, UATXHcount=NULL) |
|
94 | 26 |
{ |
95 | 27 |
if (!is.null(UATXHcount)) { |
96 | 28 |
if (!is.integer(UATXHcount)) |
97 | 29 |
stop("'UATXHcount' must be an integer vector or NULL") |
98 |
- if (length(UATXHcount) != length(txpaths)) |
|
30 |
+ if (length(UATXHcount) != length(txpath)) |
|
99 | 31 |
stop("when not NULL, 'UATXHcount' must have ", |
100 |
- "the same length as 'txpaths'") |
|
32 |
+ "the same length as 'txpath'") |
|
101 | 33 |
} |
102 |
- sgedges0s <- lapply(seq_along(txpaths), |
|
34 |
+ sgedges0s <- lapply(seq_along(txpath), |
|
103 | 35 |
function(i) { |
104 |
- txpath <- txpaths[[i]] |
|
105 |
- txpath_len <- length(txpath) |
|
106 |
- if (txpath_len %% 2L != 0L) |
|
107 |
- stop("some paths in 'txpaths' contain ", |
|
36 |
+ txpath_i <- txpath[[i]] |
|
37 |
+ txpath_i_len <- length(txpath_i) |
|
38 |
+ if (txpath_i_len %% 2L != 0L) |
|
39 |
+ stop("some paths in 'txpath' contain ", |
|
108 | 40 |
"an odd number of splicing site ids") |
109 |
- from <- c("R", txpath) |
|
110 |
- to <- c(txpath, "L") |
|
111 |
- nexons <- txpath_len %/% 2L |
|
41 |
+ from <- c("R", txpath_i) |
|
42 |
+ to <- c(txpath_i, "L") |
|
43 |
+ nexons <- txpath_i_len %/% 2L |
|
112 | 44 |
if (nexons == 0L) { |
113 | 45 |
ex_or_in <- EX_OR_IN_LEVELS[3L] |
114 | 46 |
} else { |
... | ... |
@@ -128,9 +60,9 @@ setMethod("UATXHcount", "SplicingGraphs", |
128 | 60 |
}) |
129 | 61 |
nedges_per_tx <- sapply(sgedges0s, nrow) |
130 | 62 |
sgedges0 <- do.call(rbind, sgedges0s) |
131 |
- tx_id <- names(txpaths) |
|
63 |
+ tx_id <- names(txpath) |
|
132 | 64 |
if (is.null(tx_id)) |
133 |
- tx_id <- seq_along(txpaths) |
|
65 |
+ tx_id <- seq_along(txpath) |
|
134 | 66 |
tx_id <- rep.int(factor(tx_id, levels=tx_id), nedges_per_tx) |
135 | 67 |
sgedges0$tx_id <- tx_id |
136 | 68 |
if (!is.null(UATXHcount)) |
... | ... |
@@ -246,13 +178,13 @@ setMethod("sgedges", "SplicingGraphs", |
246 | 178 |
{ |
247 | 179 |
if (!isTRUEorFALSE(keep.dup.edges)) |
248 | 180 |
stop("'keep.dup.edges' must be TRUE or FALSE") |
249 |
- txpaths <- txpaths(x) |
|
181 |
+ txpath <- txpath(x) |
|
250 | 182 |
if (is.null(UATXHcount)) |
251 | 183 |
UATXHcount <- UATXHcount(x) |
252 | 184 |
if (keep.dup.edges) |
253 |
- return(sgedges(txpaths, UATXHcount=UATXHcount, |
|
185 |
+ return(sgedges(txpath, UATXHcount=UATXHcount, |
|
254 | 186 |
keep.dup.edges=keep.dup.edges)) |
255 |
- sgedges0 <- sgedges(txpaths, UATXHcount=UATXHcount, |
|
187 |
+ sgedges0 <- sgedges(txpath, UATXHcount=UATXHcount, |
|
256 | 188 |
keep.dup.edges=TRUE) |
257 | 189 |
exon_hits <- .extract_sgedges_exon_hits(x) |
258 | 190 |
intron_hits <- .extract_sgedges_intron_hits(x) |
... | ... |
@@ -269,7 +201,7 @@ setMethod("sgedges", "SplicingGraphs", |
269 | 201 |
setMethod("sgedges", "IntegerList", |
270 | 202 |
function(x, UATXHcount=NULL, keep.dup.edges=FALSE) |
271 | 203 |
{ |
272 |
- sgedges0 <- .make_sgedges0_from_txpaths(x, UATXHcount=UATXHcount) |
|
204 |
+ sgedges0 <- .make_sgedges0_from_txpath(x, UATXHcount=UATXHcount) |
|
273 | 205 |
sgedges(sgedges0, keep.dup.edges=keep.dup.edges) |
274 | 206 |
} |
275 | 207 |
) |
... | ... |
@@ -300,13 +232,13 @@ setGeneric("sgnodes", signature="x", |
300 | 232 |
setMethod("sgnodes", "ANY", |
301 | 233 |
function(x) |
302 | 234 |
{ |
303 |
- txpaths <- txpaths(x) |
|
304 |
- sgnodes(txpaths) |
|
235 |
+ txpath <- txpath(x) |
|
236 |
+ sgnodes(txpath) |
|
305 | 237 |
} |
306 | 238 |
) |
307 | 239 |
|
308 | 240 |
setMethod("sgnodes", "IntegerList", |
309 |
- function(x) .get_sgnodes_from_txpaths(x) |
|
241 |
+ function(x) get_sgnodes_from_txpath(x) |
|
310 | 242 |
) |
311 | 243 |
|
312 | 244 |
setMethod("sgnodes", "data.frame", |
313 | 245 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,89 @@ |
1 |
+### ========================================================================= |
|
2 |
+### "txpath" (and related) methods |
|
3 |
+### ------------------------------------------------------------------------- |
|
4 |
+ |
|
5 |
+ |
|
6 |
+get_sgnodes_from_txpath <- function(txpath) |
|
7 |
+{ |
|
8 |
+ SSids <- unique(unlist(txpath, use.names=FALSE)) |
|
9 |
+ c("R", sort(SSids), "L") |
|
10 |
+} |
|
11 |
+ |
|
12 |
+make_matrix_from_txpath <- function(txpath) |
|
13 |
+{ |
|
14 |
+ sgnodes <- get_sgnodes_from_txpath(txpath) |
|
15 |
+ ans_nrow <- length(txpath) |
|
16 |
+ ans_ncol <- length(sgnodes) |
|
17 |
+ ans_dimnames <- list(names(txpath), sgnodes) |
|
18 |
+ ans <- matrix(FALSE , nrow=ans_nrow, ncol=ans_ncol, dimnames=ans_dimnames) |
|
19 |
+ ans[ , 1L] <- ans[ , ans_ncol] <- TRUE |
|
20 |
+ i <- cbind(rep.int(seq_along(txpath), elementLengths(txpath)), |
|
21 |
+ unlist(txpath, use.names=FALSE) + 1L) |
|
22 |
+ ans[i] <- TRUE |
|
23 |
+ ans |
|
24 |
+} |
|
25 |
+ |
|
26 |
+ |
|
27 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
28 |
+### txpath() accessor |
|
29 |
+### |
|
30 |
+### Gets the splicing paths. |
|
31 |
+### Returns them in a named IntegerList object (actually a |
|
32 |
+### CompressedIntegerList instance) with 1 list element per transcript. |
|
33 |
+### Each list element 'txpath[[i]]' contains the Splicing Site ids for the |
|
34 |
+### i-th transcript. |
|
35 |
+### |
|
36 |
+ |
|
37 |
+setGeneric("txpath", signature="x", |
|
38 |
+ function(x, as.matrix=FALSE) standardGeneric("txpath") |
|
39 |
+) |
|
40 |
+ |
|
41 |
+setMethod("txpath", "GRangesList", |
|
42 |
+ function(x, as.matrix=FALSE) |
|
43 |
+ { |
|
44 |
+ if (!identical(as.matrix, FALSE)) |
|
45 |
+ stop("the 'as.matrix' arg is not supported ", |
|
46 |
+ "when 'x' is a GRangesList object") |
|
47 |
+ ans <- mcols(x)[ , "txpath"] |
|
48 |
+ x_names <- names(x) |
|
49 |
+ if (!is.null(x_names)) |
|
50 |
+ names(ans) <- x_names |
|
51 |
+ ans |
|
52 |
+ } |
|
53 |
+) |
|
54 |
+ |
|
55 |
+setMethod("txpath", "SplicingGraphs", |
|
56 |
+ function(x, as.matrix=FALSE) |
|
57 |
+ { |
|
58 |
+ if (length(x) != 1L) |
|
59 |
+ stop("'x' must be a SplicingGraphs object of length 1") |
|
60 |
+ if (!isTRUEorFALSE(as.matrix)) |
|
61 |
+ stop("'as.matrix' must be TRUE or FALSE") |
|
62 |
+ ## Same as 'x[[1]]' but should be faster. |
|
63 |
+ x_1 <- unlist(x, use.names=FALSE) |
|
64 |
+ ans <- txpath(x_1) |
|
65 |
+ if (!as.matrix) |
|
66 |
+ return(ans) |
|
67 |
+ make_matrix_from_txpath(ans) |
|
68 |
+ } |
|
69 |
+) |
|
70 |
+ |
|
71 |
+ |
|
72 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
73 |
+### UATXHcount() accessor |
|
74 |
+### |
|
75 |
+ |
|
76 |
+setGeneric("UATXHcount", signature="x", |
|
77 |
+ function(x) standardGeneric("UATXHcount") |
|
78 |
+) |
|
79 |
+ |
|
80 |
+### Should return an integer vector or a NULL. |
|
81 |
+setMethod("UATXHcount", "SplicingGraphs", |
|
82 |
+ function(x) |
|
83 |
+ { |
|
84 |
+ if (length(x) != 1L) |
|
85 |
+ stop("'x' must be a SplicingGraphs object of length 1") |
|
86 |
+ mcols(unlist(x, use.names=FALSE))[["UATXHcount"]] |
|
87 |
+ } |
|
88 |
+) |
|
89 |
+ |
... | ... |
@@ -135,7 +135,7 @@ A splicing graph is a directed acyclic graph (DAG) where: |
135 | 135 |
\begin{itemize} |
136 | 136 |
\item The vertices (a.k.a. \textit{nodes}) represent the splicing sites |
137 | 137 |
for a given gene. Splicing sites are numbered from $1$ to $n$ |
138 |
- based on their order of appearance when moving in the 5' to 3' |
|
138 |
+ based on the order they occur when moving in the 5' to 3' |
|
139 | 139 |
direction along the strand where the transcripts are located. |
140 | 140 |
Note that splicing graphs are only defined for genes that have all |
141 | 141 |
the exons of all their transcripts on the same chromosome and |
... | ... |
@@ -337,12 +337,12 @@ elementLengths(sg)[1:20] |
337 | 337 |
@ |
338 | 338 |
|
339 | 339 |
\Rfunction{elementLengths} is a core accessor for list-like objects |
340 |
-that returns |
|
340 |
+that returns the lengths of the individual list elements. |
|
341 | 341 |
|
342 | 342 |
At this point you might wonder why \Rfunction{elementLengths} works on |
343 |
-\Rclass{SplicingGraphs} object. Does this mean that those objects are |
|
343 |
+\Rclass{SplicingGraphs} objects. Does this mean that those objects are |
|
344 | 344 |
list-like objects? The answer is yes. The next thing you might wonder |
345 |
-is what do the list elements look like and how can I access them? |
|
345 |
+is what do the list elements look like and how you can access them. |
|
346 | 346 |
This is answered in the next subsection. |
347 | 347 |
|
348 | 348 |
\subsection{Basic manipulation of a \Rclass{SplicingGraphs} object} |
... | ... |
@@ -350,8 +350,8 @@ This is answered in the next subsection. |
350 | 350 |
The list elements of a list-like object can be accessed \textbf{one at a |
351 | 351 |
time} with \Rfunction{[[}. On a \Rclass{SplicingGraphs} object, this will |
352 | 352 |
extract the transcripts of a given gene. More precisely it will return |
353 |
-an \emph{unnamed} \Rclass{GRangesList} object containing the exons grouped |
|
354 |
-by transcript: |
|
353 |
+an \emph{unnamed} \Rclass{GRangesList} object containing the exons of the |
|
354 |
+gene grouped by transcript: |
|
355 | 355 |
|
356 | 356 |
\begin{small} |
357 | 357 |
<<sg_double_bracket_107328>>= |
... | ... |
@@ -364,29 +364,33 @@ The exon-level metadata columns are: |
364 | 364 |
\item \Rcode{exon\_id}: The original internal exon id as stored in the |
365 | 365 |
\Rclass{TranscriptDb} object. This id was created and assigned to |
366 | 366 |
each exon when the \Rclass{TranscriptDb} object was created. |
367 |
- It's not a public id like, say, an Ensembl, RefSeq, or GenBank id, |
|
368 |
- and it's only guaranteed to be unique within a \Rclass{TranscriptDb} |
|
369 |
- object. |
|
367 |
+ It's not a public id like, say, an Ensembl, RefSeq, or GenBank id. |
|
368 |
+ Furthermore, it's only guaranteed to be unique within a |
|
369 |
+ \Rclass{TranscriptDb} object. |
|
370 | 370 |
\item \Rcode{exon\_name}: The original exon name as provided by the |
371 | 371 |
annotation resource (e.g. UCSC, Ensembl, or GFF file) and stored |
372 | 372 |
in the \Rclass{TranscriptDb} object when it was created. |
373 |
- \Rcode{NA} if no exon name was provided. |
|
373 |
+ Set to \Rcode{NA} if no exon name was provided. |
|
374 | 374 |
\item \Rcode{exon\_rank}: The rank of the exon in the transcript. |
375 | 375 |
\item \Rcode{start\_SSid}, \Rcode{end\_SSid}: The ids of the splicing |
376 |
- sites corresponding to the start and end coordinates of the exon. |
|
376 |
+ sites (a.k.a. \textit{Splicing Site ids}) corresponding to the |
|
377 |
+ \textit{start} and \textit{end} coordinates of the exon. |
|
378 |
+ (Please be cautious to not misinterpret the meaning of \textit{start} |
|
379 |
+ and \textit{end} here. See IMPORTANT NOTE below.) |
|
377 | 380 |
Those ids were assigned by the \Rfunction{SplicingGraphs} constructor. |
378 |
- IMPORTANT NOTE: Please keep in mind that the start and end |
|
379 |
- coordinates of an exon, like the start and end coordinates of |
|
380 |
- a genomic range in general, are following the almost universal |
|
381 |
- convention that $start$ is $<= end$, and this \textbf{regardless |
|
382 |
- of the direction of transcription}. |
|
383 | 381 |
\end{itemize} |
384 | 382 |
|
385 |
-As mentioned previously, the splicing site ids are assigned based on their |
|
386 |
-order of appearance when moving in the 5' to 3' direction along the strand |
|
383 |
+IMPORTANT NOTE: Please be aware that the \textit{start} and \textit{end} |
|
384 |
+coordinates of an exon, like the \textit{start} and \textit{end} |
|
385 |
+coordinates of a genomic range in general, are following the almost |
|
386 |
+universal convention that \textit{start} is $<=$ \textit{end}, and this |
|
387 |
+\textbf{regardless of the direction of transcription}. |
|
388 |
+ |
|
389 |
+As mentioned previously, the \textit{Splicing Site ids} are assigned based |
|
390 |
+on the order they occur when moving in the 5' to 3' direction along the strand |
|
387 | 391 |
of the gene. This means that, for a gene on the plus (resp. minus) strand, |
388 | 392 |
the ids in the \Rcode{start\_SSid} metadata column are always lower (resp. |
389 |
-greater) than those in the \Rcode{end\_SSid} metadata column: |
|
393 |
+greater) than those in the \Rcode{end\_SSid} metadata column. |
|
390 | 394 |
|
391 | 395 |
\begin{small} |
392 | 396 |
<<sg_double_bracket_104252>>= |
... | ... |
@@ -394,7 +398,7 @@ sg[["104252"]] |
394 | 398 |
@ |
395 | 399 |
\end{small} |
396 | 400 |
|
397 |
-However, on both strands, the splicing site id increases with the |
|
401 |
+However, on both strands, the \textit{Splicing Site id} increases with the |
|
398 | 402 |
rank of the exon. |
399 | 403 |
|
400 | 404 |
The \Rfunction{show} method for \Rclass{GRangesList} objects only |
... | ... |
@@ -412,20 +416,27 @@ The transcript-level metadata columns are: |
412 | 416 |
\item \Rcode{tx\_id}: The original transcript id as provided by the |
413 | 417 |
annotation resource (e.g. UCSC, Ensembl, or GFF file) and stored |
414 | 418 |
in the \Rclass{TranscriptDb} object when it was created. |
415 |
- \item \Rcode{txpaths}: A named list-like object with one list element |
|
416 |
- per transcript. Each list element is an integer vector that |
|
417 |
- describes the \textit{path} of the transcript i.e. the splicing |
|
418 |
- site ids that it goes thru. |
|
419 |
+ \item \Rcode{txpath}: A named list-like object with one list element |
|
420 |
+ per transcript in the gene. Each list element is an integer vector |
|
421 |
+ that describes the \textit{path} of the transcript i.e. the |
|
422 |
+ \textit{Splicing Site ids} that it goes thru. |
|
419 | 423 |
\end{itemize} |
420 | 424 |
|
421 | 425 |
<<mcols_sg_double_bracket_107328>>= |
422 |
-mcols(sg[["107328"]])$txpaths |
|
423 |
-mcols(sg[["104252"]])$txpaths |
|
426 |
+mcols(sg[["107328"]])$txpath |
|
427 |
+mcols(sg[["104252"]])$txpath |
|
428 |
+@ |
|
429 |
+ |
|
430 |
+A more convenient way to extract this information is to use the |
|
431 |
+\Rfunction{txpath} accessor: |
|
432 |
+ |
|
433 |
+<<txpath_sg_104252>>= |
|
434 |
+txpath(sg[["104252"]]) |
|
424 | 435 |
@ |
425 | 436 |
|
426 |
-Note that the list elements of the \Rcode{txpaths} metadata column |
|
427 |
-always consist of an even number of splicing site ids in ascending |
|
428 |
-order. |
|
437 |
+Note that the list elements of the \Rcode{txpath} metadata column |
|
438 |
+always consist of an even number of \textit{Splicing Site ids} in |
|
439 |
+ascending order. |
|
429 | 440 |
|
430 | 441 |
The transcripts in a \Rclass{GRangesList} object like \Rcode{sg[["107328"]]} |
431 | 442 |
can be plotted with \Rfunction{plotTranscripts}: |
... | ... |
@@ -6,9 +6,9 @@ |
6 | 6 |
\alias{class:SplicingGraphs} |
7 | 7 |
\alias{SplicingGraphs-class} |
8 | 8 |
|
9 |
-\alias{SplicingGraphs} |
|
10 |
-\alias{SplicingGraphs,GRangesList-method} |
|
11 |
-\alias{SplicingGraphs,TranscriptDb-method} |
|
9 |
+\alias{seqnames,.SplicingGraphGenes-method} |
|
10 |
+\alias{strand,.SplicingGraphGenes-method} |
|
11 |
+\alias{seqinfo,.SplicingGraphGenes-method} |
|
12 | 12 |
|
13 | 13 |
\alias{length,SplicingGraphs-method} |
14 | 14 |
\alias{names,SplicingGraphs-method} |
... | ... |
@@ -27,6 +27,10 @@ |
27 | 27 |
\alias{plotTranscripts,TranscriptDb-method} |
28 | 28 |
\alias{plotTranscripts,SplicingGraphs-method} |
29 | 29 |
|
30 |
+\alias{SplicingGraphs} |
|
31 |
+\alias{SplicingGraphs,GRangesList-method} |
|
32 |
+\alias{SplicingGraphs,TranscriptDb-method} |
|
33 |
+ |
|
30 | 34 |
|
31 | 35 |
\title{ |
32 | 36 |
SplicingGraphs objects |
... | ... |
@@ -181,6 +185,9 @@ plotTranscripts(x) |
181 | 185 |
\item \code{\link{sgedgesByTranscript}} for extracting all the introns |
182 | 186 |
or splicing graph edges from a SplicingGraphs object. |
183 | 187 |
|
188 |
+ \item \code{\link{txpath}} for extracting the transcript paths of a |
|
189 |
+ splicing graph. |
|
190 |
+ |
|
184 | 191 |
\item \code{\link{sgedges}} for extracting the edges (and nodes) of a |
185 | 192 |
splicing graph. |
186 | 193 |
|
... | ... |
@@ -50,6 +50,9 @@ ASCODE2DESC |
50 | 50 |
\item \code{\link{sgedgesByTranscript}} for extracting all the introns |
51 | 51 |
or splicing graph edges from a \link{SplicingGraphs} object. |
52 | 52 |
|
53 |
+ \item \code{\link{txpath}} for extracting the transcript paths of a |
|
54 |
+ splicing graph. |
|
55 |
+ |
|
53 | 56 |
\item \code{\link{sgedges}} for extracting the edges (and nodes) of a |
54 | 57 |
splicing graph. |
55 | 58 |
|
... | ... |
@@ -58,6 +58,9 @@ countReads(sg) |
58 | 58 |
\item \code{\link{sgedgesByTranscript}} for extracting all the introns |
59 | 59 |
or splicing graph edges from a \link{SplicingGraphs} object. |
60 | 60 |
|
61 |
+ \item \code{\link{txpath}} for extracting the transcript paths of a |
|
62 |
+ splicing graph. |
|
63 |
+ |
|
61 | 64 |
\item \code{\link{sgedges}} for extracting the edges (and nodes) of a |
62 | 65 |
splicing graph. |
63 | 66 |
|
... | ... |
@@ -2,12 +2,6 @@ |
2 | 2 |
|
3 | 3 |
\alias{sgedges-methods} |
4 | 4 |
|
5 |
-\alias{txpaths} |
|
6 |
-\alias{txpaths,SplicingGraphs-method} |
|
7 |
- |
|
8 |
-\alias{UATXHcount} |
|
9 |
-\alias{UATXHcount,SplicingGraphs-method} |
|
10 |
- |
|
11 | 5 |
\alias{sgedges} |
12 | 6 |
\alias{sgedges,SplicingGraphs-method} |
13 | 7 |
\alias{sgedges,IntegerList-method} |
... | ... |
@@ -51,10 +45,7 @@ indeg(x) |
51 | 45 |
|
52 | 46 |
sgedges2(x) |
53 | 47 |
|
54 |
-## Related utilities: |
|
55 |
- |
|
56 |
-txpaths(x, as.matrix=FALSE) |
|
57 |
-UATXHcount(x) |
|
48 |
+## Related utility: |
|
58 | 49 |
uninformativeSSids(x) |
59 | 50 |
} |
60 | 51 |
|
... | ... |
@@ -68,9 +59,6 @@ uninformativeSSids(x) |
68 | 59 |
\item{keep.dup.edges}{ |
69 | 60 |
TODO |
70 | 61 |
} |
71 |
- \item{as.matrix}{ |
|
72 |
- TODO |
|
73 |
- } |
|
74 | 62 |
} |
75 | 63 |
|
76 | 64 |
\details{ |
... | ... |
@@ -92,6 +80,9 @@ uninformativeSSids(x) |
92 | 80 |
\item \code{\link{sgedgesByTranscript}} for extracting all the introns |
93 | 81 |
or splicing graph edges from a \link{SplicingGraphs} object. |
94 | 82 |
|
83 |
+ \item \code{\link{txpath}} for extracting the transcript paths of a |
|
84 |
+ splicing graph. |
|
85 |
+ |
|
95 | 86 |
\item \code{\link{sgraph}} for extracting a splicing graph as a |
96 | 87 |
plottable graph-like object. |
97 | 88 |
|
... | ... |
@@ -114,9 +105,6 @@ sgnodes(sg["geneD"]) |
114 | 105 |
outdeg(sg["geneD"]) |
115 | 106 |
indeg(sg["geneD"]) |
116 | 107 |
|
117 |
-txpaths(sg["geneD"]) |
|
118 |
-txpaths(sg["geneD"], as.matrix=TRUE) # splicing matrix |
|
119 |
- |
|
120 | 108 |
## Sanity check: |
121 | 109 |
geneD_sgedges1 <- sgedges(sg["geneD"], keep.dup.edges=TRUE) |
122 | 110 |
geneD_sgedges1 <- geneD_sgedges1[geneD_sgedges1$ex_or_in != "", ] |
... | ... |
@@ -53,6 +53,9 @@ sgedgesByTranscript(x) |
53 | 53 |
\itemize{ |
54 | 54 |
\item The \link{SplicingGraphs} class. |
55 | 55 |
|
56 |
+ \item \code{\link{txpath}} for extracting the transcript paths of a |
|
57 |
+ splicing graph. |
|
58 |
+ |
|
56 | 59 |
\item \code{\link{sgedges}} for extracting the edges (and nodes) of a |
57 | 60 |
splicing graph. |
58 | 61 |
|
... | ... |
@@ -70,6 +70,9 @@ slideshow(x) |
70 | 70 |
\item \code{\link{sgedgesByTranscript}} for extracting all the introns |
71 | 71 |
or splicing graph edges from a \link{SplicingGraphs} object. |
72 | 72 |
|
73 |
+ \item \code{\link{txpath}} for extracting the transcript paths of a |
|
74 |
+ splicing graph. |
|
75 |
+ |
|
73 | 76 |
\item \code{\link{sgedges}} for extracting the edges (and nodes) of a |
74 | 77 |
splicing graph. |
75 | 78 |
|
76 | 79 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,88 @@ |
1 |
+\name{txpath-methods} |
|
2 |
+ |
|
3 |
+\alias{txpath-methods} |
|
4 |
+ |
|
5 |
+\alias{txpath} |
|
6 |
+\alias{txpath,GRangesList-method} |
|
7 |
+\alias{txpath,SplicingGraphs-method} |
|
8 |
+ |
|
9 |
+\alias{UATXHcount} |
|
10 |
+\alias{UATXHcount,SplicingGraphs-method} |
|
11 |
+ |
|
12 |
+ |
|
13 |
+\title{ |
|
14 |
+ Extract the transcript paths of a splicing graph |
|
15 |
+} |
|
16 |
+ |
|
17 |
+\description{ |
|
18 |
+ \code{txpath} extracts the transcript paths of the splicing graph |
|
19 |
+ of a given gene from a \link{SplicingGraphs} object. |
|
20 |
+} |
|
21 |
+ |
|
22 |
+\usage{ |
|
23 |
+txpath(x, as.matrix=FALSE) |
|
24 |
+ |
|
25 |
+## Related utility: |
|
26 |
+UATXHcount(x) |
|
27 |
+} |
|
28 |
+ |
|
29 |
+\arguments{ |
|
30 |
+ \item{x}{ |
|
31 |
+ A \link{SplicingGraphs} object of length 1 |
|
32 |
+ or a \link[GenomicRanges]{GRangesList} object. |
|
33 |
+ } |
|
34 |
+ \item{as.matrix}{ |
|
35 |
+ TODO |
|
36 |
+ } |
|
37 |
+} |
|
38 |
+ |
|
39 |
+\details{ |
|
40 |
+ TODO |
|
41 |
+} |
|
42 |
+ |
|
43 |
+\value{ |
|
44 |
+ A named list-like object with one list element per transcript in the gene. |
|
45 |
+ Each list element is an integer vector that describes the \emph{path} |
|
46 |
+ of the transcript i.e. the \emph{Splicing Site ids} that it goes thru. |
|
47 |
+} |
|
48 |
+ |
|
49 |
+\author{ |
|
50 |
+ H. Pages |
|
51 |
+} |
|
52 |
+ |
|
53 |
+\seealso{ |
|
54 |
+ \itemize{ |
|
55 |
+ \item The \link{SplicingGraphs} class. |
|
56 |
+ |
|
57 |
+ \item \code{\link{sgedgesByTranscript}} for extracting all the introns |
|
58 |
+ or splicing graph edges from a \link{SplicingGraphs} object. |
|
59 |
+ |
|
60 |
+ \item \code{\link{sgedges}} for extracting the edges (and nodes) of a |
|
61 |
+ splicing graph. |
|
62 |
+ |
|
63 |
+ \item \code{\link{sgraph}} for extracting a splicing graph as a |
|
64 |
+ plottable graph-like object. |
|
65 |
+ |
|
66 |
+ \item \code{\link{bubbles}} for computing the bubbles of a splicing graph. |
|
67 |
+ |
|
68 |
+ \item \code{\link{countReads}} for assigning reads to a |
|
69 |
+ \link{SplicingGraphs} object and counting them. |
|
70 |
+ } |
|
71 |
+} |
|
72 |
+ |
|
73 |
+\examples{ |
|
74 |
+example(SplicingGraphs) # create SplicingGraphs object 'sg' |
|
75 |
+sg |
|
76 |
+ |
|
77 |
+## 'sg' has 1 element per gene and 'names(sg)' gives the gene ids. |
|
78 |
+names(sg) |
|
79 |
+ |
|
80 |
+## Note that the list elements in the returned IntegerList object |
|
81 |
+## always consist of an even number of Splicing Site ids in ascending |
|
82 |
+## order. |
|
83 |
+txpath(sg["geneB"]) |
|
84 |
+txpath(sg["geneD"]) |
|
85 |
+strand(sg) |
|
86 |
+ |
|
87 |
+txpath(sg["geneD"], as.matrix=TRUE) # splicing matrix |
|
88 |
+} |