git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@73962 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: SplicingGraphs |
2 | 2 |
Title: Tools for creating splicing graphs from annotations and RNA-Seq data |
3 |
-Version: 0.3.0 |
|
3 |
+Version: 0.4.0 |
|
4 | 4 |
Author: D. Bindreither, M. Carlson, M. Morgan, H. Pages |
5 | 5 |
License: Artistic-2.0 |
6 | 6 |
Description: This package provides tools for creating splicing graphs based on |
... | ... |
@@ -75,7 +75,10 @@ export( |
75 | 75 |
uninformativeSSids, |
76 | 76 |
|
77 | 77 |
## sgraph-methods.R: |
78 |
- sgraph |
|
78 |
+ sgraph, |
|
79 |
+ |
|
80 |
+ ## bubbles-methods.R: |
|
81 |
+ bubbles |
|
79 | 82 |
) |
80 | 83 |
|
81 | 84 |
### Exactly the same list as above. |
... | ... |
@@ -86,6 +89,7 @@ exportMethods( |
86 | 89 |
sgedges, |
87 | 90 |
sgnodes, |
88 | 91 |
uninformativeSSids, |
89 |
- sgraph |
|
92 |
+ sgraph, |
|
93 |
+ bubbles |
|
90 | 94 |
) |
91 | 95 |
|
... | ... |
@@ -1,13 +1,149 @@ |
1 |
+### ========================================================================= |
|
2 |
+### "bubbles" methods |
|
3 |
+### ------------------------------------------------------------------------- |
|
1 | 4 |
|
2 |
-.make_spath_matrix_from_spath <- function(spath) |
|
5 |
+ |
|
6 |
+### Returns a DataFrame with 1 row per bubble and the following cols: |
|
7 |
+### source <character>, sink <character>, left_path <IntegerList>, and |
|
8 |
+### right_path <IntegerList>. |
|
9 |
+### Must find at least 1 bubble if 'left_SSids' and 'right_SSids' are not |
|
10 |
+### identical. |
|
11 |
+.find_bubbles_in_tx_pair <- function(left_SSids, right_SSids) |
|
3 | 12 |
{ |
4 |
- nodes <- sgnodes(spath) |
|
13 |
+ ans_source <- ans_sink <- character(0) |
|
14 |
+ ans_left_path <- ans_right_path <- IntegerList() |
|
15 |
+ left_len <- length(left_SSids) |
|
16 |
+ right_len <- length(right_SSids) |
|
17 |
+ i <- j <- 1L |
|
18 |
+ prev_left <- prev_right <- "R" |
|
19 |
+ in_bubble <- FALSE |
|
20 |
+ while (i <= left_len + 1L || j <= right_len + 1L) { |
|
21 |
+ if (i <= left_len) { |
|
22 |
+ curr_left <- left_SSids[i] |
|
23 |
+ } else { |
|
24 |
+ curr_left <- .Machine$integer.max |
|
25 |
+ } |
|
26 |
+ if (j <= right_len) { |
|
27 |
+ curr_right <- right_SSids[j] |
|
28 |
+ } else { |
|
29 |
+ curr_right <- .Machine$integer.max |
|
30 |
+ } |
|
31 |
+ if (curr_left == curr_right) { |
|
32 |
+ if (in_bubble) { |
|
33 |
+ ## End of the current bubble. |
|
34 |
+ in_bubble <- FALSE |
|
35 |
+ if (curr_left == .Machine$integer.max) { |
|
36 |
+ sink <- "L" |
|
37 |
+ } else { |
|
38 |
+ sink <- as.character(curr_left) |
|
39 |
+ } |
|
40 |
+ ans_sink <- c(ans_sink, sink) |
|
41 |
+ ans_left_path <- c(ans_left_path, |
|
42 |
+ IntegerList(bubble_left_path)) |
|
43 |
+ ans_right_path <- c(ans_right_path, |
|
44 |
+ IntegerList(bubble_right_path)) |
|
45 |
+ } |
|
46 |
+ prev_left <- curr_left |
|
47 |
+ i <- i + 1L |
|
48 |
+ prev_right <- curr_right |
|
49 |
+ j <- j + 1L |
|
50 |
+ next |
|
51 |
+ } |
|
52 |
+ if (!in_bubble) { |
|
53 |
+ ## Start a new bubble. |
|
54 |
+ in_bubble <- TRUE |
|
55 |
+ ans_source <- c(ans_source, as.character(prev_left)) |
|
56 |
+ bubble_left_path <- bubble_right_path <- integer(0) |
|
57 |
+ } |
|
58 |
+ if (curr_left < curr_right) { |
|
59 |
+ bubble_left_path <- c(bubble_left_path, curr_left) |
|
60 |
+ prev_left <- curr_left |
|
61 |
+ i <- i + 1L |
|
62 |
+ } else { |
|
63 |
+ bubble_right_path <- c(bubble_right_path, curr_right) |
|
64 |
+ prev_right <- curr_right |
|
65 |
+ j <- j + 1L |
|
66 |
+ } |
|
67 |
+ } |
|
68 |
+ DataFrame(source=ans_source, |
|
69 |
+ sink=ans_sink, |
|
70 |
+ left_path=ans_left_path, |
|
71 |
+ right_path=ans_right_path) |
|
5 | 72 |
} |
6 | 73 |
|
7 |
-findBubbles <- function(x, gene_id=NA) |
|
74 |
+.extract_bubbles_from_spath <- function(spath) |
|
8 | 75 |
{ |
9 |
- spath <- spath(sg, gene_id=gene_id) |
|
10 |
- spath_mat <- .make_spath_matrix_from_spath(spath) |
|
11 |
- spath_mat |
|
76 |
+ ntx <- length(spath) |
|
77 |
+ if (ntx <= 1L) { |
|
78 |
+ ## No bubbles. |
|
79 |
+ bubbles <- .find_bubbles_in_tx_pair(integer(0), integer(0)) |
|
80 |
+ bubbles <- cbind(bubbles, DataFrame(left_tx_id=character(0), |
|
81 |
+ right_tx_id=character(0))) |
|
82 |
+ return(bubbles) |
|
83 |
+ } |
|
84 |
+ tx_id <- names(spath) |
|
85 |
+ if (is.null(tx_id)) |
|
86 |
+ tx_id <- seq_along(spath) |
|
87 |
+ |
|
88 |
+ npairs <- (ntx * (ntx - 1L)) %/% 2L |
|
89 |
+ all_bubbles <- vector(mode="list", length=npairs) |
|
90 |
+ z <- 1L |
|
91 |
+ for (i in 1:(ntx-1L)) { |
|
92 |
+ left_SSids <- spath[[i]] |
|
93 |
+ left_tx_id <- tx_id[i] |
|
94 |
+ for (j in (i+1L):ntx) { |
|
95 |
+ right_SSids <- spath[[j]] |
|
96 |
+ right_tx_id <- tx_id[j] |
|
97 |
+ bubbles <- .find_bubbles_in_tx_pair(left_SSids, right_SSids) |
|
98 |
+ bubbles <- cbind(bubbles, DataFrame(left_tx_id=left_tx_id, |
|
99 |
+ right_tx_id=right_tx_id)) |
|
100 |
+ all_bubbles[[z]] <- bubbles |
|
101 |
+ z <- z + 1L |
|
102 |
+ } |
|
103 |
+ } |
|
104 |
+ ans <- do.call(rbind, all_bubbles) |
|
105 |
+ |
|
106 |
+ ## Remove duplicate bubbles. |
|
107 |
+ keys <- c("source", "sink", "left_path", "right_path") |
|
108 |
+ key1 <- ans[ , "source"] |
|
109 |
+ key2 <- ans[ , "sink"] |
|
110 |
+ key3 <- unlist(lapply(ans[ , "left_path"], paste, collapse=","), |
|
111 |
+ use.names=FALSE) |
|
112 |
+ key4 <- unlist(lapply(ans[ , "right_path"], paste, collapse=","), |
|
113 |
+ use.names=FALSE) |
|
114 |
+ key <- paste(key1, key2, key3, key4, sep="|") |
|
115 |
+ left_tx_id <- ans[ , "left_tx_id"] |
|
116 |
+ right_tx_id <- ans[ , "right_tx_id"] |
|
117 |
+ sm <- match(key, key) |
|
118 |
+ is_not_dup <- sm == seq_along(sm) |
|
119 |
+ ans <- ans[is_not_dup, keys, drop=FALSE] |
|
120 |
+ rownames(ans) <- NULL |
|
121 |
+ ans$left_tx_id <- unique(splitAsList(left_tx_id, sm)) |
|
122 |
+ ans$right_tx_id <- unique(splitAsList(right_tx_id, sm)) |
|
123 |
+ ans |
|
12 | 124 |
} |
13 | 125 |
|
126 |
+setGeneric("bubbles", signature="x", |
|
127 |
+ function(x, gene_id=NA) standardGeneric("bubbles") |
|
128 |
+) |
|
129 |
+ |
|
130 |
+setMethod("bubbles", "ANY", |
|
131 |
+ function(x, gene_id=NA) |
|
132 |
+ { |
|
133 |
+ spath <- spath(x, gene_id=gene_id) |
|
134 |
+ bubbles(spath) |
|
135 |
+ } |
|
136 |
+) |
|
137 |
+ |
|
138 |
+setMethod("bubbles", "IntegerList", |
|
139 |
+ function(x, gene_id=NA) |
|
140 |
+ { |
|
141 |
+ if (!identical(gene_id, NA)) |
|
142 |
+ stop("the 'gene_id' arg is not supported ", |
|
143 |
+ "when 'x' is an IntegerList") |
|
144 |
+ .extract_bubbles_from_spath(x) |
|
145 |
+ } |
|
146 |
+) |
|
147 |
+ |
|
148 |
+### TODO: Add "bubbles" methods for data.frame and DataFrame objects. |
|
149 |
+ |
... | ... |
@@ -139,10 +139,13 @@ SplicingGraphs(x, grouping=NULL, check.introns=TRUE) |
139 | 139 |
\item The \link[IRanges]{IntegerList}, \link[IRanges]{CharacterList}, |
140 | 140 |
and \link[IRanges]{DataFrame} classes in the IRanges package. |
141 | 141 |
|
142 |
- \item \code{\link{sgedges}} for extracting the edges of a splicing graph. |
|
142 |
+ \item \code{\link{sgedges}} for extracting the edges (and nodes) of a |
|
143 |
+ splicing graph. |
|
143 | 144 |
|
144 | 145 |
\item \code{\link{sgraph}} for extracting a splicing graph as a plottable |
145 | 146 |
graph-like object. |
147 |
+ |
|
148 |
+ \item \code{\link{bubbles}} for extracting the bubbles of a splicing graph. |
|
146 | 149 |
} |
147 | 150 |
} |
148 | 151 |
|
149 | 152 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,74 @@ |
1 |
+\name{bubbles-methods} |
|
2 |
+ |
|
3 |
+\alias{bubbles-methods} |
|
4 |
+ |
|
5 |
+\alias{bubbles} |
|
6 |
+\alias{bubbles,ANY-method} |
|
7 |
+\alias{bubbles,IntegerList-method} |
|
8 |
+ |
|
9 |
+ |
|
10 |
+\title{ |
|
11 |
+ Extract the bubbles of a splicing graph |
|
12 |
+} |
|
13 |
+ |
|
14 |
+\description{ |
|
15 |
+ \code{bubbles} extracts the bubbles of the splicing graph of a given gene |
|
16 |
+ from a \link{SplicingGraphs} object. |
|
17 |
+} |
|
18 |
+ |
|
19 |
+\usage{ |
|
20 |
+bubbles(x, gene_id=NA) |
|
21 |
+} |
|
22 |
+ |
|
23 |
+\arguments{ |
|
24 |
+ \item{x}{ |
|
25 |
+ TODO |
|
26 |
+ } |
|
27 |
+ \item{gene_id}{ |
|
28 |
+ TODO |
|
29 |
+ } |
|
30 |
+} |
|
31 |
+ |
|
32 |
+\details{ |
|
33 |
+ TODO |
|
34 |
+} |
|
35 |
+ |
|
36 |
+\value{ |
|
37 |
+ TODO |
|
38 |
+} |
|
39 |
+ |
|
40 |
+\author{ |
|
41 |
+ H. Pages |
|
42 |
+} |
|
43 |
+ |
|
44 |
+\seealso{ |
|
45 |
+ \itemize{ |
|
46 |
+ \item The \link{SplicingGraphs} class. |
|
47 |
+ |
|
48 |
+ \item \code{\link{sgedges}} for extracting the edges (and nodes) of a |
|
49 |
+ splicing graph. |
|
50 |
+ |
|
51 |
+ \item \code{\link{sgraph}} for extracting a splicing graph as a |
|
52 |
+ plottable graph-like object. |
|
53 |
+ } |
|
54 |
+} |
|
55 |
+ |
|
56 |
+\examples{ |
|
57 |
+example(SplicingGraphs) # create SplicingGraphs object 'sg' |
|
58 |
+sg |
|
59 |
+ |
|
60 |
+## 'sg' has 1 element per transcript, and each transcript is |
|
61 |
+## assigned a name that is the id of the gene it belongs to. All the |
|
62 |
+## transcripts belonging to the same gene are guaranteed to be |
|
63 |
+## consecutive elements in 'sg'. |
|
64 |
+names(sg) |
|
65 |
+ |
|
66 |
+plot(sgraph(sg, gene_id="geneA", tx_id.as.edge.label=TRUE)) |
|
67 |
+bubbles(sg, gene_id="geneA") |
|
68 |
+ |
|
69 |
+plot(sgraph(sg, gene_id="geneB", tx_id.as.edge.label=TRUE)) |
|
70 |
+bubbles(sg, gene_id="geneB") |
|
71 |
+ |
|
72 |
+plot(sgraph(sg, gene_id="geneD", tx_id.as.edge.label=TRUE)) |
|
73 |
+bubbles(sg, gene_id="geneD") |
|
74 |
+} |
... | ... |
@@ -27,12 +27,12 @@ |
27 | 27 |
|
28 | 28 |
|
29 | 29 |
\title{ |
30 |
- Extract the edges of a splicing graph |
|
30 |
+ Extract the edges (and nodes) of a splicing graph |
|
31 | 31 |
} |
32 | 32 |
|
33 | 33 |
\description{ |
34 |
- Extract the edges of the splicing graph of a given gene from a |
|
35 |
- \link{SplicingGraphs} object and return it as a \link[IRanges]{DataFrame}. |
|
34 |
+ \code{sgedges} (resp. \code{sgnodes}) extracts the edges (resp. the nodes) |
|
35 |
+ of the splicing graph of a given gene from a \link{SplicingGraphs} object. |
|
36 | 36 |
} |
37 | 37 |
|
38 | 38 |
\usage{ |
... | ... |
@@ -84,6 +84,8 @@ uninformativeSSids(x, gene_id=NA) |
84 | 84 |
|
85 | 85 |
\item \code{\link{sgraph}} for extracting a splicing graph as a |
86 | 86 |
plottable graph-like object. |
87 |
+ |
|
88 |
+ \item \code{\link{bubbles}} for extracting the bubbles of a splicing graph. |
|
87 | 89 |
} |
88 | 90 |
} |
89 | 91 |
|
... | ... |
@@ -72,7 +72,10 @@ slideshow(x) |
72 | 72 |
\itemize{ |
73 | 73 |
\item The \link{SplicingGraphs} class. |
74 | 74 |
|
75 |
- \item \code{\link{sgedges}} for extracting the edges of a splicing graph. |
|
75 |
+ \item \code{\link{sgedges}} for extracting the edges (and nodes) of a |
|
76 |
+ splicing graph. |
|
77 |
+ |
|
78 |
+ \item \code{\link{bubbles}} for extracting the bubbles of a splicing graph. |
|
76 | 79 |
} |
77 | 80 |
} |
78 | 81 |
|