Browse code

Add bubbles() for extracting the bubbles of a splicing graph.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@73962 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 06/03/2013 07:44:09
Showing 9 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: SplicingGraphs
2 2
 Title: Tools for creating splicing graphs from annotations and RNA-Seq data
3
-Version: 0.3.0
3
+Version: 0.4.0
4 4
 Author: D. Bindreither, M. Carlson, M. Morgan, H. Pages
5 5
 License: Artistic-2.0
6 6
 Description: This package provides tools for creating splicing graphs based on
... ...
@@ -75,7 +75,10 @@ export(
75 75
     uninformativeSSids,
76 76
 
77 77
     ## sgraph-methods.R:
78
-    sgraph
78
+    sgraph,
79
+
80
+    ## bubbles-methods.R:
81
+    bubbles
79 82
 )
80 83
 
81 84
 ### Exactly the same list as above.
... ...
@@ -86,6 +89,7 @@ exportMethods(
86 89
     sgedges,
87 90
     sgnodes,
88 91
     uninformativeSSids,
89
-    sgraph
92
+    sgraph,
93
+    bubbles
90 94
 )
91 95
 
... ...
@@ -1,13 +1,149 @@
1
+### =========================================================================
2
+### "bubbles" methods
3
+### -------------------------------------------------------------------------
1 4
 
2
-.make_spath_matrix_from_spath <- function(spath)
5
+
6
+### Returns a DataFrame with 1 row per bubble and the following cols:
7
+### source <character>, sink <character>, left_path <IntegerList>, and
8
+### right_path <IntegerList>.
9
+### Must find at least 1 bubble if 'left_SSids' and 'right_SSids' are not
10
+### identical.
11
+.find_bubbles_in_tx_pair <- function(left_SSids, right_SSids)
3 12
 {
4
-    nodes <- sgnodes(spath)
13
+    ans_source <- ans_sink <- character(0)
14
+    ans_left_path <- ans_right_path <- IntegerList()
15
+    left_len <- length(left_SSids)
16
+    right_len <- length(right_SSids)
17
+    i <- j <- 1L
18
+    prev_left <- prev_right <- "R"
19
+    in_bubble <- FALSE
20
+    while (i <= left_len + 1L || j <= right_len + 1L) {
21
+        if (i <= left_len) {
22
+            curr_left <- left_SSids[i]
23
+        } else {
24
+            curr_left <- .Machine$integer.max
25
+        }
26
+        if (j <= right_len) {
27
+            curr_right <- right_SSids[j]
28
+        } else {
29
+            curr_right <- .Machine$integer.max
30
+        }
31
+        if (curr_left == curr_right) {
32
+            if (in_bubble) {
33
+                ## End of the current bubble.
34
+                in_bubble <- FALSE
35
+                if (curr_left == .Machine$integer.max) {
36
+                    sink <- "L"
37
+                } else {
38
+                    sink <- as.character(curr_left)
39
+                }
40
+                ans_sink <- c(ans_sink, sink)
41
+                ans_left_path <- c(ans_left_path,
42
+                                   IntegerList(bubble_left_path))
43
+                ans_right_path <- c(ans_right_path,
44
+                                    IntegerList(bubble_right_path))
45
+            }
46
+            prev_left <- curr_left
47
+            i <- i + 1L
48
+            prev_right <- curr_right
49
+            j <- j + 1L
50
+            next
51
+        }
52
+        if (!in_bubble) {
53
+            ## Start a new bubble.
54
+            in_bubble <- TRUE
55
+            ans_source <- c(ans_source, as.character(prev_left))
56
+            bubble_left_path <- bubble_right_path <- integer(0)
57
+        }
58
+        if (curr_left < curr_right) {
59
+            bubble_left_path <- c(bubble_left_path, curr_left)
60
+            prev_left <- curr_left
61
+            i <- i + 1L
62
+        } else {
63
+            bubble_right_path <- c(bubble_right_path, curr_right)
64
+            prev_right <- curr_right
65
+            j <- j + 1L
66
+        }
67
+    }
68
+    DataFrame(source=ans_source,
69
+              sink=ans_sink,
70
+              left_path=ans_left_path,
71
+              right_path=ans_right_path)
5 72
 }
6 73
 
7
-findBubbles <- function(x, gene_id=NA)
74
+.extract_bubbles_from_spath <- function(spath)
8 75
 {
9
-    spath <- spath(sg, gene_id=gene_id)
10
-    spath_mat <- .make_spath_matrix_from_spath(spath)
11
-    spath_mat
76
+    ntx <- length(spath)
77
+    if (ntx <= 1L) {
78
+        ## No bubbles.
79
+        bubbles <- .find_bubbles_in_tx_pair(integer(0), integer(0))
80
+        bubbles <- cbind(bubbles, DataFrame(left_tx_id=character(0),
81
+                                            right_tx_id=character(0)))
82
+        return(bubbles)
83
+    }
84
+    tx_id <- names(spath)
85
+    if (is.null(tx_id))
86
+        tx_id <- seq_along(spath)
87
+
88
+    npairs <- (ntx * (ntx - 1L)) %/% 2L
89
+    all_bubbles <- vector(mode="list", length=npairs)
90
+    z <- 1L
91
+    for (i in 1:(ntx-1L)) {
92
+        left_SSids <- spath[[i]]
93
+        left_tx_id <- tx_id[i]
94
+        for (j in (i+1L):ntx) {
95
+            right_SSids <- spath[[j]]
96
+            right_tx_id <- tx_id[j]
97
+            bubbles <- .find_bubbles_in_tx_pair(left_SSids, right_SSids)
98
+            bubbles <- cbind(bubbles, DataFrame(left_tx_id=left_tx_id,
99
+                                                right_tx_id=right_tx_id))
100
+            all_bubbles[[z]] <- bubbles
101
+            z <- z + 1L
102
+        }
103
+    }
104
+    ans <- do.call(rbind, all_bubbles)
105
+
106
+    ## Remove duplicate bubbles.
107
+    keys <- c("source", "sink", "left_path", "right_path")
108
+    key1 <- ans[ , "source"]
109
+    key2 <- ans[ , "sink"]
110
+    key3 <- unlist(lapply(ans[ , "left_path"], paste, collapse=","),
111
+                   use.names=FALSE)
112
+    key4 <- unlist(lapply(ans[ , "right_path"], paste, collapse=","),
113
+                   use.names=FALSE)
114
+    key <- paste(key1, key2, key3, key4, sep="|")
115
+    left_tx_id <- ans[ , "left_tx_id"]
116
+    right_tx_id <- ans[ , "right_tx_id"]
117
+    sm <- match(key, key)
118
+    is_not_dup <- sm == seq_along(sm)
119
+    ans <- ans[is_not_dup, keys, drop=FALSE]
120
+    rownames(ans) <- NULL
121
+    ans$left_tx_id <- unique(splitAsList(left_tx_id, sm))
122
+    ans$right_tx_id <- unique(splitAsList(right_tx_id, sm))
123
+    ans
12 124
 }
13 125
 
126
+setGeneric("bubbles", signature="x",
127
+    function(x, gene_id=NA) standardGeneric("bubbles")
128
+)
129
+
130
+setMethod("bubbles", "ANY",
131
+    function(x, gene_id=NA)
132
+    {
133
+        spath <- spath(x, gene_id=gene_id)
134
+        bubbles(spath)
135
+    }
136
+)
137
+
138
+setMethod("bubbles", "IntegerList",
139
+    function(x, gene_id=NA)
140
+    {
141
+        if (!identical(gene_id, NA))
142
+            stop("the 'gene_id' arg is not supported ",
143
+                 "when 'x' is an IntegerList")
144
+        .extract_bubbles_from_spath(x)
145
+    }
146
+)
147
+
148
+### TODO: Add "bubbles" methods for data.frame and DataFrame objects.
149
+
... ...
@@ -1,5 +1,5 @@
1 1
 ### =========================================================================
2
-### sgedges (and related) methods
2
+### "sgedges" (and related) methods
3 3
 ### -------------------------------------------------------------------------
4 4
 
5 5
 
... ...
@@ -1,5 +1,5 @@
1 1
 ### =========================================================================
2
-### sgraph (and related) methods
2
+### "sgraph" (and related) methods
3 3
 ### -------------------------------------------------------------------------
4 4
 
5 5
 
... ...
@@ -139,10 +139,13 @@ SplicingGraphs(x, grouping=NULL, check.introns=TRUE)
139 139
     \item The \link[IRanges]{IntegerList}, \link[IRanges]{CharacterList},
140 140
           and \link[IRanges]{DataFrame} classes in the IRanges package.
141 141
 
142
-    \item \code{\link{sgedges}} for extracting the edges of a splicing graph.
142
+    \item \code{\link{sgedges}} for extracting the edges (and nodes) of a
143
+          splicing graph.
143 144
 
144 145
     \item \code{\link{sgraph}} for extracting a splicing graph as a plottable
145 146
           graph-like object.
147
+
148
+    \item \code{\link{bubbles}} for extracting the bubbles of a splicing graph.
146 149
   }
147 150
 }
148 151
 
149 152
new file mode 100644
... ...
@@ -0,0 +1,74 @@
1
+\name{bubbles-methods}
2
+
3
+\alias{bubbles-methods}
4
+
5
+\alias{bubbles}
6
+\alias{bubbles,ANY-method}
7
+\alias{bubbles,IntegerList-method}
8
+
9
+
10
+\title{
11
+  Extract the bubbles of a splicing graph
12
+}
13
+
14
+\description{
15
+  \code{bubbles} extracts the bubbles of the splicing graph of a given gene
16
+  from a \link{SplicingGraphs} object.
17
+}
18
+
19
+\usage{
20
+bubbles(x, gene_id=NA)
21
+}
22
+
23
+\arguments{
24
+  \item{x}{
25
+    TODO
26
+  }
27
+  \item{gene_id}{
28
+    TODO
29
+  }
30
+}
31
+
32
+\details{
33
+  TODO
34
+}
35
+
36
+\value{
37
+  TODO
38
+}
39
+
40
+\author{
41
+  H. Pages
42
+}
43
+
44
+\seealso{
45
+  \itemize{
46
+    \item The \link{SplicingGraphs} class.
47
+
48
+    \item \code{\link{sgedges}} for extracting the edges (and nodes) of a
49
+          splicing graph.
50
+
51
+    \item \code{\link{sgraph}} for extracting a splicing graph as a
52
+          plottable graph-like object.
53
+  }
54
+}
55
+
56
+\examples{
57
+example(SplicingGraphs)  # create SplicingGraphs object 'sg'
58
+sg
59
+
60
+## 'sg' has 1 element per transcript, and each transcript is
61
+## assigned a name that is the id of the gene it belongs to. All the
62
+## transcripts belonging to the same gene are guaranteed to be
63
+## consecutive elements in 'sg'.
64
+names(sg)
65
+
66
+plot(sgraph(sg, gene_id="geneA", tx_id.as.edge.label=TRUE))
67
+bubbles(sg, gene_id="geneA")
68
+
69
+plot(sgraph(sg, gene_id="geneB", tx_id.as.edge.label=TRUE))
70
+bubbles(sg, gene_id="geneB")
71
+
72
+plot(sgraph(sg, gene_id="geneD", tx_id.as.edge.label=TRUE))
73
+bubbles(sg, gene_id="geneD")
74
+}
... ...
@@ -27,12 +27,12 @@
27 27
 
28 28
 
29 29
 \title{
30
-  Extract the edges of a splicing graph
30
+  Extract the edges (and nodes) of a splicing graph
31 31
 }
32 32
 
33 33
 \description{
34
-  Extract the edges of the splicing graph of a given gene from a
35
-  \link{SplicingGraphs} object and return it as a \link[IRanges]{DataFrame}.
34
+  \code{sgedges} (resp. \code{sgnodes}) extracts the edges (resp. the nodes)
35
+  of the splicing graph of a given gene from a \link{SplicingGraphs} object.
36 36
 }
37 37
 
38 38
 \usage{
... ...
@@ -84,6 +84,8 @@ uninformativeSSids(x, gene_id=NA)
84 84
 
85 85
     \item \code{\link{sgraph}} for extracting a splicing graph as a
86 86
           plottable graph-like object.
87
+
88
+    \item \code{\link{bubbles}} for extracting the bubbles of a splicing graph.
87 89
   }
88 90
 }
89 91
 
... ...
@@ -72,7 +72,10 @@ slideshow(x)
72 72
   \itemize{
73 73
     \item The \link{SplicingGraphs} class.
74 74
 
75
-    \item \code{\link{sgedges}} for extracting the edges of a splicing graph.
75
+    \item \code{\link{sgedges}} for extracting the edges (and nodes) of a
76
+          splicing graph.
77
+
78
+    \item \code{\link{bubbles}} for extracting the bubbles of a splicing graph.
76 79
   }
77 80
 }
78 81