... | ... |
@@ -159,6 +159,27 @@ |
159 | 159 |
#.extract_bubbles_from_txpathmat <- function(txpathmat, outdeg, indeg) |
160 | 160 |
.extract_bubbles_from_txpathmat <- function(txpathmat) |
161 | 161 |
{ |
162 |
+ ## The call to .get_bubble_variants() in the inner loop below calls |
|
163 |
+ ## order() on a character vector. Unfortunately the behavior of order() |
|
164 |
+ ## on a character vector depends on the collating sequence of the locale |
|
165 |
+ ## in use! |
|
166 |
+ ## So we set LC_COLLATE to C so that (1) the ouput of order() on a |
|
167 |
+ ## character vector is platform/country independent, and (2) it will |
|
168 |
+ ## behave the same way when called in the context of the unit tests |
|
169 |
+ ## run by 'R CMD check' ('R CMD check' also sets the LC_COLLATE to C |
|
170 |
+ ## when running the tests) vs when called in the context of an |
|
171 |
+ ## interactive session. |
|
172 |
+ ## Another advantage of doing this is that order() seems to be at least |
|
173 |
+ ## 2x faster when LC_COLLATE is set to C vs to something like en_US.UTF-8, |
|
174 |
+ ## which comes as a pleasant surprise! |
|
175 |
+ ## TODO: Maybe we should define an strorder() function in |
|
176 |
+ ## S4Vectors/R/str-utils.R for portable/deterministic ordering of a |
|
177 |
+ ## character vector. See R/utils.R in the GenomicFeatures package |
|
178 |
+ ## for a similar discussion about using rank() on a character vector. |
|
179 |
+ prev_locale <- Sys.getlocale("LC_COLLATE") |
|
180 |
+ Sys.setlocale("LC_COLLATE", "C") |
|
181 |
+ on.exit(Sys.setlocale("LC_COLLATE", prev_locale)) |
|
182 |
+ |
|
162 | 183 |
sgnodetypes <- .get_sgnodetypes_from_txpathmat(txpathmat) |
163 | 184 |
ans_source <- ans_sink <- ans_AScode <- character(0) |
164 | 185 |
ans_d <- integer(0) |
... | ... |
@@ -185,13 +185,13 @@ |
185 | 185 |
ans_d <- c(ans_d, bubble_d) |
186 | 186 |
## Format the bubble partitions. |
187 | 187 |
bubble_partitions <- bubble_variants[ , "partition"] |
188 |
- bubble_partitions <- sapply(bubble_partitions, paste, collapse=",") |
|
188 |
+ bubble_partitions <- sapply(bubble_partitions, base::paste, collapse=",") |
|
189 | 189 |
bubble_partitions <- paste0("{", bubble_partitions, "}") |
190 | 190 |
ans_partitions <- c(ans_partitions, |
191 | 191 |
CharacterList(bubble_partitions)) |
192 | 192 |
## Format the bubble paths. |
193 | 193 |
bubble_paths <- bubble_variants[ , "path"] |
194 |
- bubble_paths <- sapply(bubble_paths, paste, collapse=",") |
|
194 |
+ bubble_paths <- sapply(bubble_paths, base::paste, collapse=",") |
|
195 | 195 |
bubble_paths <- paste0("{", bubble_paths, "}") |
196 | 196 |
ans_paths <- c(ans_paths, CharacterList(bubble_paths)) |
197 | 197 |
## Format the bubble AScode. |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@75014 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -236,15 +236,15 @@ setMethod("bubbles", "SplicingGraphs", |
236 | 236 |
setMethod("bubbles", "ANY", |
237 | 237 |
function(x) |
238 | 238 |
{ |
239 |
- txpaths <- txpaths(x) |
|
240 |
- bubbles(txpaths) |
|
239 |
+ txpath <- txpath(x) |
|
240 |
+ bubbles(txpath) |
|
241 | 241 |
} |
242 | 242 |
) |
243 | 243 |
|
244 | 244 |
setMethod("bubbles", "IntegerList", |
245 | 245 |
function(x) |
246 | 246 |
{ |
247 |
- txpathmat <- make_matrix_from_txpaths(x) |
|
247 |
+ txpathmat <- make_matrix_from_txpath(x) |
|
248 | 248 |
#outdeg <- outdeg(x) |
249 | 249 |
#indeg <- indeg(x) |
250 | 250 |
#.extract_bubbles_from_txpathmat(txpathmat, outdeg, indeg) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@74373 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -205,7 +205,7 @@ |
205 | 205 |
partitions=ans_partitions, |
206 | 206 |
paths=ans_paths, |
207 | 207 |
AScode=ans_AScode, |
208 |
- description=unname(AScode2desc[ans_AScode])) |
|
208 |
+ description=unname(ASCODE2DESC[ans_AScode])) |
|
209 | 209 |
} |
210 | 210 |
|
211 | 211 |
|
... | ... |
@@ -217,6 +217,22 @@ setGeneric("bubbles", signature="x", |
217 | 217 |
function(x) standardGeneric("bubbles") |
218 | 218 |
) |
219 | 219 |
|
220 |
+setMethod("bubbles", "SplicingGraphs", |
|
221 |
+ function(x) |
|
222 |
+ { |
|
223 |
+ if (length(x) != 1L) |
|
224 |
+ stop("'x' must be a SplicingGraphs object of length 1") |
|
225 |
+ bubbles_cache <- [email protected]_cache |
|
226 |
+ ans <- try(get(names(x), envir=bubbles_cache, inherits=FALSE), |
|
227 |
+ silent=TRUE) |
|
228 |
+ if (is(ans, "try-error")) { |
|
229 |
+ ans <- callNextMethod() |
|
230 |
+ assign(names(x), ans, envir=bubbles_cache) |
|
231 |
+ } |
|
232 |
+ ans |
|
233 |
+ } |
|
234 |
+) |
|
235 |
+ |
|
220 | 236 |
setMethod("bubbles", "ANY", |
221 | 237 |
function(x) |
222 | 238 |
{ |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@74370 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -37,132 +37,6 @@ |
37 | 37 |
} |
38 | 38 |
|
39 | 39 |
|
40 |
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
41 |
-### .extract_ASevents2_from_txpaths() |
|
42 |
-### |
|
43 |
- |
|
44 |
-### Returns a DataFrame with 1 row per AS event and the following cols: |
|
45 |
-### source <character>, sink <character>, left_path <IntegerList>, and |
|
46 |
-### right_path <IntegerList>. |
|
47 |
-### Must find at least 1 AS event if 'left_txpath' and 'right_txpath' are |
|
48 |
-### not identical. |
|
49 |
-.extract_ASevents2_from_tx_pair <- function(left_txpath, right_txpath) |
|
50 |
-{ |
|
51 |
- ans_source <- ans_sink <- character(0) |
|
52 |
- ans_left_path <- ans_right_path <- IntegerList() |
|
53 |
- left_len <- length(left_txpath) |
|
54 |
- right_len <- length(right_txpath) |
|
55 |
- i <- j <- 1L |
|
56 |
- in_ASevent <- FALSE |
|
57 |
- while (i <= left_len + 1L || j <= right_len + 1L) { |
|
58 |
- if (i <= left_len) { |
|
59 |
- curr_left_SSid <- left_txpath[i] |
|
60 |
- } else { |
|
61 |
- curr_left_SSid <- .Machine$integer.max |
|
62 |
- } |
|
63 |
- if (j <= right_len) { |
|
64 |
- curr_right_SSid <- right_txpath[j] |
|
65 |
- } else { |
|
66 |
- curr_right_SSid <- .Machine$integer.max |
|
67 |
- } |
|
68 |
- if (curr_left_SSid == curr_right_SSid) { |
|
69 |
- if (in_ASevent) { |
|
70 |
- ## End of the current AS event. |
|
71 |
- in_ASevent <- FALSE |
|
72 |
- if (curr_left_SSid == .Machine$integer.max) { |
|
73 |
- sink <- "L" |
|
74 |
- } else { |
|
75 |
- sink <- as.character(curr_left_SSid) |
|
76 |
- } |
|
77 |
- ans_sink <- c(ans_sink, sink) |
|
78 |
- ans_left_path <- c(ans_left_path, |
|
79 |
- IntegerList(ASevent_left_path)) |
|
80 |
- ans_right_path <- c(ans_right_path, |
|
81 |
- IntegerList(ASevent_right_path)) |
|
82 |
- } |
|
83 |
- i <- i + 1L |
|
84 |
- j <- j + 1L |
|
85 |
- next |
|
86 |
- } |
|
87 |
- if (!in_ASevent) { |
|
88 |
- ## Start a new AS event. |
|
89 |
- in_ASevent <- TRUE |
|
90 |
- if (i == 1L) { |
|
91 |
- ASevent_source <- "R" |
|
92 |
- } else { |
|
93 |
- ASevent_source <- as.character(left_txpath[i - 1L]) |
|
94 |
- } |
|
95 |
- ans_source <- c(ans_source, ASevent_source) |
|
96 |
- ASevent_left_path <- ASevent_right_path <- integer(0) |
|
97 |
- } |
|
98 |
- if (curr_left_SSid < curr_right_SSid) { |
|
99 |
- ASevent_left_path <- c(ASevent_left_path, curr_left_SSid) |
|
100 |
- i <- i + 1L |
|
101 |
- } else { |
|
102 |
- ASevent_right_path <- c(ASevent_right_path, curr_right_SSid) |
|
103 |
- j <- j + 1L |
|
104 |
- } |
|
105 |
- } |
|
106 |
- DataFrame(source=ans_source, |
|
107 |
- sink=ans_sink, |
|
108 |
- left_path=ans_left_path, |
|
109 |
- right_path=ans_right_path) |
|
110 |
-} |
|
111 |
- |
|
112 |
-.extract_ASevents2_from_txpaths <- function(txpaths) |
|
113 |
-{ |
|
114 |
- ntx <- length(txpaths) |
|
115 |
- if (ntx <= 1L) { |
|
116 |
- ## No AS events. |
|
117 |
- ASevents2 <- .extract_ASevents2_from_tx_pair(integer(0), integer(0)) |
|
118 |
- ans <- cbind(ASevents2, DataFrame(left_tx_id=character(0), |
|
119 |
- right_tx_id=character(0))) |
|
120 |
- return(ans) |
|
121 |
- } |
|
122 |
- tx_id <- names(txpaths) |
|
123 |
- if (is.null(tx_id)) |
|
124 |
- tx_id <- seq_along(txpaths) |
|
125 |
- |
|
126 |
- npairs <- (ntx * (ntx - 1L)) %/% 2L |
|
127 |
- all_ASevents2 <- vector(mode="list", length=npairs) |
|
128 |
- z <- 1L |
|
129 |
- for (i in 1:(ntx-1L)) { |
|
130 |
- left_txpath <- txpaths[[i]] |
|
131 |
- left_tx_id <- tx_id[i] |
|
132 |
- for (j in (i+1L):ntx) { |
|
133 |
- right_txpath <- txpaths[[j]] |
|
134 |
- right_tx_id <- tx_id[j] |
|
135 |
- ASevents2 <- .extract_ASevents2_from_tx_pair(left_txpath, |
|
136 |
- right_txpath) |
|
137 |
- ASevents2 <- cbind(ASevents2, DataFrame(left_tx_id=left_tx_id, |
|
138 |
- right_tx_id=right_tx_id)) |
|
139 |
- all_ASevents2[[z]] <- ASevents2 |
|
140 |
- z <- z + 1L |
|
141 |
- } |
|
142 |
- } |
|
143 |
- ans <- do.call(rbind, all_ASevents2) |
|
144 |
- |
|
145 |
- ## Remove duplicate AS events. |
|
146 |
- keys <- c("source", "sink", "left_path", "right_path") |
|
147 |
- key1 <- ans[ , "source"] |
|
148 |
- key2 <- ans[ , "sink"] |
|
149 |
- key3 <- unlist(lapply(ans[ , "left_path"], paste, collapse=","), |
|
150 |
- use.names=FALSE) |
|
151 |
- key4 <- unlist(lapply(ans[ , "right_path"], paste, collapse=","), |
|
152 |
- use.names=FALSE) |
|
153 |
- key <- paste(key1, key2, key3, key4, sep="|") |
|
154 |
- left_tx_id <- ans[ , "left_tx_id"] |
|
155 |
- right_tx_id <- ans[ , "right_tx_id"] |
|
156 |
- sm <- match(key, key) |
|
157 |
- is_not_dup <- sm == seq_along(sm) |
|
158 |
- ans <- ans[is_not_dup, keys, drop=FALSE] |
|
159 |
- rownames(ans) <- NULL |
|
160 |
- ans$left_tx_id <- unique(splitAsList(left_tx_id, sm)) |
|
161 |
- ans$right_tx_id <- unique(splitAsList(right_tx_id, sm)) |
|
162 |
- ans |
|
163 |
-} |
|
164 |
- |
|
165 |
- |
|
166 | 40 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
167 | 41 |
### .extract_bubbles_from_txpathmat() |
168 | 42 |
### |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@74211 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -340,23 +340,20 @@ |
340 | 340 |
### |
341 | 341 |
|
342 | 342 |
setGeneric("bubbles", signature="x", |
343 |
- function(x, gene_id=NA) standardGeneric("bubbles") |
|
343 |
+ function(x) standardGeneric("bubbles") |
|
344 | 344 |
) |
345 | 345 |
|
346 | 346 |
setMethod("bubbles", "ANY", |
347 |
- function(x, gene_id=NA) |
|
347 |
+ function(x) |
|
348 | 348 |
{ |
349 |
- txpaths <- txpaths(x, gene_id=gene_id) |
|
349 |
+ txpaths <- txpaths(x) |
|
350 | 350 |
bubbles(txpaths) |
351 | 351 |
} |
352 | 352 |
) |
353 | 353 |
|
354 | 354 |
setMethod("bubbles", "IntegerList", |
355 |
- function(x, gene_id=NA) |
|
355 |
+ function(x) |
|
356 | 356 |
{ |
357 |
- if (!identical(gene_id, NA)) |
|
358 |
- stop("the 'gene_id' arg is not supported ", |
|
359 |
- "when 'x' is an IntegerList") |
|
360 | 357 |
txpathmat <- make_matrix_from_txpaths(x) |
361 | 358 |
#outdeg <- outdeg(x) |
362 | 359 |
#indeg <- indeg(x) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@74063 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -280,7 +280,8 @@ |
280 | 280 |
|
281 | 281 |
### Returns a DataFrame with 1 row per bubble and the following cols: |
282 | 282 |
### source <character>, sink <character>, d <integer>, partitions |
283 |
-### <CharacterList>, paths <CharacterList>, and AScode <character>. |
|
283 |
+### <CharacterList>, paths <CharacterList>, AScode <character>, and |
|
284 |
+### description <character>. |
|
284 | 285 |
#.extract_bubbles_from_txpathmat <- function(txpathmat, outdeg, indeg) |
285 | 286 |
.extract_bubbles_from_txpathmat <- function(txpathmat) |
286 | 287 |
{ |
... | ... |
@@ -329,7 +330,8 @@ |
329 | 330 |
d=ans_d, |
330 | 331 |
partitions=ans_partitions, |
331 | 332 |
paths=ans_paths, |
332 |
- AScode=ans_AScode) |
|
333 |
+ AScode=ans_AScode, |
|
334 |
+ description=unname(AScode2desc[ans_AScode])) |
|
333 | 335 |
} |
334 | 336 |
|
335 | 337 |
|
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@74059 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -177,7 +177,17 @@ |
177 | 177 |
for (k in (i+1L):(j-1L)) |
178 | 178 |
if (all(txpathmat[ , k] >= txbase)) |
179 | 179 |
return(FALSE) |
180 |
- return(TRUE) |
|
180 |
+ TRUE |
|
181 |
+} |
|
182 |
+ |
|
183 |
+### A fancy alternative to .is_bubble() that tries to avoid the for loop. |
|
184 |
+### Surprisingly, it turned out to be slower than .is_bubble() in practise. |
|
185 |
+.is_bubble2 <- function(txpathmat, i, j) |
|
186 |
+{ |
|
187 |
+ txbase <- txpathmat[, i] & txpathmat[, j] |
|
188 |
+ if (sum(txbase) <= 1L) |
|
189 |
+ return(FALSE) |
|
190 |
+ all(colSums(txpathmat[ , (i+1L):(j-1L), drop=FALSE] < txbase) >= 1) |
|
181 | 191 |
} |
182 | 192 |
|
183 | 193 |
### Assumes 'm' to be a logical matrix. Not checked. |
... | ... |
@@ -271,15 +281,21 @@ |
271 | 281 |
### Returns a DataFrame with 1 row per bubble and the following cols: |
272 | 282 |
### source <character>, sink <character>, d <integer>, partitions |
273 | 283 |
### <CharacterList>, paths <CharacterList>, and AScode <character>. |
284 |
+#.extract_bubbles_from_txpathmat <- function(txpathmat, outdeg, indeg) |
|
274 | 285 |
.extract_bubbles_from_txpathmat <- function(txpathmat) |
275 | 286 |
{ |
276 | 287 |
sgnodetypes <- .get_sgnodetypes_from_txpathmat(txpathmat) |
277 | 288 |
ans_source <- ans_sink <- ans_AScode <- character(0) |
278 | 289 |
ans_d <- integer(0) |
279 | 290 |
ans_partitions <- ans_paths <- CharacterList() |
291 |
+ ## Surprisingly, the outdeg/indeg trick turned out to not make any |
|
292 |
+ ## noticeable speed difference in practise. |
|
293 |
+ #ii0 <- which(outdeg >= 2L) |
|
294 |
+ #jj0 <- which(indeg >= 2L) |
|
295 |
+ #for (i in ii0) { |
|
296 |
+ # jj <- jj0[jj0 >= i + 2L] |
|
297 |
+ # for (j in jj) { |
|
280 | 298 |
ncol <- ncol(txpathmat) |
281 |
- ## TODO: Walk on informative nodes only (this should result in a |
|
282 |
- ## significant speedup). |
|
283 | 299 |
for (i in 1:(ncol-2L)) { |
284 | 300 |
for (j in (i+2L):ncol) { |
285 | 301 |
if (!.is_bubble(txpathmat, i, j)) |
... | ... |
@@ -339,8 +355,10 @@ setMethod("bubbles", "IntegerList", |
339 | 355 |
if (!identical(gene_id, NA)) |
340 | 356 |
stop("the 'gene_id' arg is not supported ", |
341 | 357 |
"when 'x' is an IntegerList") |
342 |
- #.extract_ASevents2_from_txpaths(x) |
|
343 | 358 |
txpathmat <- make_matrix_from_txpaths(x) |
359 |
+ #outdeg <- outdeg(x) |
|
360 |
+ #indeg <- indeg(x) |
|
361 |
+ #.extract_bubbles_from_txpathmat(txpathmat, outdeg, indeg) |
|
344 | 362 |
.extract_bubbles_from_txpathmat(txpathmat) |
345 | 363 |
} |
346 | 364 |
) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@74047 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -3,6 +3,40 @@ |
3 | 3 |
### ------------------------------------------------------------------------- |
4 | 4 |
|
5 | 5 |
|
6 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
7 |
+### .get_sgnodetypes_from_txpathmat() |
|
8 |
+### |
|
9 |
+ |
|
10 |
+### Supported types are 1 (exon start), 2 (exon end), 0 (R or L nodes). |
|
11 |
+### Returns an integer vector of length the nb of cols in 'txpathmat' and |
|
12 |
+### named with 'txpathmat' colnames. |
|
13 |
+### TODO: (a) Add an sgnodetypes() accessor to sgedges-methods.R, (b) move |
|
14 |
+### the .get_sgnodetypes_from_txpathmat() helper to that file, and (c) use |
|
15 |
+### it internally in sgnodetypes(). |
|
16 |
+.get_sgnodetypes_from_txpathmat <- function(txpathmat, check.matrix=FALSE) |
|
17 |
+{ |
|
18 |
+ ans <- integer(ncol(txpathmat)) |
|
19 |
+ names(ans) <- colnames(txpathmat) |
|
20 |
+ for (i in seq_len(nrow(txpathmat))) { |
|
21 |
+ idx <- which(txpathmat[i, , drop=FALSE]) |
|
22 |
+ if (length(idx) <= 2L) |
|
23 |
+ next() |
|
24 |
+ idx <- idx[-c(1L, length(idx))] |
|
25 |
+ exon_start_idx <- idx[c(TRUE, FALSE)] |
|
26 |
+ exon_end_idx <- idx[c(FALSE, TRUE)] |
|
27 |
+ if (check.matrix) { |
|
28 |
+ if (any(ans[exon_start_idx] == 2L) || any(ans[exon_end_idx] == 1L)) |
|
29 |
+ stop("invalid matrix of transcript paths: ", |
|
30 |
+ "some columns in 'txpathmat' seem to correspond ", |
|
31 |
+ "at the same time to an exon start and an exon end") |
|
32 |
+ } |
|
33 |
+ ans[exon_start_idx] <- 1L |
|
34 |
+ ans[exon_end_idx] <- 2L |
|
35 |
+ } |
|
36 |
+ ans |
|
37 |
+} |
|
38 |
+ |
|
39 |
+ |
|
6 | 40 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
7 | 41 |
### .extract_ASevents2_from_txpaths() |
8 | 42 |
### |
... | ... |
@@ -133,16 +167,153 @@ |
133 | 167 |
### .extract_bubbles_from_txpathmat() |
134 | 168 |
### |
135 | 169 |
|
170 |
+### We must have 1 <= i, j <= ncol(txpathmat), and j - i >= 2. This is not |
|
171 |
+### checked! |
|
172 |
+.is_bubble <- function(txpathmat, i, j) |
|
173 |
+{ |
|
174 |
+ txbase <- txpathmat[, i] & txpathmat[, j] |
|
175 |
+ if (sum(txbase) <= 1L) |
|
176 |
+ return(FALSE) |
|
177 |
+ for (k in (i+1L):(j-1L)) |
|
178 |
+ if (all(txpathmat[ , k] >= txbase)) |
|
179 |
+ return(FALSE) |
|
180 |
+ return(TRUE) |
|
181 |
+} |
|
182 |
+ |
|
183 |
+### Assumes 'm' to be a logical matrix. Not checked. |
|
184 |
+.make_bit_strings_from_logical_matrix <- function(m) |
|
185 |
+{ |
|
186 |
+ apply(m, 1L, function(x) paste(as.integer(x), collapse="")) |
|
187 |
+} |
|
188 |
+ |
|
189 |
+### Assumes 's' to be a vector of equal-length strings made of 0s and 1s. Not |
|
190 |
+### checked. |
|
191 |
+.make_logical_matrix_from_bit_strings <- function(s) |
|
192 |
+{ |
|
193 |
+ all_letters <- unlist(strsplit(s, NULL, fixed=TRUE), use.names=FALSE) |
|
194 |
+ matrix(as.logical(as.integer(all_letters)), nrow=length(s), byrow=TRUE, |
|
195 |
+ dimnames=list(names(s), NULL)) |
|
196 |
+} |
|
197 |
+ |
|
198 |
+### Returns a DataFrame with 1 row per variant and the following cols: |
|
199 |
+### partition <CharacterList>, path <CharacterList>, and code <character>. |
|
200 |
+.get_bubble_variants <- function(txpathmat, sgnodetypes, i, j) |
|
201 |
+{ |
|
202 |
+ txbase <- txpathmat[, i] & txpathmat[, j] |
|
203 |
+ bubble_submat <- txpathmat[txbase, (i+1L):(j-1L), drop=FALSE] |
|
204 |
+ |
|
205 |
+ ## Remove cols with FALSEs only. |
|
206 |
+ bubble_submat <- bubble_submat[ , colSums(bubble_submat) != 0L, drop=FALSE] |
|
207 |
+ bubble_submat_rownames <- rownames(bubble_submat) |
|
208 |
+ bubble_submat_colnames <- colnames(bubble_submat) |
|
209 |
+ |
|
210 |
+ ## Compute variant paths (1 per row). |
|
211 |
+ ans_path <- CharacterList( |
|
212 |
+ lapply(seq_len(nrow(bubble_submat)), |
|
213 |
+ function(i) |
|
214 |
+ bubble_submat_colnames[bubble_submat[i, ]]) |
|
215 |
+ ) |
|
216 |
+ |
|
217 |
+ ## Compute variant relative paths (1 per row). |
|
218 |
+ ans_rpath <- IntegerList( |
|
219 |
+ lapply(seq_len(nrow(bubble_submat)), |
|
220 |
+ function(i) |
|
221 |
+ which(bubble_submat[i, ])) |
|
222 |
+ ) |
|
223 |
+ |
|
224 |
+ ## Compute variant code (1 per row). |
|
225 |
+ ans_code <- sapply(seq_len(length(ans_path)), |
|
226 |
+ function(k) |
|
227 |
+ { |
|
228 |
+ path <- ans_path[[k]] |
|
229 |
+ path_len <- length(path) |
|
230 |
+ if (path_len == 0L) |
|
231 |
+ return("0") |
|
232 |
+ types <- c("-", "^")[sgnodetypes[path]] |
|
233 |
+ ## Sanity check would fail if 'sgnodetypes[path]' |
|
234 |
+ ## contained 0s but this should never happen. |
|
235 |
+ if (length(types) != path_len) |
|
236 |
+ stop("some splicing sites in variant path ", |
|
237 |
+ "are of type 0") |
|
238 |
+ if (i == 1L) |
|
239 |
+ types[1L] <- "[" |
|
240 |
+ if (j == ncol(txpathmat)) |
|
241 |
+ types[length(types)] <- "]" |
|
242 |
+ paste0(ans_rpath[[k]], types, collapse="") |
|
243 |
+ }) |
|
244 |
+ |
|
245 |
+ ## Order the variants by lexicographic order on their code. |
|
246 |
+ oo <- order(ans_code) |
|
247 |
+ bubble_submat_rownames <- bubble_submat_rownames[oo] |
|
248 |
+ ans_path <- ans_path[oo] |
|
249 |
+ #ans_rpath <- ans_rpath[oo] |
|
250 |
+ ans_code <- ans_code[oo] |
|
251 |
+ |
|
252 |
+ ## Identify unique variants. |
|
253 |
+ ans_code1 <- ans_code[-length(ans_code)] |
|
254 |
+ ans_code2 <- ans_code[-1L] |
|
255 |
+ is_not_dup <- c(TRUE, ans_code1 != ans_code2) |
|
256 |
+ ans_path <- ans_path[is_not_dup] |
|
257 |
+ #ans_rpath <- ans_rpath[is_not_dup] |
|
258 |
+ ans_code <- ans_code[is_not_dup] |
|
259 |
+ |
|
260 |
+ ## Compute variant partitions. |
|
261 |
+ ans_partition <- unname(splitAsList(bubble_submat_rownames, |
|
262 |
+ cumsum(is_not_dup))) |
|
263 |
+ |
|
264 |
+ ## Make and return the DataFrame. |
|
265 |
+ DataFrame(partition=ans_partition, |
|
266 |
+ path=ans_path, |
|
267 |
+ #rpath=ans_rpath, |
|
268 |
+ code=ans_code) |
|
269 |
+} |
|
270 |
+ |
|
136 | 271 |
### Returns a DataFrame with 1 row per bubble and the following cols: |
137 |
-### source <character>, sink <character>, d <integer>. |
|
272 |
+### source <character>, sink <character>, d <integer>, partitions |
|
273 |
+### <CharacterList>, paths <CharacterList>, and AScode <character>. |
|
138 | 274 |
.extract_bubbles_from_txpathmat <- function(txpathmat) |
139 | 275 |
{ |
140 |
- stop("not ready yet, sorry!") |
|
276 |
+ sgnodetypes <- .get_sgnodetypes_from_txpathmat(txpathmat) |
|
277 |
+ ans_source <- ans_sink <- ans_AScode <- character(0) |
|
278 |
+ ans_d <- integer(0) |
|
279 |
+ ans_partitions <- ans_paths <- CharacterList() |
|
141 | 280 |
ncol <- ncol(txpathmat) |
281 |
+ ## TODO: Walk on informative nodes only (this should result in a |
|
282 |
+ ## significant speedup). |
|
142 | 283 |
for (i in 1:(ncol-2L)) { |
143 | 284 |
for (j in (i+2L):ncol) { |
285 |
+ if (!.is_bubble(txpathmat, i, j)) |
|
286 |
+ next |
|
287 |
+ bubble_variants <- .get_bubble_variants(txpathmat, sgnodetypes, |
|
288 |
+ i, j) |
|
289 |
+ bubble_d <- nrow(bubble_variants) |
|
290 |
+ if (bubble_d <= 1L) |
|
291 |
+ next |
|
292 |
+ ans_source <- c(ans_source, colnames(txpathmat)[i]) |
|
293 |
+ ans_sink <- c(ans_sink, colnames(txpathmat)[j]) |
|
294 |
+ ans_d <- c(ans_d, bubble_d) |
|
295 |
+ ## Format the bubble partitions. |
|
296 |
+ bubble_partitions <- bubble_variants[ , "partition"] |
|
297 |
+ bubble_partitions <- sapply(bubble_partitions, paste, collapse=",") |
|
298 |
+ bubble_partitions <- paste0("{", bubble_partitions, "}") |
|
299 |
+ ans_partitions <- c(ans_partitions, |
|
300 |
+ CharacterList(bubble_partitions)) |
|
301 |
+ ## Format the bubble paths. |
|
302 |
+ bubble_paths <- bubble_variants[ , "path"] |
|
303 |
+ bubble_paths <- sapply(bubble_paths, paste, collapse=",") |
|
304 |
+ bubble_paths <- paste0("{", bubble_paths, "}") |
|
305 |
+ ans_paths <- c(ans_paths, CharacterList(bubble_paths)) |
|
306 |
+ ## Format the bubble AScode. |
|
307 |
+ bubble_AScode <- paste(bubble_variants[ , "code"], collapse=",") |
|
308 |
+ ans_AScode <- c(ans_AScode, bubble_AScode) |
|
144 | 309 |
} |
145 | 310 |
} |
311 |
+ DataFrame(source=ans_source, |
|
312 |
+ sink=ans_sink, |
|
313 |
+ d=ans_d, |
|
314 |
+ partitions=ans_partitions, |
|
315 |
+ paths=ans_paths, |
|
316 |
+ AScode=ans_AScode) |
|
146 | 317 |
} |
147 | 318 |
|
148 | 319 |
|
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@74002 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -3,19 +3,23 @@ |
3 | 3 |
### ------------------------------------------------------------------------- |
4 | 4 |
|
5 | 5 |
|
6 |
-### Returns a DataFrame with 1 row per bubble and the following cols: |
|
6 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
7 |
+### .extract_ASevents2_from_txpaths() |
|
8 |
+### |
|
9 |
+ |
|
10 |
+### Returns a DataFrame with 1 row per AS event and the following cols: |
|
7 | 11 |
### source <character>, sink <character>, left_path <IntegerList>, and |
8 | 12 |
### right_path <IntegerList>. |
9 |
-### Must find at least 1 bubble if 'left_txpath' and 'right_txpath' are not |
|
10 |
-### identical. |
|
11 |
-.find_bubbles_in_tx_pair <- function(left_txpath, right_txpath) |
|
13 |
+### Must find at least 1 AS event if 'left_txpath' and 'right_txpath' are |
|
14 |
+### not identical. |
|
15 |
+.extract_ASevents2_from_tx_pair <- function(left_txpath, right_txpath) |
|
12 | 16 |
{ |
13 | 17 |
ans_source <- ans_sink <- character(0) |
14 | 18 |
ans_left_path <- ans_right_path <- IntegerList() |
15 | 19 |
left_len <- length(left_txpath) |
16 | 20 |
right_len <- length(right_txpath) |
17 | 21 |
i <- j <- 1L |
18 |
- in_bubble <- FALSE |
|
22 |
+ in_ASevent <- FALSE |
|
19 | 23 |
while (i <= left_len + 1L || j <= right_len + 1L) { |
20 | 24 |
if (i <= left_len) { |
21 | 25 |
curr_left_SSid <- left_txpath[i] |
... | ... |
@@ -28,9 +32,9 @@ |
28 | 32 |
curr_right_SSid <- .Machine$integer.max |
29 | 33 |
} |
30 | 34 |
if (curr_left_SSid == curr_right_SSid) { |
31 |
- if (in_bubble) { |
|
32 |
- ## End of the current bubble. |
|
33 |
- in_bubble <- FALSE |
|
35 |
+ if (in_ASevent) { |
|
36 |
+ ## End of the current AS event. |
|
37 |
+ in_ASevent <- FALSE |
|
34 | 38 |
if (curr_left_SSid == .Machine$integer.max) { |
35 | 39 |
sink <- "L" |
36 | 40 |
} else { |
... | ... |
@@ -38,30 +42,30 @@ |
38 | 42 |
} |
39 | 43 |
ans_sink <- c(ans_sink, sink) |
40 | 44 |
ans_left_path <- c(ans_left_path, |
41 |
- IntegerList(bubble_left_path)) |
|
45 |
+ IntegerList(ASevent_left_path)) |
|
42 | 46 |
ans_right_path <- c(ans_right_path, |
43 |
- IntegerList(bubble_right_path)) |
|
47 |
+ IntegerList(ASevent_right_path)) |
|
44 | 48 |
} |
45 | 49 |
i <- i + 1L |
46 | 50 |
j <- j + 1L |
47 | 51 |
next |
48 | 52 |
} |
49 |
- if (!in_bubble) { |
|
50 |
- ## Start a new bubble. |
|
51 |
- in_bubble <- TRUE |
|
53 |
+ if (!in_ASevent) { |
|
54 |
+ ## Start a new AS event. |
|
55 |
+ in_ASevent <- TRUE |
|
52 | 56 |
if (i == 1L) { |
53 |
- bubble_source <- "R" |
|
57 |
+ ASevent_source <- "R" |
|
54 | 58 |
} else { |
55 |
- bubble_source <- as.character(left_txpath[i - 1L]) |
|
59 |
+ ASevent_source <- as.character(left_txpath[i - 1L]) |
|
56 | 60 |
} |
57 |
- ans_source <- c(ans_source, bubble_source) |
|
58 |
- bubble_left_path <- bubble_right_path <- integer(0) |
|
61 |
+ ans_source <- c(ans_source, ASevent_source) |
|
62 |
+ ASevent_left_path <- ASevent_right_path <- integer(0) |
|
59 | 63 |
} |
60 | 64 |
if (curr_left_SSid < curr_right_SSid) { |
61 |
- bubble_left_path <- c(bubble_left_path, curr_left_SSid) |
|
65 |
+ ASevent_left_path <- c(ASevent_left_path, curr_left_SSid) |
|
62 | 66 |
i <- i + 1L |
63 | 67 |
} else { |
64 |
- bubble_right_path <- c(bubble_right_path, curr_right_SSid) |
|
68 |
+ ASevent_right_path <- c(ASevent_right_path, curr_right_SSid) |
|
65 | 69 |
j <- j + 1L |
66 | 70 |
} |
67 | 71 |
} |
... | ... |
@@ -71,22 +75,22 @@ |
71 | 75 |
right_path=ans_right_path) |
72 | 76 |
} |
73 | 77 |
|
74 |
-.extract_bubbles_from_txpaths <- function(txpaths) |
|
78 |
+.extract_ASevents2_from_txpaths <- function(txpaths) |
|
75 | 79 |
{ |
76 | 80 |
ntx <- length(txpaths) |
77 | 81 |
if (ntx <= 1L) { |
78 |
- ## No bubbles. |
|
79 |
- bubbles <- .find_bubbles_in_tx_pair(integer(0), integer(0)) |
|
80 |
- bubbles <- cbind(bubbles, DataFrame(left_tx_id=character(0), |
|
81 |
- right_tx_id=character(0))) |
|
82 |
- return(bubbles) |
|
82 |
+ ## No AS events. |
|
83 |
+ ASevents2 <- .extract_ASevents2_from_tx_pair(integer(0), integer(0)) |
|
84 |
+ ans <- cbind(ASevents2, DataFrame(left_tx_id=character(0), |
|
85 |
+ right_tx_id=character(0))) |
|
86 |
+ return(ans) |
|
83 | 87 |
} |
84 | 88 |
tx_id <- names(txpaths) |
85 | 89 |
if (is.null(tx_id)) |
86 | 90 |
tx_id <- seq_along(txpaths) |
87 | 91 |
|
88 | 92 |
npairs <- (ntx * (ntx - 1L)) %/% 2L |
89 |
- all_bubbles <- vector(mode="list", length=npairs) |
|
93 |
+ all_ASevents2 <- vector(mode="list", length=npairs) |
|
90 | 94 |
z <- 1L |
91 | 95 |
for (i in 1:(ntx-1L)) { |
92 | 96 |
left_txpath <- txpaths[[i]] |
... | ... |
@@ -94,16 +98,17 @@ |
94 | 98 |
for (j in (i+1L):ntx) { |
95 | 99 |
right_txpath <- txpaths[[j]] |
96 | 100 |
right_tx_id <- tx_id[j] |
97 |
- bubbles <- .find_bubbles_in_tx_pair(left_txpath, right_txpath) |
|
98 |
- bubbles <- cbind(bubbles, DataFrame(left_tx_id=left_tx_id, |
|
99 |
- right_tx_id=right_tx_id)) |
|
100 |
- all_bubbles[[z]] <- bubbles |
|
101 |
+ ASevents2 <- .extract_ASevents2_from_tx_pair(left_txpath, |
|
102 |
+ right_txpath) |
|
103 |
+ ASevents2 <- cbind(ASevents2, DataFrame(left_tx_id=left_tx_id, |
|
104 |
+ right_tx_id=right_tx_id)) |
|
105 |
+ all_ASevents2[[z]] <- ASevents2 |
|
101 | 106 |
z <- z + 1L |
102 | 107 |
} |
103 | 108 |
} |
104 |
- ans <- do.call(rbind, all_bubbles) |
|
109 |
+ ans <- do.call(rbind, all_ASevents2) |
|
105 | 110 |
|
106 |
- ## Remove duplicate bubbles. |
|
111 |
+ ## Remove duplicate AS events. |
|
107 | 112 |
keys <- c("source", "sink", "left_path", "right_path") |
108 | 113 |
key1 <- ans[ , "source"] |
109 | 114 |
key2 <- ans[ , "sink"] |
... | ... |
@@ -123,6 +128,28 @@ |
123 | 128 |
ans |
124 | 129 |
} |
125 | 130 |
|
131 |
+ |
|
132 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
133 |
+### .extract_bubbles_from_txpathmat() |
|
134 |
+### |
|
135 |
+ |
|
136 |
+### Returns a DataFrame with 1 row per bubble and the following cols: |
|
137 |
+### source <character>, sink <character>, d <integer>. |
|
138 |
+.extract_bubbles_from_txpathmat <- function(txpathmat) |
|
139 |
+{ |
|
140 |
+ stop("not ready yet, sorry!") |
|
141 |
+ ncol <- ncol(txpathmat) |
|
142 |
+ for (i in 1:(ncol-2L)) { |
|
143 |
+ for (j in (i+2L):ncol) { |
|
144 |
+ } |
|
145 |
+ } |
|
146 |
+} |
|
147 |
+ |
|
148 |
+ |
|
149 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
150 |
+### bubbles() |
|
151 |
+### |
|
152 |
+ |
|
126 | 153 |
setGeneric("bubbles", signature="x", |
127 | 154 |
function(x, gene_id=NA) standardGeneric("bubbles") |
128 | 155 |
) |
... | ... |
@@ -141,7 +168,9 @@ setMethod("bubbles", "IntegerList", |
141 | 168 |
if (!identical(gene_id, NA)) |
142 | 169 |
stop("the 'gene_id' arg is not supported ", |
143 | 170 |
"when 'x' is an IntegerList") |
144 |
- .extract_bubbles_from_txpaths(x) |
|
171 |
+ #.extract_ASevents2_from_txpaths(x) |
|
172 |
+ txpathmat <- make_matrix_from_txpaths(x) |
|
173 |
+ .extract_bubbles_from_txpathmat(txpathmat) |
|
145 | 174 |
} |
146 | 175 |
) |
147 | 176 |
|
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@73979 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -6,36 +6,35 @@ |
6 | 6 |
### Returns a DataFrame with 1 row per bubble and the following cols: |
7 | 7 |
### source <character>, sink <character>, left_path <IntegerList>, and |
8 | 8 |
### right_path <IntegerList>. |
9 |
-### Must find at least 1 bubble if 'left_SSids' and 'right_SSids' are not |
|
9 |
+### Must find at least 1 bubble if 'left_txpath' and 'right_txpath' are not |
|
10 | 10 |
### identical. |
11 |
-.find_bubbles_in_tx_pair <- function(left_SSids, right_SSids) |
|
11 |
+.find_bubbles_in_tx_pair <- function(left_txpath, right_txpath) |
|
12 | 12 |
{ |
13 | 13 |
ans_source <- ans_sink <- character(0) |
14 | 14 |
ans_left_path <- ans_right_path <- IntegerList() |
15 |
- left_len <- length(left_SSids) |
|
16 |
- right_len <- length(right_SSids) |
|
15 |
+ left_len <- length(left_txpath) |
|
16 |
+ right_len <- length(right_txpath) |
|
17 | 17 |
i <- j <- 1L |
18 |
- prev_left <- prev_right <- "R" |
|
19 | 18 |
in_bubble <- FALSE |
20 | 19 |
while (i <= left_len + 1L || j <= right_len + 1L) { |
21 | 20 |
if (i <= left_len) { |
22 |
- curr_left <- left_SSids[i] |
|
21 |
+ curr_left_SSid <- left_txpath[i] |
|
23 | 22 |
} else { |
24 |
- curr_left <- .Machine$integer.max |
|
23 |
+ curr_left_SSid <- .Machine$integer.max |
|
25 | 24 |
} |
26 | 25 |
if (j <= right_len) { |
27 |
- curr_right <- right_SSids[j] |
|
26 |
+ curr_right_SSid <- right_txpath[j] |
|
28 | 27 |
} else { |
29 |
- curr_right <- .Machine$integer.max |
|
28 |
+ curr_right_SSid <- .Machine$integer.max |
|
30 | 29 |
} |
31 |
- if (curr_left == curr_right) { |
|
30 |
+ if (curr_left_SSid == curr_right_SSid) { |
|
32 | 31 |
if (in_bubble) { |
33 | 32 |
## End of the current bubble. |
34 | 33 |
in_bubble <- FALSE |
35 |
- if (curr_left == .Machine$integer.max) { |
|
34 |
+ if (curr_left_SSid == .Machine$integer.max) { |
|
36 | 35 |
sink <- "L" |
37 | 36 |
} else { |
38 |
- sink <- as.character(curr_left) |
|
37 |
+ sink <- as.character(curr_left_SSid) |
|
39 | 38 |
} |
40 | 39 |
ans_sink <- c(ans_sink, sink) |
41 | 40 |
ans_left_path <- c(ans_left_path, |
... | ... |
@@ -43,25 +42,26 @@ |
43 | 42 |
ans_right_path <- c(ans_right_path, |
44 | 43 |
IntegerList(bubble_right_path)) |
45 | 44 |
} |
46 |
- prev_left <- curr_left |
|
47 | 45 |
i <- i + 1L |
48 |
- prev_right <- curr_right |
|
49 | 46 |
j <- j + 1L |
50 | 47 |
next |
51 | 48 |
} |
52 | 49 |
if (!in_bubble) { |
53 | 50 |
## Start a new bubble. |
54 | 51 |
in_bubble <- TRUE |
55 |
- ans_source <- c(ans_source, as.character(prev_left)) |
|
52 |
+ if (i == 1L) { |
|
53 |
+ bubble_source <- "R" |
|
54 |
+ } else { |
|
55 |
+ bubble_source <- as.character(left_txpath[i - 1L]) |
|
56 |
+ } |
|
57 |
+ ans_source <- c(ans_source, bubble_source) |
|
56 | 58 |
bubble_left_path <- bubble_right_path <- integer(0) |
57 | 59 |
} |
58 |
- if (curr_left < curr_right) { |
|
59 |
- bubble_left_path <- c(bubble_left_path, curr_left) |
|
60 |
- prev_left <- curr_left |
|
60 |
+ if (curr_left_SSid < curr_right_SSid) { |
|
61 |
+ bubble_left_path <- c(bubble_left_path, curr_left_SSid) |
|
61 | 62 |
i <- i + 1L |
62 | 63 |
} else { |
63 |
- bubble_right_path <- c(bubble_right_path, curr_right) |
|
64 |
- prev_right <- curr_right |
|
64 |
+ bubble_right_path <- c(bubble_right_path, curr_right_SSid) |
|
65 | 65 |
j <- j + 1L |
66 | 66 |
} |
67 | 67 |
} |
... | ... |
@@ -71,9 +71,9 @@ |
71 | 71 |
right_path=ans_right_path) |
72 | 72 |
} |
73 | 73 |
|
74 |
-.extract_bubbles_from_spath <- function(spath) |
|
74 |
+.extract_bubbles_from_txpaths <- function(txpaths) |
|
75 | 75 |
{ |
76 |
- ntx <- length(spath) |
|
76 |
+ ntx <- length(txpaths) |
|
77 | 77 |
if (ntx <= 1L) { |
78 | 78 |
## No bubbles. |
79 | 79 |
bubbles <- .find_bubbles_in_tx_pair(integer(0), integer(0)) |
... | ... |
@@ -81,20 +81,20 @@ |
81 | 81 |
right_tx_id=character(0))) |
82 | 82 |
return(bubbles) |
83 | 83 |
} |
84 |
- tx_id <- names(spath) |
|
84 |
+ tx_id <- names(txpaths) |
|
85 | 85 |
if (is.null(tx_id)) |
86 |
- tx_id <- seq_along(spath) |
|
86 |
+ tx_id <- seq_along(txpaths) |
|
87 | 87 |
|
88 | 88 |
npairs <- (ntx * (ntx - 1L)) %/% 2L |
89 | 89 |
all_bubbles <- vector(mode="list", length=npairs) |
90 | 90 |
z <- 1L |
91 | 91 |
for (i in 1:(ntx-1L)) { |
92 |
- left_SSids <- spath[[i]] |
|
92 |
+ left_txpath <- txpaths[[i]] |
|
93 | 93 |
left_tx_id <- tx_id[i] |
94 | 94 |
for (j in (i+1L):ntx) { |
95 |
- right_SSids <- spath[[j]] |
|
95 |
+ right_txpath <- txpaths[[j]] |
|
96 | 96 |
right_tx_id <- tx_id[j] |
97 |
- bubbles <- .find_bubbles_in_tx_pair(left_SSids, right_SSids) |
|
97 |
+ bubbles <- .find_bubbles_in_tx_pair(left_txpath, right_txpath) |
|
98 | 98 |
bubbles <- cbind(bubbles, DataFrame(left_tx_id=left_tx_id, |
99 | 99 |
right_tx_id=right_tx_id)) |
100 | 100 |
all_bubbles[[z]] <- bubbles |
... | ... |
@@ -130,8 +130,8 @@ setGeneric("bubbles", signature="x", |
130 | 130 |
setMethod("bubbles", "ANY", |
131 | 131 |
function(x, gene_id=NA) |
132 | 132 |
{ |
133 |
- spath <- spath(x, gene_id=gene_id) |
|
134 |
- bubbles(spath) |
|
133 |
+ txpaths <- txpaths(x, gene_id=gene_id) |
|
134 |
+ bubbles(txpaths) |
|
135 | 135 |
} |
136 | 136 |
) |
137 | 137 |
|
... | ... |
@@ -141,7 +141,7 @@ setMethod("bubbles", "IntegerList", |
141 | 141 |
if (!identical(gene_id, NA)) |
142 | 142 |
stop("the 'gene_id' arg is not supported ", |
143 | 143 |
"when 'x' is an IntegerList") |
144 |
- .extract_bubbles_from_spath(x) |
|
144 |
+ .extract_bubbles_from_txpaths(x) |
|
145 | 145 |
} |
146 | 146 |
) |
147 | 147 |
|
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@73962 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,13 +1,149 @@ |
1 |
+### ========================================================================= |
|
2 |
+### "bubbles" methods |
|
3 |
+### ------------------------------------------------------------------------- |
|
1 | 4 |
|
2 |
-.make_spath_matrix_from_spath <- function(spath) |
|
5 |
+ |
|
6 |
+### Returns a DataFrame with 1 row per bubble and the following cols: |
|
7 |
+### source <character>, sink <character>, left_path <IntegerList>, and |
|
8 |
+### right_path <IntegerList>. |
|
9 |
+### Must find at least 1 bubble if 'left_SSids' and 'right_SSids' are not |
|
10 |
+### identical. |
|
11 |
+.find_bubbles_in_tx_pair <- function(left_SSids, right_SSids) |
|
3 | 12 |
{ |
4 |
- nodes <- sgnodes(spath) |
|
13 |
+ ans_source <- ans_sink <- character(0) |
|
14 |
+ ans_left_path <- ans_right_path <- IntegerList() |
|
15 |
+ left_len <- length(left_SSids) |
|
16 |
+ right_len <- length(right_SSids) |
|
17 |
+ i <- j <- 1L |
|
18 |
+ prev_left <- prev_right <- "R" |
|
19 |
+ in_bubble <- FALSE |
|
20 |
+ while (i <= left_len + 1L || j <= right_len + 1L) { |
|
21 |
+ if (i <= left_len) { |
|
22 |
+ curr_left <- left_SSids[i] |
|
23 |
+ } else { |
|
24 |
+ curr_left <- .Machine$integer.max |
|
25 |
+ } |
|
26 |
+ if (j <= right_len) { |
|
27 |
+ curr_right <- right_SSids[j] |
|
28 |
+ } else { |
|
29 |
+ curr_right <- .Machine$integer.max |
|
30 |
+ } |
|
31 |
+ if (curr_left == curr_right) { |
|
32 |
+ if (in_bubble) { |
|
33 |
+ ## End of the current bubble. |
|
34 |
+ in_bubble <- FALSE |
|
35 |
+ if (curr_left == .Machine$integer.max) { |
|
36 |
+ sink <- "L" |
|
37 |
+ } else { |
|
38 |
+ sink <- as.character(curr_left) |
|
39 |
+ } |
|
40 |
+ ans_sink <- c(ans_sink, sink) |
|
41 |
+ ans_left_path <- c(ans_left_path, |
|
42 |
+ IntegerList(bubble_left_path)) |
|
43 |
+ ans_right_path <- c(ans_right_path, |
|
44 |
+ IntegerList(bubble_right_path)) |
|
45 |
+ } |
|
46 |
+ prev_left <- curr_left |
|
47 |
+ i <- i + 1L |
|
48 |
+ prev_right <- curr_right |
|
49 |
+ j <- j + 1L |
|
50 |
+ next |
|
51 |
+ } |
|
52 |
+ if (!in_bubble) { |
|
53 |
+ ## Start a new bubble. |
|
54 |
+ in_bubble <- TRUE |
|
55 |
+ ans_source <- c(ans_source, as.character(prev_left)) |
|
56 |
+ bubble_left_path <- bubble_right_path <- integer(0) |
|
57 |
+ } |
|
58 |
+ if (curr_left < curr_right) { |
|
59 |
+ bubble_left_path <- c(bubble_left_path, curr_left) |
|
60 |
+ prev_left <- curr_left |
|
61 |
+ i <- i + 1L |
|
62 |
+ } else { |
|
63 |
+ bubble_right_path <- c(bubble_right_path, curr_right) |
|
64 |
+ prev_right <- curr_right |
|
65 |
+ j <- j + 1L |
|
66 |
+ } |
|
67 |
+ } |
|
68 |
+ DataFrame(source=ans_source, |
|
69 |
+ sink=ans_sink, |
|
70 |
+ left_path=ans_left_path, |
|
71 |
+ right_path=ans_right_path) |
|
5 | 72 |
} |
6 | 73 |
|
7 |
-findBubbles <- function(x, gene_id=NA) |
|
74 |
+.extract_bubbles_from_spath <- function(spath) |
|
8 | 75 |
{ |
9 |
- spath <- spath(sg, gene_id=gene_id) |
|
10 |
- spath_mat <- .make_spath_matrix_from_spath(spath) |
|
11 |
- spath_mat |
|
76 |
+ ntx <- length(spath) |
|
77 |
+ if (ntx <= 1L) { |
|
78 |
+ ## No bubbles. |
|
79 |
+ bubbles <- .find_bubbles_in_tx_pair(integer(0), integer(0)) |
|
80 |
+ bubbles <- cbind(bubbles, DataFrame(left_tx_id=character(0), |
|
81 |
+ right_tx_id=character(0))) |
|
82 |
+ return(bubbles) |
|
83 |
+ } |
|
84 |
+ tx_id <- names(spath) |
|
85 |
+ if (is.null(tx_id)) |
|
86 |
+ tx_id <- seq_along(spath) |
|
87 |
+ |
|
88 |
+ npairs <- (ntx * (ntx - 1L)) %/% 2L |
|
89 |
+ all_bubbles <- vector(mode="list", length=npairs) |
|
90 |
+ z <- 1L |
|
91 |
+ for (i in 1:(ntx-1L)) { |
|
92 |
+ left_SSids <- spath[[i]] |
|
93 |
+ left_tx_id <- tx_id[i] |
|
94 |
+ for (j in (i+1L):ntx) { |
|
95 |
+ right_SSids <- spath[[j]] |
|
96 |
+ right_tx_id <- tx_id[j] |
|
97 |
+ bubbles <- .find_bubbles_in_tx_pair(left_SSids, right_SSids) |
|
98 |
+ bubbles <- cbind(bubbles, DataFrame(left_tx_id=left_tx_id, |
|
99 |
+ right_tx_id=right_tx_id)) |
|
100 |
+ all_bubbles[[z]] <- bubbles |
|
101 |
+ z <- z + 1L |
|
102 |
+ } |
|
103 |
+ } |
|
104 |
+ ans <- do.call(rbind, all_bubbles) |
|
105 |
+ |
|
106 |
+ ## Remove duplicate bubbles. |
|
107 |
+ keys <- c("source", "sink", "left_path", "right_path") |
|
108 |
+ key1 <- ans[ , "source"] |
|
109 |
+ key2 <- ans[ , "sink"] |
|
110 |
+ key3 <- unlist(lapply(ans[ , "left_path"], paste, collapse=","), |
|
111 |
+ use.names=FALSE) |
|
112 |
+ key4 <- unlist(lapply(ans[ , "right_path"], paste, collapse=","), |
|
113 |
+ use.names=FALSE) |
|
114 |
+ key <- paste(key1, key2, key3, key4, sep="|") |
|
115 |
+ left_tx_id <- ans[ , "left_tx_id"] |
|
116 |
+ right_tx_id <- ans[ , "right_tx_id"] |
|
117 |
+ sm <- match(key, key) |
|
118 |
+ is_not_dup <- sm == seq_along(sm) |
|
119 |
+ ans <- ans[is_not_dup, keys, drop=FALSE] |
|
120 |
+ rownames(ans) <- NULL |
|
121 |
+ ans$left_tx_id <- unique(splitAsList(left_tx_id, sm)) |
|
122 |
+ ans$right_tx_id <- unique(splitAsList(right_tx_id, sm)) |
|
123 |
+ ans |
|
12 | 124 |
} |
13 | 125 |
|
126 |
+setGeneric("bubbles", signature="x", |
|
127 |
+ function(x, gene_id=NA) standardGeneric("bubbles") |
|
128 |
+) |
|
129 |
+ |
|
130 |
+setMethod("bubbles", "ANY", |
|
131 |
+ function(x, gene_id=NA) |
|
132 |
+ { |
|
133 |
+ spath <- spath(x, gene_id=gene_id) |
|
134 |
+ bubbles(spath) |
|
135 |
+ } |
|
136 |
+) |
|
137 |
+ |
|
138 |
+setMethod("bubbles", "IntegerList", |
|
139 |
+ function(x, gene_id=NA) |
|
140 |
+ { |
|
141 |
+ if (!identical(gene_id, NA)) |
|
142 |
+ stop("the 'gene_id' arg is not supported ", |
|
143 |
+ "when 'x' is an IntegerList") |
|
144 |
+ .extract_bubbles_from_spath(x) |
|
145 |
+ } |
|
146 |
+) |
|
147 |
+ |
|
148 |
+### TODO: Add "bubbles" methods for data.frame and DataFrame objects. |
|
149 |
+ |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@73960 bc3139a8-67e5-0310-9ffc-ced21a209358
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@73957 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,13 @@ |
1 |
+ |
|
2 |
+.make_spath_matrix_from_spath <- function(spath) |
|
3 |
+{ |
|
4 |
+ nodes <- c("R", sort(unique(unlist(spath))), "L") |
|
5 |
+} |
|
6 |
+ |
|
7 |
+findBubbles <- function(x, gene_id=NA) |
|
8 |
+{ |
|
9 |
+ spath <- spath(sg, gene_id=gene_id) |
|
10 |
+ spath_mat <- .make_spath_matrix_from_spath(spath) |
|
11 |
+ spath_mat |
|
12 |
+} |
|
13 |
+ |