git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@73957 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -19,8 +19,9 @@ Suggests: igraph, Gviz, |
19 | 19 |
RUnit |
20 | 20 |
Collate: utils.R |
21 | 21 |
SplicingGraphs-class.R |
22 |
- sgdf-methods.R |
|
22 |
+ sgedges-methods.R |
|
23 | 23 |
sgraph-methods.R |
24 |
+ bubbles-methods.R |
|
24 | 25 |
countReads.R |
25 | 26 |
toy_data.R |
26 | 27 |
biocViews: Genetics, Annotation, HighThroughputSequencing |
... | ... |
@@ -39,8 +39,8 @@ exportMethods( |
39 | 39 |
### |
40 | 40 |
|
41 | 41 |
export( |
42 |
- ## sgdf-methods.R: |
|
43 |
- sgdf2, |
|
42 |
+ ## sgedges-methods.R: |
|
43 |
+ sgedges2, |
|
44 | 44 |
|
45 | 45 |
## sgraph-methods.R: |
46 | 46 |
sgraph2, |
... | ... |
@@ -67,10 +67,10 @@ export( |
67 | 67 |
## SplicingGraphs-class.R: |
68 | 68 |
SplicingGraphs, |
69 | 69 |
|
70 |
- ## sgdf-methods.R: |
|
70 |
+ ## sgedges-methods.R: |
|
71 | 71 |
spath, |
72 | 72 |
UATXHcount, |
73 |
- sgdf, |
|
73 |
+ sgedges, |
|
74 | 74 |
uninformativeSSids, |
75 | 75 |
|
76 | 76 |
## sgraph-methods.R: |
... | ... |
@@ -82,7 +82,7 @@ exportMethods( |
82 | 82 |
SplicingGraphs, |
83 | 83 |
spath, |
84 | 84 |
UATXHcount, |
85 |
- sgdf, |
|
85 |
+ sgedges, |
|
86 | 86 |
uninformativeSSids, |
87 | 87 |
sgraph |
88 | 88 |
) |
89 | 89 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,13 @@ |
1 |
+ |
|
2 |
+.make_spath_matrix_from_spath <- function(spath) |
|
3 |
+{ |
|
4 |
+ nodes <- c("R", sort(unique(unlist(spath))), "L") |
|
5 |
+} |
|
6 |
+ |
|
7 |
+findBubbles <- function(x, gene_id=NA) |
|
8 |
+{ |
|
9 |
+ spath <- spath(sg, gene_id=gene_id) |
|
10 |
+ spath_mat <- .make_spath_matrix_from_spath(spath) |
|
11 |
+ spath_mat |
|
12 |
+} |
|
13 |
+ |
0 | 14 |
similarity index 84% |
1 | 15 |
rename from R/sgdf-methods.R |
2 | 16 |
rename to R/sgedges-methods.R |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
### ========================================================================= |
2 |
-### sgdf (and related) methods |
|
2 |
+### sgedges (and related) methods |
|
3 | 3 |
### ------------------------------------------------------------------------- |
4 | 4 |
|
5 | 5 |
|
... | ... |
@@ -124,7 +124,7 @@ setMethod(".hits", "GRangesList", |
124 | 124 |
|
125 | 125 |
|
126 | 126 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
127 |
-### sgdf() extractor |
|
127 |
+### sgedges() extractor |
|
128 | 128 |
### |
129 | 129 |
### Returns the splicing graph in a DataFrame with 1 row per edge. |
130 | 130 |
### |
... | ... |
@@ -133,7 +133,7 @@ setMethod(".hits", "GRangesList", |
133 | 133 |
### given gene. Should have been obtained thru the spath() accessor. |
134 | 134 |
### Returns a 4-col (or 5-col if 'UATXHcount' is supplied) data.frame |
135 | 135 |
### representing the splicing graph. |
136 |
-.make_sgdf0_from_spath <- function(spath, UATXHcount=NULL) |
|
136 |
+.make_sgedges0_from_spath <- function(spath, UATXHcount=NULL) |
|
137 | 137 |
{ |
138 | 138 |
if (!is.null(UATXHcount)) { |
139 | 139 |
if (!is.integer(UATXHcount)) |
... | ... |
@@ -142,7 +142,7 @@ setMethod(".hits", "GRangesList", |
142 | 142 |
stop("when not NULL, 'UATXHcount' must have ", |
143 | 143 |
"the same length as 'spath'") |
144 | 144 |
} |
145 |
- sgdf0s <- lapply(seq_along(spath), |
|
145 |
+ sgedges0s <- lapply(seq_along(spath), |
|
146 | 146 |
function(i) { |
147 | 147 |
SSids <- spath[[i]] |
148 | 148 |
from <- c("R", SSids) |
... | ... |
@@ -169,48 +169,48 @@ setMethod(".hits", "GRangesList", |
169 | 169 |
ex_or_in=ex_or_in, |
170 | 170 |
stringsAsFactors=FALSE) |
171 | 171 |
}) |
172 |
- nedges_per_tx <- sapply(sgdf0s, nrow) |
|
173 |
- sgdf0 <- do.call(rbind, sgdf0s) |
|
172 |
+ nedges_per_tx <- sapply(sgedges0s, nrow) |
|
173 |
+ sgedges0 <- do.call(rbind, sgedges0s) |
|
174 | 174 |
tx_id <- names(spath) |
175 | 175 |
if (is.null(tx_id)) |
176 | 176 |
tx_id <- seq_along(spath) |
177 | 177 |
tx_id <- rep.int(factor(tx_id, levels=tx_id), nedges_per_tx) |
178 |
- sgdf0$tx_id <- tx_id |
|
178 |
+ sgedges0$tx_id <- tx_id |
|
179 | 179 |
if (!is.null(UATXHcount)) |
180 |
- sgdf0$UATXHcount <- rep.int(UATXHcount, nedges_per_tx) |
|
181 |
- sgdf0 |
|
180 |
+ sgedges0$UATXHcount <- rep.int(UATXHcount, nedges_per_tx) |
|
181 |
+ sgedges0 |
|
182 | 182 |
} |
183 | 183 |
|
184 |
-### Collapse the duplicated edges in 'sgdf0' into a DataFrame. |
|
184 |
+### Collapse the duplicated edges in 'sgedges0' into a DataFrame. |
|
185 | 185 |
### We use a DataFrame instead of a data.frame because we want to store |
186 | 186 |
### the tx_id col in a CompressedFactorList (even though this container |
187 | 187 |
### doesn't formally exist and a CompressedIntegerList is actually what's |
188 | 188 |
### being used). |
189 |
-.make_sgdf_from_sgdf0 <- function(sgdf0, ex_hits=NULL, in_hits=NULL) |
|
189 |
+.make_sgedges_from_sgedges0 <- function(sgedges0, ex_hits=NULL, in_hits=NULL) |
|
190 | 190 |
{ |
191 |
- from <- sgdf0[ , "from"] |
|
192 |
- to <- sgdf0[ , "to"] |
|
193 |
- ex_or_in <- sgdf0[ , "ex_or_in"] |
|
194 |
- tx_id <- sgdf0[ , "tx_id"] |
|
191 |
+ from <- sgedges0[ , "from"] |
|
192 |
+ to <- sgedges0[ , "to"] |
|
193 |
+ ex_or_in <- sgedges0[ , "ex_or_in"] |
|
194 |
+ tx_id <- sgedges0[ , "tx_id"] |
|
195 | 195 |
edges <- paste(from, to, sep="~") |
196 | 196 |
sm <- match(edges, edges) |
197 | 197 |
if (!all(ex_or_in == ex_or_in[sm])) |
198 | 198 |
stop("invalid splicing graph") |
199 | 199 |
is_not_dup <- sm == seq_along(sm) |
200 |
- sgdf <- DataFrame(sgdf0[is_not_dup, , drop=FALSE]) |
|
201 |
- sgdf$tx_id <- splitAsList(tx_id, sm) |
|
202 |
- UATXHcount <- sgdf$UATXHcount |
|
200 |
+ sgedges <- DataFrame(sgedges0[is_not_dup, , drop=FALSE]) |
|
201 |
+ sgedges$tx_id <- splitAsList(tx_id, sm) |
|
202 |
+ UATXHcount <- sgedges$UATXHcount |
|
203 | 203 |
if (!is.null(UATXHcount)) |
204 |
- sgdf$UATXHcount <- sum(splitAsList(sgdf0$UATXHcount, sm)) |
|
204 |
+ sgedges$UATXHcount <- sum(splitAsList(sgedges0$UATXHcount, sm)) |
|
205 | 205 |
if (is.null(ex_hits) && is.null(in_hits)) |
206 |
- return(sgdf) |
|
206 |
+ return(sgedges) |
|
207 | 207 |
hits <- relist(character(0), PartitioningByEnd(NG=length(sm))) |
208 | 208 |
if (!is.null(ex_hits)) { |
209 | 209 |
if (!is(ex_hits, "CharacterList")) |
210 | 210 |
stop("'ex_hits' must be a CharacterList object") |
211 | 211 |
ex_idx <- which(ex_or_in == "ex") |
212 | 212 |
if (length(ex_idx) != length(ex_hits)) |
213 |
- stop("'ex_hits' is incompatible with 'sgdf0'") |
|
213 |
+ stop("'ex_hits' is incompatible with 'sgedges0'") |
|
214 | 214 |
hits[ex_idx] <- ex_hits |
215 | 215 |
} |
216 | 216 |
if (!is.null(in_hits)) { |
... | ... |
@@ -218,24 +218,24 @@ setMethod(".hits", "GRangesList", |
218 | 218 |
stop("'in_hits' must be a CharacterList object") |
219 | 219 |
in_idx <- which(ex_or_in == "in") |
220 | 220 |
if (length(in_idx) != length(in_hits)) |
221 |
- stop("'in_hits' is incompatible with 'sgdf0'") |
|
221 |
+ stop("'in_hits' is incompatible with 'sgedges0'") |
|
222 | 222 |
hits[in_idx] <- in_hits |
223 | 223 |
} |
224 | 224 |
## TODO: This is quite inefficient. Improve it. |
225 | 225 |
for (i in which(!is_not_dup)) |
226 | 226 |
hits[[sm[i]]] <- unique(hits[[sm[i]]], hits[[i]]) |
227 |
- sgdf$hits <- hits[is_not_dup] |
|
228 |
- sgdf$nhits <- elementLengths(sgdf$hits) |
|
229 |
- sgdf |
|
227 |
+ sgedges$hits <- hits[is_not_dup] |
|
228 |
+ sgedges$nhits <- elementLengths(sgedges$hits) |
|
229 |
+ sgedges |
|
230 | 230 |
} |
231 | 231 |
|
232 |
-setGeneric("sgdf", signature="x", |
|
232 |
+setGeneric("sgedges", signature="x", |
|
233 | 233 |
function(x, gene_id=NA, UATXHcount=NULL, in_by_tx=NULL, |
234 | 234 |
keep.dup.edges=FALSE) |
235 |
- standardGeneric("sgdf") |
|
235 |
+ standardGeneric("sgedges") |
|
236 | 236 |
) |
237 | 237 |
|
238 |
-setMethod("sgdf", "ANY", |
|
238 |
+setMethod("sgedges", "ANY", |
|
239 | 239 |
function(x, gene_id=NA, UATXHcount=NULL, in_by_tx=NULL, |
240 | 240 |
keep.dup.edges=FALSE) |
241 | 241 |
{ |
... | ... |
@@ -243,7 +243,7 @@ setMethod("sgdf", "ANY", |
243 | 243 |
if (is.null(UATXHcount)) |
244 | 244 |
UATXHcount <- UATXHcount(x, gene_id=gene_id) |
245 | 245 |
if (is.null(in_by_tx)) |
246 |
- return(sgdf(spath, UATXHcount=UATXHcount, |
|
246 |
+ return(sgedges(spath, UATXHcount=UATXHcount, |
|
247 | 247 |
keep.dup.edges=keep.dup.edges)) |
248 | 248 |
if (!is(in_by_tx, "GRangesList")) |
249 | 249 |
stop("'in_by_tx' must be NULL or a GRangesList object") |
... | ... |
@@ -257,8 +257,8 @@ setMethod("sgdf", "ANY", |
257 | 257 |
"with the shape of 'x'") |
258 | 258 |
if (!identical(keep.dup.edges, FALSE)) |
259 | 259 |
stop("'keep.dup.edges' must be FALSE when 'in_by_tx' is supplied") |
260 |
- sgdf0 <- sgdf(spath, UATXHcount=UATXHcount, keep.dup.edges=TRUE) |
|
261 |
- ex_or_in <- sgdf0[ , "ex_or_in"] |
|
260 |
+ sgedges0 <- sgedges(spath, UATXHcount=UATXHcount, keep.dup.edges=TRUE) |
|
261 |
+ ex_or_in <- sgedges0[ , "ex_or_in"] |
|
262 | 262 |
ex_hits <- .hits(x@tx, gene_id=gene_id) |
263 | 263 |
if (is.null(ex_hits)) |
264 | 264 |
stop("'x' must have a \"hits\" inner metadata column ", |
... | ... |
@@ -268,11 +268,11 @@ setMethod("sgdf", "ANY", |
268 | 268 |
if (is.null(in_hits)) |
269 | 269 |
stop("'in_by_tx' has no \"hits\" inner metadata column. May be ", |
270 | 270 |
"you forgot to pass it thru assignSubfeatureHits()?") |
271 |
- .make_sgdf_from_sgdf0(sgdf0, ex_hits=ex_hits, in_hits=in_hits) |
|
271 |
+ .make_sgedges_from_sgedges0(sgedges0, ex_hits=ex_hits, in_hits=in_hits) |
|
272 | 272 |
} |
273 | 273 |
) |
274 | 274 |
|
275 |
-setMethod("sgdf", "IntegerList", |
|
275 |
+setMethod("sgedges", "IntegerList", |
|
276 | 276 |
function(x, gene_id=NA, UATXHcount=NULL, in_by_tx=NULL, |
277 | 277 |
keep.dup.edges=FALSE) |
278 | 278 |
{ |
... | ... |
@@ -282,12 +282,12 @@ setMethod("sgdf", "IntegerList", |
282 | 282 |
if (!is.null(in_by_tx)) |
283 | 283 |
stop("the 'in_by_tx' arg is not supported ", |
284 | 284 |
"when 'x' is an IntegerList") |
285 |
- sgdf0 <- .make_sgdf0_from_spath(x, UATXHcount=UATXHcount) |
|
286 |
- sgdf(sgdf0, keep.dup.edges=keep.dup.edges) |
|
285 |
+ sgedges0 <- .make_sgedges0_from_spath(x, UATXHcount=UATXHcount) |
|
286 |
+ sgedges(sgedges0, keep.dup.edges=keep.dup.edges) |
|
287 | 287 |
} |
288 | 288 |
) |
289 | 289 |
|
290 |
-setMethod("sgdf", "data.frame", |
|
290 |
+setMethod("sgedges", "data.frame", |
|
291 | 291 |
function(x, gene_id=NA, UATXHcount=NULL, in_by_tx=NULL, |
292 | 292 |
keep.dup.edges=FALSE) |
293 | 293 |
{ |
... | ... |
@@ -304,7 +304,7 @@ setMethod("sgdf", "data.frame", |
304 | 304 |
stop("'keep.dup.edges' must be TRUE or FALSE") |
305 | 305 |
if (keep.dup.edges) |
306 | 306 |
return(x) # no-op |
307 |
- .make_sgdf_from_sgdf0(x) |
|
307 |
+ .make_sgedges_from_sgedges0(x) |
|
308 | 308 |
} |
309 | 309 |
) |
310 | 310 |
|
... | ... |
@@ -320,7 +320,7 @@ setGeneric("uninformativeSSids", signature="x", |
320 | 320 |
setMethod("uninformativeSSids", "ANY", |
321 | 321 |
function(x, gene_id=NA) |
322 | 322 |
{ |
323 |
- x <- sgdf(x, gene_id=gene_id) |
|
323 |
+ x <- sgedges(x, gene_id=gene_id) |
|
324 | 324 |
uninformativeSSids(x) |
325 | 325 |
} |
326 | 326 |
) |
... | ... |
@@ -341,30 +341,30 @@ setMethod("uninformativeSSids", "DataFrame", |
341 | 341 |
|
342 | 342 |
|
343 | 343 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
344 |
-### sgdf2() extractor |
|
344 |
+### sgedges2() extractor |
|
345 | 345 |
### |
346 |
-### Same as sgdf() except that uninformative nodes (i.e. SSids) are removed. |
|
346 |
+### Same as sgedges() except that uninformative nodes (i.e. SSids) are removed. |
|
347 | 347 |
### |
348 | 348 |
|
349 |
-### 'sgdf' must be a DataFrame as returned by: |
|
350 |
-### sgdf( , keep.dup.edges=FALSE) |
|
351 |
-.remove_uninformative_SSids <- function(sgdf) |
|
349 |
+### 'sgedges' must be a DataFrame as returned by: |
|
350 |
+### sgedges( , keep.dup.edges=FALSE) |
|
351 |
+.remove_uninformative_SSids <- function(sgedges) |
|
352 | 352 |
{ |
353 |
- ex_or_in <- sgdf[ , "ex_or_in"] |
|
353 |
+ ex_or_in <- sgedges[ , "ex_or_in"] |
|
354 | 354 |
ex_or_in_levels <- levels(ex_or_in) |
355 | 355 |
if (!identical(ex_or_in_levels, EX_OR_IN_LEVELS)) |
356 | 356 |
stop("Malformed input.\n", |
357 | 357 |
" In the input data.frame (or DataFrame) representing the ", |
358 | 358 |
"original splicing graph, the \"ex_or_in\" column has invalid ", |
359 | 359 |
"levels. Could it be that it was obtained by a previous call ", |
360 |
- "to sgdf2()?") |
|
360 |
+ "to sgedges2()?") |
|
361 | 361 |
levels(ex_or_in) <- EX_OR_IN_LEVELS2 |
362 |
- uninformative_SSids <- uninformativeSSids(sgdf) |
|
362 |
+ uninformative_SSids <- uninformativeSSids(sgedges) |
|
363 | 363 |
if (length(uninformative_SSids) == 0L) |
364 |
- return(sgdf) |
|
365 |
- from <- sgdf[ , "from"] |
|
366 |
- to <- sgdf[ , "to"] |
|
367 |
- tx_id <- sgdf[ , "tx_id"] |
|
364 |
+ return(sgedges) |
|
365 |
+ from <- sgedges[ , "from"] |
|
366 |
+ to <- sgedges[ , "to"] |
|
367 |
+ tx_id <- sgedges[ , "tx_id"] |
|
368 | 368 |
idx1 <- match(uninformative_SSids, from) |
369 | 369 |
idx2 <- match(uninformative_SSids, to) |
370 | 370 |
## 2 sanity checks. |
... | ... |
@@ -375,7 +375,7 @@ setMethod("uninformativeSSids", "DataFrame", |
375 | 375 |
"uninformative splicing site id must contain the same tx_id.", |
376 | 376 |
"Could it be that the \"tx_id\" column was manually altered ", |
377 | 377 |
"before the data.frame (or DataFrame) was passed to ", |
378 |
- "sgdf2()?") |
|
378 |
+ "sgedges2()?") |
|
379 | 379 |
if (!all(idx1 == idx2 + 1L)) |
380 | 380 |
stop("Malformed input.\n", |
381 | 381 |
" In the input data.frame (or DataFrame) representing the ", |
... | ... |
@@ -383,7 +383,7 @@ setMethod("uninformativeSSids", "DataFrame", |
383 | 383 |
"id must appear in 2 consecutive rows (first in the \"to\" ", |
384 | 384 |
"column, then in the \"from\" column. Could it be that the ", |
385 | 385 |
"rows were subsetted before the data.frame (or DataFrame) ", |
386 |
- "was passed to sgdf2()?") |
|
386 |
+ "was passed to sgedges2()?") |
|
387 | 387 |
from <- from[-idx1] |
388 | 388 |
to <- to[-idx2] |
389 | 389 |
ex_or_in[idx1] <- EX_OR_IN_LEVELS2[4L] |
... | ... |
@@ -392,10 +392,10 @@ setMethod("uninformativeSSids", "DataFrame", |
392 | 392 |
DataFrame(from=from, to=to, ex_or_in=ex_or_in, tx_id=tx_id) |
393 | 393 |
} |
394 | 394 |
|
395 |
-sgdf2 <- function(x, gene_id=NA) |
|
395 |
+sgedges2 <- function(x, gene_id=NA) |
|
396 | 396 |
{ |
397 | 397 |
if (!is(x, "DataFrame")) |
398 |
- x <- sgdf(x, gene_id=gene_id) |
|
398 |
+ x <- sgedges(x, gene_id=gene_id) |
|
399 | 399 |
.remove_uninformative_SSids(x) |
400 | 400 |
} |
401 | 401 |
|
... | ... |
@@ -9,23 +9,23 @@ setOldClass("igraph") |
9 | 9 |
|
10 | 10 |
|
11 | 11 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
12 |
-### .make_igraph_from_sgdf() |
|
12 |
+### .make_igraph_from_sgedges() |
|
13 | 13 |
### |
14 | 14 |
|
15 |
-### 'sgdf' must be a data.frame as returned by: |
|
16 |
-### sgdf( , keep.dup.edges=TRUE) |
|
15 |
+### 'sgedges' must be a data.frame as returned by: |
|
16 |
+### sgedges( , keep.dup.edges=TRUE) |
|
17 | 17 |
### or a DataFrame as returned by: |
18 |
-### sgdf( , keep.dup.edges=FALSE) |
|
18 |
+### sgedges( , keep.dup.edges=FALSE) |
|
19 | 19 |
### Valid extra cols are: "label", "label.color", "lty", "color", "width" |
20 | 20 |
### and "UATXHcount". They are used to set graphical parameters on the edges. |
21 |
-.precook_igraph_edges_from_sgdf <- function(sgdf) |
|
21 |
+.precook_igraph_edges_from_sgedges <- function(sgedges) |
|
22 | 22 |
{ |
23 | 23 |
required_colnames <- c("from", "to", "ex_or_in", "tx_id") |
24 | 24 |
extra_colnames <- c("label", "label.color", "lty", "color", |
25 | 25 |
"width", "UATXHcount") |
26 | 26 |
extract_colnames <- c(required_colnames, |
27 |
- intersect(extra_colnames, colnames(sgdf))) |
|
28 |
- ans <- sgdf[ , extract_colnames, drop=FALSE] |
|
27 |
+ intersect(extra_colnames, colnames(sgedges))) |
|
28 |
+ ans <- sgedges[ , extract_colnames, drop=FALSE] |
|
29 | 29 |
ex_or_in <- ans[ , "ex_or_in"] |
30 | 30 |
ex_or_in_levels <- levels(ex_or_in) |
31 | 31 |
if (!identical(ex_or_in_levels, EX_OR_IN_LEVELS2) |
... | ... |
@@ -83,33 +83,33 @@ setOldClass("igraph") |
83 | 83 |
g |
84 | 84 |
} |
85 | 85 |
|
86 |
-### 'sgdf0' must be a data.frame as returned by: |
|
87 |
-### sgdf( , keep.dup.edges=TRUE) |
|
88 |
-.make_igraph_from_sgdf0 <- function(sgdf0, gene_id=NA, |
|
89 |
- tx_id.as.edge.label=FALSE) |
|
86 |
+### 'sgedges0' must be a data.frame as returned by: |
|
87 |
+### sgedges( , keep.dup.edges=TRUE) |
|
88 |
+.make_igraph_from_sgedges0 <- function(sgedges0, gene_id=NA, |
|
89 |
+ tx_id.as.edge.label=FALSE) |
|
90 | 90 |
{ |
91 |
- if (!is.data.frame(sgdf0)) |
|
92 |
- stop("'sgdf0' must be a data.frame") |
|
91 |
+ if (!is.data.frame(sgedges0)) |
|
92 |
+ stop("'sgedges0' must be a data.frame") |
|
93 | 93 |
if (!isTRUEorFALSE(tx_id.as.edge.label)) |
94 | 94 |
stop("'tx_id.as.edge.label' must be TRUE or FALSE") |
95 |
- d <- .precook_igraph_edges_from_sgdf(sgdf0) |
|
95 |
+ d <- .precook_igraph_edges_from_sgedges(sgedges0) |
|
96 | 96 |
if (tx_id.as.edge.label) |
97 | 97 |
d$label <- d$tx_id |
98 | 98 |
.make_igraph(d) |
99 | 99 |
} |
100 | 100 |
|
101 |
-### 'sgdf' must be a DataFrame as returned by: |
|
102 |
-### sgdf( , keep.dup.edges=FALSE) |
|
101 |
+### 'sgedges' must be a DataFrame as returned by: |
|
102 |
+### sgedges( , keep.dup.edges=FALSE) |
|
103 | 103 |
### or by: |
104 |
-### sgdf2( ) |
|
105 |
-.make_igraph_from_sgdf <- function(sgdf, gene_id=NA, |
|
106 |
- tx_id.as.edge.label=FALSE) |
|
104 |
+### sgedges2( ) |
|
105 |
+.make_igraph_from_sgedges <- function(sgedges, gene_id=NA, |
|
106 |
+ tx_id.as.edge.label=FALSE) |
|
107 | 107 |
{ |
108 |
- if (!is(sgdf, "DataFrame")) |
|
109 |
- stop("'sgdf' must be a DataFrame") |
|
108 |
+ if (!is(sgedges, "DataFrame")) |
|
109 |
+ stop("'sgedges' must be a DataFrame") |
|
110 | 110 |
if (!isTRUEorFALSE(tx_id.as.edge.label)) |
111 | 111 |
stop("'tx_id.as.edge.label' must be TRUE or FALSE") |
112 |
- d <- .precook_igraph_edges_from_sgdf(sgdf) |
|
112 |
+ d <- .precook_igraph_edges_from_sgedges(sgedges) |
|
113 | 113 |
if (tx_id.as.edge.label) |
114 | 114 |
d$label <- sapply(d$tx_id, paste, collapse=",") |
115 | 115 |
d$tx_id <- NULL |
... | ... |
@@ -137,9 +137,9 @@ setMethod("sgraph", "ANY", |
137 | 137 |
function(x, gene_id=NA, keep.dup.edges=FALSE, |
138 | 138 |
tx_id.as.edge.label=FALSE, as.igraph=FALSE) |
139 | 139 |
{ |
140 |
- sgdf <- sgdf(x, gene_id=gene_id, keep.dup.edges=keep.dup.edges) |
|
141 |
- sgraph(sgdf, tx_id.as.edge.label=tx_id.as.edge.label, |
|
142 |
- as.igraph=as.igraph) |
|
140 |
+ sgedges <- sgedges(x, gene_id=gene_id, keep.dup.edges=keep.dup.edges) |
|
141 |
+ sgraph(sgedges, tx_id.as.edge.label=tx_id.as.edge.label, |
|
142 |
+ as.igraph=as.igraph) |
|
143 | 143 |
} |
144 | 144 |
) |
145 | 145 |
|
... | ... |
@@ -153,7 +153,7 @@ setMethod("sgraph", "data.frame", |
153 | 153 |
if (!identical(keep.dup.edges, FALSE)) |
154 | 154 |
stop("the 'keep.dup.edges' arg is not supported ", |
155 | 155 |
"when 'x' is a data.frame") |
156 |
- igraph <- .make_igraph_from_sgdf0(x, |
|
156 |
+ igraph <- .make_igraph_from_sgedges0(x, |
|
157 | 157 |
tx_id.as.edge.label=tx_id.as.edge.label) |
158 | 158 |
sgraph(igraph, as.igraph=as.igraph) |
159 | 159 |
} |
... | ... |
@@ -169,7 +169,7 @@ setMethod("sgraph", "DataFrame", |
169 | 169 |
if (!identical(keep.dup.edges, FALSE)) |
170 | 170 |
stop("the 'keep.dup.edges' arg is not supported ", |
171 | 171 |
"when 'x' is a DataFrame") |
172 |
- igraph <- .make_igraph_from_sgdf(x, |
|
172 |
+ igraph <- .make_igraph_from_sgedges(x, |
|
173 | 173 |
tx_id.as.edge.label=tx_id.as.edge.label) |
174 | 174 |
sgraph(igraph, as.igraph=as.igraph) |
175 | 175 |
} |
... | ... |
@@ -209,7 +209,7 @@ setMethod("sgraph", "igraph", |
209 | 209 |
|
210 | 210 |
sgraph2 <- function(x, gene_id=NA, tx_id.as.edge.label=FALSE, as.igraph=FALSE) |
211 | 211 |
{ |
212 |
- sgraph(sgdf2(x, gene_id=gene_id), |
|
212 |
+ sgraph(sgedges2(x, gene_id=gene_id), |
|
213 | 213 |
tx_id.as.edge.label=tx_id.as.edge.label, as.igraph=as.igraph) |
214 | 214 |
} |
215 | 215 |
|
... | ... |
@@ -259,7 +259,7 @@ package. |
259 | 259 |
First we load the selected \Rclass{TranscriptDb} object. |
260 | 260 |
|
261 | 261 |
<<loadTxdb>>= |
262 |
-library("TxDb.Mmusculus.UCSC.mm9.knownGene") |
|
262 |
+library(TxDb.Mmusculus.UCSC.mm9.knownGene) |
|
263 | 263 |
txdb <- TxDb.Mmusculus.UCSC.mm9.knownGene |
264 | 264 |
@ |
265 | 265 |
|
... | ... |
@@ -290,46 +290,32 @@ the modified \Rclass{TranscriptDb} object. |
290 | 290 |
<<loadGenomicFeatures>>= |
291 | 291 |
library(SplicingGraphs) |
292 | 292 |
sg <- SplicingGraphs(txdb) |
293 |
+sg |
|
294 |
+@ |
|
295 |
+ |
|
296 |
+\Rcode{sg} is a \Rclass{SplicingGraphs} object. It has 1 element per |
|
297 |
+transcript, and each transcript is assigned a name that is the id of the |
|
298 |
+gene it belongs to. All the transcripts belonging to the same gene are |
|
299 |
+guaranteed to be consecutive elements in \Rcode{sg}: |
|
300 |
+ |
|
301 |
+<<>>= |
|
302 |
+head(names(sg)) |
|
293 | 303 |
@ |
294 | 304 |
|
295 | 305 |
\end{document} |
296 | 306 |
|
297 |
-The \Rfunction{spliceGraphs} function returns the |
|
298 |
-collapsed edges with their associated disjoined exons and provides information |
|
299 |
-about the underlying splicing mechanisms represented as splicing codes. |
|
300 |
-Additionally the object contains mapping of edges to the |
|
301 |
-individual bubbles and bubble parts. |
|
302 |
-Lets have a look onto the resulting object. The provided |
|
303 |
-\Rclass{GrangesList} contains the edge IDs as list names. |
|
304 |
-The exons in the \Rclass{Granges} objects are not the original |
|
305 |
-exons provided by the \Rclass{TranscriptDb} object, since the gene |
|
306 |
-model became modified internally. |
|
307 |
-As mentioned in the previous section during the splicing graph |
|
308 |
-construction overlapping exons within a gene get disjoined and |
|
309 |
-new exons with new exon ids which differ in |
|
310 |
-size compared to the original exons are produced. |
|
311 |
- |
|
312 |
-The original exon ids associated with the new exon ids |
|
313 |
-can be found in the metadata column of the individual list elements |
|
314 |
-and is called \Rfunction{exon\_ids}. Each element of the |
|
315 |
-column is a \Rclass{CharacterList} containing the original exon ids. |
|
316 |
-The new disjoined exon ids can be retrieved |
|
317 |
-directly from the metadata column called \Rfunction{disJ\_exon\_ids}. |
|
307 |
+\Rcode{sg} contains information about the underlying splicing mechanisms |
|
308 |
+represented as splicing codes. Additionally the object contains mapping |
|
309 |
+of edges to the individual bubbles and bubble parts. |
|
310 |
+ |
|
311 |
+Lets have a look at the resulting object. |
|
312 |
+ |
|
318 | 313 |
As already mentioned in the introduction there is also information |
319 | 314 |
provided about the splicing events generating the individual |
320 | 315 |
transcript variants. This information is stored in the metadata slot |
321 | 316 |
of the \Rclass{GrangesList} object and can be accessed by using the |
322 | 317 |
\Rfunction{metadata} function. |
323 | 318 |
|
324 |
-Below an example edge of the object returned by the \Rfunction{spliceGraphs} |
|
325 |
-function is shown. This edge consist of multiple exons. |
|
326 |
- |
|
327 |
-\begin{scriptsize} |
|
328 |
-<<exbyedges>>= |
|
329 |
-sG |
|
330 |
-@ |
|
331 |
-\end{scriptsize} |
|
332 |
- |
|
333 | 319 |
In the code chunk below we access the information about the type of |
334 | 320 |
splicing events and try to quantify them later on. |
335 | 321 |
|
... | ... |
@@ -20,24 +20,24 @@ source(TSPC_utils_path) |
20 | 20 |
|
21 | 21 |
### Make a TSPC splicing graph data frame and save it in the current working |
22 | 22 |
### directory. |
23 |
-makeAndSaveTSPCsgdf <- function(subdir_path) |
|
23 |
+makeAndSaveTSPCsgedges <- function(subdir_path) |
|
24 | 24 |
{ |
25 | 25 |
subdir_basename <- basename(subdir_path) |
26 |
- objname <- paste0(subdir_basename, "sgdf") |
|
26 |
+ objname <- paste0(subdir_basename, "sgedges") |
|
27 | 27 |
filename <- paste0(objname, ".rda") |
28 |
- sgdf <- makeTSPCsgdf(subdir_path) |
|
28 |
+ sgedges <- makeTSPCsgedges(subdir_path) |
|
29 | 29 |
message("Saving ", objname, " to ", filename, " ... ", appendLF=FALSE) |
30 |
- assign(objname, sgdf, envir=.GlobalEnv) |
|
30 |
+ assign(objname, sgedges, envir=.GlobalEnv) |
|
31 | 31 |
save(list=objname, file=filename, envir=.GlobalEnv) |
32 | 32 |
message("OK") |
33 | 33 |
} |
34 | 34 |
|
35 |
-makeAndSaveAllTSPCsgdfs <- function(subdir_paths) |
|
35 |
+makeAndSaveAllTSPCsgedges <- function(subdir_paths) |
|
36 | 36 |
{ |
37 | 37 |
for (subdir_path in subdir_paths) |
38 |
- makeAndSaveTSPCsgdf(subdir_path) |
|
38 |
+ makeAndSaveTSPCsgedges(subdir_path) |
|
39 | 39 |
} |
40 | 40 |
|
41 | 41 |
### Run this to make and save all the TSPC splicing graph data frames: |
42 |
-#makeAndSaveAllTSPCsgdfs(subdir_paths) |
|
42 |
+#makeAndSaveAllTSPCsgedges(subdir_paths) |
|
43 | 43 |
|
... | ... |
@@ -27,7 +27,7 @@ loadModels <- function(models_path, check.transcripts=TRUE) |
27 | 27 |
|
28 | 28 |
### It's questionable whether this does the right thing on paired-end reads. |
29 | 29 |
### I guess not... |
30 |
-makeSgdfWithHits <- function(grl, sg) |
|
30 |
+makeSgedgesWithHits <- function(grl, sg) |
|
31 | 31 |
{ |
32 | 32 |
ov0 <- findOverlaps(grl, sg@tx, ignore.strand=TRUE) |
33 | 33 |
ovenc0 <- encodeOverlaps(grl, sg@tx, hits=ov0, |
... | ... |
@@ -37,10 +37,10 @@ makeSgdfWithHits <- function(grl, sg) |
37 | 37 |
sg@tx <- assignSubfeatureHits(grl, sg@tx, ov1, ignore.strand=TRUE) |
38 | 38 |
in_by_tx <- psetdiff(range(sg@tx), sg@tx) |
39 | 39 |
in_by_tx <- assignSubfeatureHits(grl, in_by_tx, ov1, ignore.strand=TRUE) |
40 |
- sgdf(sg, in_by_tx=in_by_tx) |
|
40 |
+ sgedges(sg, in_by_tx=in_by_tx) |
|
41 | 41 |
} |
42 | 42 |
|
43 |
-makeTSPCsgdf <- function(subdir_path) |
|
43 |
+makeTSPCsgedges <- function(subdir_path) |
|
44 | 44 |
{ |
45 | 45 |
subdir_basename <- basename(subdir_path) |
46 | 46 |
filenames <- list.files(subdir_path) |
... | ... |
@@ -56,7 +56,7 @@ makeTSPCsgdf <- function(subdir_path) |
56 | 56 |
|
57 | 57 |
## Compute the splicing graph. |
58 | 58 |
sg <- SplicingGraphs(ex_by_tx) |
59 |
- ans <- sgdf(sg) |
|
59 |
+ ans <- sgedges(sg) |
|
60 | 60 |
|
61 | 61 |
## Find the BAM files. |
62 | 62 |
suffixes <- substr(filenames, filenames_nchar-3L, filenames_nchar) |
... | ... |
@@ -101,9 +101,9 @@ makeTSPCsgdf <- function(subdir_path) |
101 | 101 |
param=param0) |
102 | 102 |
grl <- grglist(galp, order.as.in.query=TRUE) |
103 | 103 |
} |
104 |
- sgdf <- makeSgdfWithHits(grl, sg) |
|
104 |
+ sgedges <- makeSgedgesWithHits(grl, sg) |
|
105 | 105 |
message("OK") |
106 |
- sgdf[ , "nhits"] |
|
106 |
+ sgedges[ , "nhits"] |
|
107 | 107 |
}) |
108 | 108 |
cbind(ans, DataFrame(nhits)) |
109 | 109 |
} |
... | ... |
@@ -139,9 +139,10 @@ SplicingGraphs(x, grouping=NULL, check.introns=TRUE) |
139 | 139 |
\item The \link[IRanges]{IntegerList}, \link[IRanges]{CharacterList}, |
140 | 140 |
and \link[IRanges]{DataFrame} classes in the IRanges package. |
141 | 141 |
|
142 |
- \item \code{\link{sgdf}} and \code{\link{sgraph}} for extracting |
|
143 |
- a splicing graph as a data frame or as a plottable graph-like |
|
144 |
- object. |
|
142 |
+ \item \code{\link{sgedges}} for extracting the edges of a splicing graph. |
|
143 |
+ |
|
144 |
+ \item \code{\link{sgraph}} for extracting a splicing graph as a plottable |
|
145 |
+ graph-like object. |
|
145 | 146 |
} |
146 | 147 |
} |
147 | 148 |
|
148 | 149 |
similarity index 61% |
149 | 150 |
rename from man/TSPCsgdfs.Rd |
150 | 151 |
rename to man/TSPCsgraphs.Rd |
... | ... |
@@ -1,6 +1,6 @@ |
1 |
-\name{TSPCsgdfs} |
|
1 |
+\name{TSPCsgraphs} |
|
2 | 2 |
|
3 |
-\alias{TSPCsgdfs} |
|
3 |
+\alias{TSPCsgraphs} |
|
4 | 4 |
\alias{TSPC} |
5 | 5 |
|
6 | 6 |
\title{ |
... | ... |
@@ -15,36 +15,36 @@ |
15 | 15 |
## 1 splicing graph data frame per gene, except for gene MUC16. |
16 | 16 |
## Transcripts T-4 and T-5 in gene MUC16 both have their 2nd exon included |
17 | 17 |
## in their 3rd exon ==> splicing graph theory doesn't apply. |
18 |
-filepaths <- list.files(system.file("extdata", "TSPCsgdfs", |
|
18 |
+filepaths <- list.files(system.file("extdata", "TSPCsgraphs", |
|
19 | 19 |
package="SplicingGraphs"), |
20 | 20 |
full.names=TRUE) |
21 | 21 |
for (filepath in filepaths) |
22 | 22 |
load(filepath) |
23 | 23 |
|
24 |
-dim(BAI1sgdf) |
|
24 |
+dim(BAI1sgedges) |
|
25 | 25 |
|
26 | 26 |
## All the data frames have 1 row per edge in the graph, and the first 4 |
27 | 27 |
## cols are always "from", "to", "ex_or_in", and "tx_id". Note that there |
28 | 28 |
## can be more than 1 transcript associated with a given edge. |
29 |
-LGSNsgdf[ , 1:4] |
|
29 |
+LGSNsgedges[ , 1:4] |
|
30 | 30 |
|
31 | 31 |
## There is 1 additional column per sample: |
32 |
-LGSNsgdf[ , 5:8] |
|
32 |
+LGSNsgedges[ , 5:8] |
|
33 | 33 |
|
34 |
-## 'KIAA0319Lsgdf' and 'TREM2sgdf' have no additional cols because there |
|
34 |
+## 'KIAA0319Lsgedges' and 'TREM2sgedges' have no additional cols because there |
|
35 | 35 |
## was no BAM files for those genes: |
36 |
-dim(KIAA0319Lsgdf) |
|
37 |
-dim(TREM2sgdf) |
|
36 |
+dim(KIAA0319Lsgedges) |
|
37 |
+dim(TREM2sgedges) |
|
38 | 38 |
|
39 | 39 |
## Plot the splicing graphs: |
40 | 40 |
library(Rgraphviz) |
41 |
-plot(sgraph(BAI1sgdf)) |
|
42 |
-plot(sgraph(CYB561sgdf)) |
|
43 |
-plot(sgraph(DAPL1sgdf)) |
|
44 |
-plot(sgraph(ITGB8sgdf)) |
|
45 |
-plot(sgraph(KIAA0319Lsgdf)) |
|
46 |
-plot(sgraph(LGSNsgdf)) |
|
47 |
-plot(sgraph(MKRN3sgdf)) |
|
48 |
-plot(sgraph(ST14sgdf)) |
|
49 |
-plot(sgraph(TREM2sgdf)) |
|
41 |
+plot(sgraph(BAI1sgedges)) |
|
42 |
+plot(sgraph(CYB561sgedges)) |
|
43 |
+plot(sgraph(DAPL1sgedges)) |
|
44 |
+plot(sgraph(ITGB8sgedges)) |
|
45 |
+plot(sgraph(KIAA0319Lsgedges)) |
|
46 |
+plot(sgraph(LGSNsgedges)) |
|
47 |
+plot(sgraph(MKRN3sgedges)) |
|
48 |
+plot(sgraph(ST14sgedges)) |
|
49 |
+plot(sgraph(TREM2sgedges)) |
|
50 | 50 |
} |
... | ... |
@@ -106,5 +106,5 @@ sg@tx <- assignSubfeatureHits(grl, sg@tx, ov1, ignore.strand=TRUE) |
106 | 106 |
in_by_tx <- psetdiff(range(sg@tx), sg@tx) |
107 | 107 |
in_by_tx <- assignSubfeatureHits(grl, in_by_tx, ov1, ignore.strand=TRUE) |
108 | 108 |
|
109 |
-sgdf(sg, gene_id="geneA", in_by_tx=in_by_tx) |
|
109 |
+sgedges(sg, gene_id="geneA", in_by_tx=in_by_tx) |
|
110 | 110 |
} |
111 | 111 |
similarity index 69% |
112 | 112 |
rename from man/sgdf-methods.Rd |
113 | 113 |
rename to man/sgedges-methods.Rd |
... | ... |
@@ -1,6 +1,6 @@ |
1 |
-\name{sgdf-methods} |
|
1 |
+\name{sgedges-methods} |
|
2 | 2 |
|
3 |
-\alias{sgdf-methods} |
|
3 |
+\alias{sgedges-methods} |
|
4 | 4 |
|
5 | 5 |
\alias{spath} |
6 | 6 |
\alias{spath,SplicingGraphs-method} |
... | ... |
@@ -8,30 +8,30 @@ |
8 | 8 |
\alias{UATXHcount} |
9 | 9 |
\alias{UATXHcount,SplicingGraphs-method} |
10 | 10 |
|
11 |
-\alias{sgdf} |
|
12 |
-\alias{sgdf,ANY-method} |
|
13 |
-\alias{sgdf,IntegerList-method} |
|
14 |
-\alias{sgdf,data.frame-method} |
|
11 |
+\alias{sgedges} |
|
12 |
+\alias{sgedges,ANY-method} |
|
13 |
+\alias{sgedges,IntegerList-method} |
|
14 |
+\alias{sgedges,data.frame-method} |
|
15 | 15 |
|
16 | 16 |
\alias{uninformativeSSids} |
17 | 17 |
\alias{uninformativeSSids,ANY-method} |
18 | 18 |
\alias{uninformativeSSids,DataFrame-method} |
19 | 19 |
|
20 |
-\alias{sgdf2} |
|
20 |
+\alias{sgedges2} |
|
21 | 21 |
|
22 | 22 |
|
23 | 23 |
\title{ |
24 |
- Extract a splicing graph as a data frame |
|
24 |
+ Extract the edges of a splicing graph |
|
25 | 25 |
} |
26 | 26 |
|
27 | 27 |
\description{ |
28 |
- Extract the splicing graph for a given gene from a \link{SplicingGraphs} |
|
29 |
- object and return it as a \link[IRanges]{DataFrame}. |
|
28 |
+ Extract the edges of the splicing graph of a given gene from a |
|
29 |
+ \link{SplicingGraphs} object and return it as a \link[IRanges]{DataFrame}. |
|
30 | 30 |
} |
31 | 31 |
|
32 | 32 |
\usage{ |
33 |
-sgdf(x, gene_id=NA, UATXHcount=NULL, in_by_tx=NULL, keep.dup.edges=FALSE) |
|
34 |
-sgdf2(x, gene_id=NA) |
|
33 |
+sgedges(x, gene_id=NA, UATXHcount=NULL, in_by_tx=NULL, keep.dup.edges=FALSE) |
|
34 |
+sgedges2(x, gene_id=NA) |
|
35 | 35 |
|
36 | 36 |
## Related utilities: |
37 | 37 |
|
... | ... |
@@ -89,5 +89,5 @@ sg |
89 | 89 |
## consecutive elements in 'sg'. |
90 | 90 |
names(sg) |
91 | 91 |
|
92 |
-sgdf(sg, gene_id="geneA") |
|
92 |
+sgedges(sg, gene_id="geneA") |
|
93 | 93 |
} |