Browse code

- Split SplicingGraphs-class.R in 3 files: SplicingGraphs-class.R, sgdf-methods.R, and sgraph-methods.R. And split the corresponding man page accordingly. - Rename Spath(), Sgdf(), Sgraph() -> spath(), sgdf(), sgraph().

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@73836 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 28/02/2013 21:57:06
Showing 12 changed files

... ...
@@ -16,6 +16,8 @@ Imports: methods, utils, igraph,
16 16
 Suggests: igraph, Gviz, TxDb.Mmusculus.UCSC.mm9.knownGene, RUnit
17 17
 Collate: utils.R
18 18
 	 SplicingGraphs-class.R
19
+	 sgdf-methods.R
20
+	 sgraph-methods.R
19 21
 	 countReads.R
20 22
 	 toy_data.R
21 23
 biocViews: Genetics, Annotation, HighThroughputSequencing
... ...
@@ -19,35 +19,69 @@ import(Rsamtools)
19 19
 
20 20
 exportClasses(SplicingGraphs)
21 21
 
22
+
23
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
24
+### Export S4 methods for generics not defined in SplicingGraphs
25
+###
26
+
27
+exportMethods(
28
+    length,
29
+    names,
30
+    elementLengths,
31
+    plot,
32
+    findOverlaps,
33
+    encodeOverlaps
34
+)
35
+
36
+
37
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
38
+### Export non-generic functions
39
+###
40
+
41
+export(
42
+    ## SplicingGraphs-class.R:
43
+    SplicingGraphs,
44
+
45
+    ## sgdf-methods.R:
46
+    sgdf2,
47
+
48
+    ## sgraph-methods.R:
49
+    sgraph2,
50
+
51
+    ## countReads.R:
52
+    assignSubfeatureHits,
53
+
54
+    ## toy_data.R:
55
+    toy_genes_gff,
56
+    toy_reads_sam,
57
+    toy_reads_bam,
58
+    toy_overlaps,
59
+    plotToyReads
60
+)
61
+
62
+
63
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
64
+### Export S4 generics defined in SplicingGraphs + export corresponding
65
+### methods
66
+###
67
+
22 68
 export(
23
-  Spath,
24
-  UATXHcount,
25
-  Sgdf,
26
-  Sgraph,
27
-  uninformativeSSids,
28
-  Sgdf2,
29
-  Sgraph2,
30
-  SplicingGraphs,
31
-
32
-  assignSubfeatureHits,
33
-
34
-  toy_genes_gff,
35
-  toy_reads_sam,
36
-  toy_reads_bam,
37
-  toy_overlaps,
38
-  plotToyReads
69
+    ## sgdf-methods.R:
70
+    spath,
71
+    UATXHcount,
72
+    sgdf,
73
+    uninformativeSSids,
74
+
75
+    ## sgraph-methods.R:
76
+    sgraph
39 77
 )
40 78
 
79
+### Exactly the same list as above.
41 80
 exportMethods(
42
-  length,
43
-  names,
44
-  elementLengths,
45
-  Spath,
46
-  UATXHcount,
47
-  Sgdf,
48
-  Sgraph,
49
-  uninformativeSSids,
50
-  plot,
51
-  findOverlaps,
52
-  encodeOverlaps
81
+    spath,
82
+    UATXHcount,
83
+    sgdf,
84
+    uninformativeSSids,
85
+    sgraph
53 86
 )
87
+
... ...
@@ -2,6 +2,7 @@
2 2
 ### SplicingGraphs objects
3 3
 ### -------------------------------------------------------------------------
4 4
 
5
+
5 6
 ### We deliberately choose to not extend GRangesList to make SplicingGraphs
6 7
 ### objects read-only and with a very restricted API (opaque objects).
7 8
 setClass("SplicingGraphs",
... ...
@@ -10,12 +11,6 @@ setClass("SplicingGraphs",
10 11
     )
11 12
 )
12 13
 
13
-setOldClass("igraph")
14
-
15
-.EX_OR_IN_LEVELS2 <- c("ex", "in", "", "mixed")
16
-.EDGE_WEIGHTS <- c(1, 0.2, 0.1, 0.4)
17
-.EX_OR_IN_LEVELS <- .EX_OR_IN_LEVELS2[-4L]
18
-
19 14
 
20 15
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
21 16
 ### Basic accessors.
... ...
@@ -50,583 +45,6 @@ setMethod("show", "SplicingGraphs",
50 45
 )
51 46
 
52 47
 
53
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
54
-### Spath() accessor
55
-###
56
-### Gets all the splicing paths for the specified gene.
57
-### Returns them in a named IntegerList with 1 top-level element per
58
-### transcript in the specified gene. Each top-level element 'Spath[[i]]'
59
-### contains the splicing site ids for the i-th transcript.
60
-###
61
-
62
-setGeneric("Spath", signature="x",
63
-    function(x, gene_id=NA) standardGeneric("Spath")
64
-)
65
-
66
-### Should return a CompressedIntegerList.
67
-setMethod("Spath", "SplicingGraphs",
68
-    function(x, gene_id=NA)
69
-    {
70
-        if (!isSingleStringOrNA(gene_id))
71
-            stop("'gene_id' must be a single string (or NA)")
72
-        if (length(x) == 0L)
73
-            stop("'x' must be of length >= 1")
74
-        x_names <- names(x)
75
-        ans <- mcols(x@tx)[ , "Spath"]
76
-        if (is.null(x_names)) {
77
-            if (!is.na(gene_id))
78
-                stop("the 'gene_id' arg is not supported ",
79
-                     "when 'x' is unnamed (in which case all its elements ",
80
-                     "(i.e. transcripts) are considered to belong to the ",
81
-                     "same gene)")
82
-            return(ans) 
83
-        }
84
-        if (is.na(gene_id))
85
-            stop("'gene_id' must be supplied when 'x' has names")
86
-        ans <- ans[x_names == gene_id]
87
-        if (length(ans) == 0L)
88
-            stop("invalid 'gene_id'")
89
-        ans
90
-    }
91
-)
92
-
93
-
94
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
95
-### UATXHcount() accessor
96
-###
97
-
98
-setGeneric("UATXHcount", signature="x",
99
-    function(x, gene_id=NA) standardGeneric("UATXHcount")
100
-)
101
-
102
-### Should return an integer vector or a NULL.
103
-setMethod("UATXHcount", "SplicingGraphs",
104
-    function(x, gene_id=NA)
105
-    {
106
-        if (!isSingleStringOrNA(gene_id))
107
-            stop("'gene_id' must be a single string (or NA)")
108
-        if (length(x) == 0L)
109
-            stop("'x' must be of length >= 1")
110
-        x_names <- names(x)
111
-        ans <- mcols(x@tx)[["UATXHcount"]]
112
-        if (is.null(x_names)) {
113
-            if (!is.na(gene_id))
114
-                stop("the 'gene_id' arg is not supported ",
115
-                     "when 'x' is unnamed (in which case all its elements ",
116
-                     "(i.e. transcripts) are considered to belong to the ",
117
-                     "same gene)")
118
-            return(ans) 
119
-        }
120
-        if (is.na(gene_id))
121
-            stop("'gene_id' must be supplied when 'x' has names")
122
-        if (is.null(ans))
123
-            return(ans)
124
-        ans <- ans[x_names == gene_id]
125
-        if (length(ans) == 0L)
126
-            stop("invalid 'gene_id'")
127
-        ans
128
-    }
129
-)
130
-
131
-
132
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
133
-### .hits() accessor (not exported)
134
-###
135
-
136
-setGeneric(".hits", signature="x",
137
-    function(x, gene_id=NA) standardGeneric(".hits")
138
-)
139
-
140
-### Should return a CompressedCharacterList or a NULL.
141
-setMethod(".hits", "GRangesList",
142
-    function(x, gene_id=NA)
143
-    {
144
-        if (!isSingleStringOrNA(gene_id))
145
-            stop("'gene_id' must be a single string (or NA)")
146
-        if (length(x) == 0L)
147
-            stop("'x' must be of length >= 1")
148
-        x_names <- names(x)
149
-        if (is.null(x_names)) {
150
-            if (!is.na(gene_id))
151
-                stop("the 'gene_id' arg is not supported ",
152
-                     "when 'x' is unnamed (in which case all its elements ",
153
-                     "(i.e. transcripts) are considered to belong to the ",
154
-                     "same gene)")
155
-            ans <- mcols(unlist(x, use.names=FALSE))[["hits"]]
156
-            return(ans) 
157
-        }
158
-        if (is.na(gene_id))
159
-            stop("'gene_id' must be supplied when 'x' has names")
160
-        x <- x[x_names == gene_id]
161
-        if (length(x) == 0L)
162
-            stop("invalid 'gene_id'")
163
-        ans <- mcols(unlist(x, use.names=FALSE))[["hits"]]
164
-        ans
165
-    }
166
-)
167
-
168
-
169
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
170
-### Sgdf() accessor
171
-###
172
-### Returns the splicing graph in a DataFrame with 1 row per edge.
173
-###
174
-
175
-### 'spath' must be an IntegerList containing all the splicing paths for a
176
-### given gene. Should have been obtained thru the Spath() accessor.
177
-### Returns a 4-col (or 5-col if 'UATXHcount' is supplied) data.frame representing
178
-### the splicing graph.
179
-.make_Sgdf0_from_Spath <- function(spath, UATXHcount=NULL)
180
-{
181
-    if (!is.null(UATXHcount)) {
182
-        if (!is.integer(UATXHcount))
183
-            stop("'UATXHcount' must be an integer vector or NULL")
184
-        if (length(UATXHcount) != length(spath))
185
-            stop("when not NULL, 'UATXHcount' must have ",
186
-                 "the same length as 'spath'")
187
-    }
188
-    sgdf0s <- lapply(seq_along(spath),
189
-                     function(i) {
190
-                         SSids <- spath[[i]]
191
-                         from <- c("R", SSids)
192
-                         to <- c(SSids, "L")
193
-                         nb_SSids <- length(SSids)
194
-                         if (nb_SSids %% 2L != 0L)
195
-                             stop("some splicing paths in 'spath' go thru an ",
196
-                                  "odd number of splicing site ids")
197
-                         nexons <- nb_SSids %/% 2L
198
-                         if (nexons == 0L) {
199
-                             ex_or_in <- .EX_OR_IN_LEVELS[3L]
200
-                         } else {
201
-                             nintrons <- nexons - 1L
202
-                             ex_or_in <- c(.EX_OR_IN_LEVELS[3L],
203
-                                           rep.int(.EX_OR_IN_LEVELS[1:2],
204
-                                                   nintrons),
205
-                                           .EX_OR_IN_LEVELS[1L],
206
-                                           .EX_OR_IN_LEVELS[3L])
207
-                         }
208
-                         ex_or_in <- factor(ex_or_in,
209
-                                            levels=.EX_OR_IN_LEVELS)
210
-                         data.frame(from=from,
211
-                                    to=to,
212
-                                    ex_or_in=ex_or_in,
213
-                                    stringsAsFactors=FALSE)
214
-                     })
215
-    nedges_per_tx <- sapply(sgdf0s, nrow)
216
-    sgdf0 <- do.call(rbind, sgdf0s)
217
-    tx_id <- names(spath)
218
-    if (is.null(tx_id))
219
-        tx_id <- seq_along(spath)
220
-    tx_id <- rep.int(factor(tx_id, levels=tx_id), nedges_per_tx)
221
-    sgdf0$tx_id <- tx_id
222
-    if (!is.null(UATXHcount))
223
-        sgdf0$UATXHcount <- rep.int(UATXHcount, nedges_per_tx)
224
-    sgdf0
225
-}
226
-
227
-### Collapse the duplicated edges in 'sgdf0' into a DataFrame.
228
-### We use a DataFrame instead of a data.frame because we want to store
229
-### the tx_id col in a CompressedFactorList (even though this container
230
-### doesn't formally exist and a CompressedIntegerList is actually what's
231
-### being used).
232
-.make_Sgdf_from_Sgdf0 <- function(sgdf0, ex_hits=NULL, in_hits=NULL)
233
-{
234
-    from <- sgdf0[ , "from"]
235
-    to <- sgdf0[ , "to"]
236
-    ex_or_in <- sgdf0[ , "ex_or_in"]
237
-    tx_id <- sgdf0[ , "tx_id"]
238
-    edges <- paste(from, to, sep="~")
239
-    sm <- match(edges, edges)
240
-    if (!all(ex_or_in == ex_or_in[sm]))
241
-        stop("invalid splicing graph")
242
-    is_not_dup <- sm == seq_along(sm)
243
-    sgdf <- DataFrame(sgdf0[is_not_dup, , drop=FALSE])
244
-    sgdf$tx_id <- splitAsList(tx_id, sm)
245
-    UATXHcount <- sgdf$UATXHcount
246
-    if (!is.null(UATXHcount))
247
-        sgdf$UATXHcount <- sum(splitAsList(sgdf0$UATXHcount, sm))
248
-    if (is.null(ex_hits) && is.null(in_hits))
249
-        return(sgdf)
250
-    hits <- relist(character(0), PartitioningByEnd(NG=length(sm)))
251
-    if (!is.null(ex_hits)) {
252
-        if (!is(ex_hits, "CharacterList"))
253
-            stop("'ex_hits' must be a CharacterList object")
254
-        ex_idx <- which(ex_or_in == "ex")
255
-        if (length(ex_idx) != length(ex_hits))
256
-            stop("'ex_hits' is incompatible with 'sgdf0'")
257
-        hits[ex_idx] <- ex_hits
258
-    }
259
-    if (!is.null(in_hits)) {
260
-        if (!is(in_hits, "CharacterList"))
261
-            stop("'in_hits' must be a CharacterList object")
262
-        in_idx <- which(ex_or_in == "in")
263
-        if (length(in_idx) != length(in_hits))
264
-            stop("'in_hits' is incompatible with 'sgdf0'")
265
-        hits[in_idx] <- in_hits
266
-    }
267
-    ## TODO: This is quite inefficient. Improve it.
268
-    for (i in which(!is_not_dup))
269
-        hits[[sm[i]]] <- unique(hits[[sm[i]]], hits[[i]])
270
-    sgdf$hits <- hits[is_not_dup]
271
-    sgdf$nhits <- elementLengths(sgdf$hits)
272
-    sgdf
273
-}
274
-
275
-setGeneric("Sgdf", signature="x",
276
-    function(x, gene_id=NA, UATXHcount=NULL, inbytx=NULL, keep.dup.edges=FALSE)
277
-        standardGeneric("Sgdf")
278
-)
279
-
280
-setMethod("Sgdf", "ANY",
281
-    function(x, gene_id=NA, UATXHcount=NULL, inbytx=NULL, keep.dup.edges=FALSE)
282
-    {
283
-        spath <- Spath(x, gene_id=gene_id)
284
-        if (is.null(UATXHcount))
285
-            UATXHcount <- UATXHcount(x, gene_id=gene_id)
286
-        if (is.null(inbytx))
287
-            return(Sgdf(spath, UATXHcount=UATXHcount,
288
-                               keep.dup.edges=keep.dup.edges))
289
-        if (!is(inbytx, "GRangesList"))
290
-            stop("'inbytx' must be NULL or a GRangesList object")
291
-        if (!is(x, "SplicingGraphs"))
292
-            stop("'x' must be a SplicingGraphs object ",
293
-                 "when 'inbytx' is a GRangesList object")
294
-        if (length(inbytx) != length(x))
295
-            stop("'inbytx' must have the same length as 'x'")
296
-        if (!identical(elementLengths(inbytx) + 1L, elementLengths(x)))
297
-            stop("the shape of 'inbytx' is not compatible ",
298
-                 "with the shape of 'x'")
299
-        if (!identical(keep.dup.edges, FALSE))
300
-            stop("'keep.dup.edges' must be FALSE when 'inbytx' is supplied")
301
-        sgdf0 <- Sgdf(spath, UATXHcount=UATXHcount, keep.dup.edges=TRUE)
302
-        ex_or_in <- sgdf0[ , "ex_or_in"]
303
-        ex_hits <- .hits(x@tx, gene_id=gene_id)
304
-        if (is.null(ex_hits))
305
-            stop("'x' must have a \"hits\" inner metadata column ",
306
-                 "when 'inbytx' is a GRangesList object. May be ",
307
-                 "you forgot to pass it thru assignSubfeatureHits()?")
308
-        in_hits <- .hits(inbytx, gene_id=gene_id)
309
-        if (is.null(in_hits))
310
-            stop("'inbytx' has no \"hits\" inner metadata column. May be ",
311
-                 "you forgot to pass it thru assignSubfeatureHits()?")
312
-        .make_Sgdf_from_Sgdf0(sgdf0, ex_hits=ex_hits, in_hits=in_hits)
313
-    }
314
-)
315
-
316
-setMethod("Sgdf", "IntegerList",
317
-    function(x, gene_id=NA, UATXHcount=NULL, inbytx=NULL, keep.dup.edges=FALSE)
318
-    {
319
-        if (!identical(gene_id, NA))
320
-            stop("the 'gene_id' arg is not supported ",
321
-                 "when 'x' is an IntegerList")
322
-        if (!is.null(inbytx))
323
-            stop("the 'inbytx' arg is not supported ",
324
-                 "when 'x' is an IntegerList")
325
-        sgdf0 <- .make_Sgdf0_from_Spath(x, UATXHcount=UATXHcount)
326
-        Sgdf(sgdf0, keep.dup.edges=keep.dup.edges)
327
-    }
328
-)
329
-
330
-setMethod("Sgdf", "data.frame",
331
-    function(x, gene_id=NA, UATXHcount=NULL, inbytx=NULL, keep.dup.edges=FALSE)
332
-    {
333
-        if (!identical(gene_id, NA))
334
-            stop("the 'gene_id' arg is not supported ",
335
-                 "when 'x' is a data.frame")
336
-        if (!is.null(UATXHcount))
337
-            stop("the 'UATXHcount' arg is not supported ",
338
-                 "when 'x' is a data.frame")
339
-        if (!is.null(inbytx))
340
-            stop("the 'inbytx' arg is not supported ",
341
-                 "when 'x' is a data.frame")
342
-        if (!isTRUEorFALSE(keep.dup.edges))
343
-            stop("'keep.dup.edges' must be TRUE or FALSE")
344
-        if (keep.dup.edges)
345
-            return(x)  # no-op
346
-        .make_Sgdf_from_Sgdf0(x)
347
-    }
348
-)
349
-
350
-
351
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
352
-### .make_igraph_from_Sgdf()
353
-###
354
-
355
-### 'sgdf' must be a data.frame as returned by:
356
-###     Sgdf( , keep.dup.edges=TRUE)
357
-### or a DataFrame as returned by:
358
-###     Sgdf( , keep.dup.edges=FALSE)
359
-### Valid extra cols are: "label", "label.color", "lty", "color", "width"
360
-### and "UATXHcount". They are used to set graphical parameters on the edges.
361
-.precook_igraph_edges_from_Sgdf <- function(sgdf)
362
-{
363
-    required_colnames <- c("from", "to", "ex_or_in", "tx_id")
364
-    extra_colnames <- c("label", "label.color", "lty", "color",
365
-                        "width", "UATXHcount")
366
-    extract_colnames <- c(required_colnames,
367
-                          intersect(extra_colnames, colnames(sgdf)))
368
-    ans <- sgdf[ , extract_colnames, drop=FALSE]
369
-    ex_or_in <- ans[ , "ex_or_in"]
370
-    ex_or_in_levels <- levels(ex_or_in)
371
-    if (!identical(ex_or_in_levels, .EX_OR_IN_LEVELS2)
372
-     && !identical(ex_or_in_levels, .EX_OR_IN_LEVELS))
373
-        stop("\"ex_or_in\" column has invalid levels")
374
-    if (!("label.color" %in% extract_colnames))
375
-        ans$label.color <- "blue"
376
-    if (!("lty" %in% extract_colnames))
377
-        ans$lty <- c("solid", "solid", "dashed", "solid")[ex_or_in]
378
-    if (!("color" %in% extract_colnames))
379
-        ans$color <- c("green3", "darkgrey", "grey", "black")[ex_or_in]
380
-    if (!("width" %in% extract_colnames)
381
-     && "UATXHcount" %in% extract_colnames) {
382
-        min_UATXHcount <- min(ans$UATXHcount)
383
-        if (min_UATXHcount < 0L) {
384
-            warning("'UATXHcount' column contains negative values. Cannot use ",
385
-                    "it to set the widths of the edges.")
386
-        } else {
387
-            max_UATXHcount <- max(ans$UATXHcount)
388
-            if (max_UATXHcount <= 0L) {
389
-                warning("'UATXHcount' column has no positive values. Cannot use ",
390
-                        "it to set the widths of the edges.")
391
-            } else {
392
-                ans$width <- 20.0 * ans$UATXHcount / max(ans$UATXHcount)
393
-            }
394
-        }
395
-    }
396
-    ans
397
-}
398
-
399
-.make_igraph <- function(d)
400
-{
401
-    ## Prepare the 'vertices' argument to pass to graph.data.frame().
402
-    from <- d[ , "from"]
403
-    to <- d[ , "to"]
404
-    nodes <- unique(c(from, to))
405
-    nodes <- sort(as.integer(setdiff(nodes, c("R", "L"))))
406
-    nodes <- c("R", as.character(nodes), "L")
407
-    color <- c("gray", rep.int("white", length(nodes)-2L), "gray")
408
-    label.color <- "black"
409
-    vertices <- data.frame(name=nodes, color=color, label.color=label.color)
410
-
411
-    ## Make the igraph object.
412
-    g <- graph.data.frame(d, vertices=vertices)
413
-    layout.kamada.kawai.deterministic <- function(...)
414
-    {
415
-        set.seed(33L)
416
-        layout.kamada.kawai(...)
417
-    }
418
-
419
-    ## Set its layout attribute.
420
-    g$layout <- layout.kamada.kawai.deterministic
421
-    #g$layout <- layout.Sgraph
422
-
423
-    g
424
-}
425
-
426
-### 'sgdf0' must be a data.frame as returned by:
427
-###     Sgdf( , keep.dup.edges=TRUE)
428
-.make_igraph_from_Sgdf0 <- function(sgdf0, gene_id=NA)
429
-{
430
-    if (!is.data.frame(sgdf0))
431
-        stop("'sgdf0' must be a data.frame")
432
-    d <- .precook_igraph_edges_from_Sgdf(sgdf0)
433
-    if (!("label" %in% colnames(d)))
434
-        d$label <- d$tx_id
435
-    .make_igraph(d)
436
-}
437
-
438
-### 'sgdf' must be a DataFrame as returned by:
439
-###     Sgdf( , keep.dup.edges=FALSE)
440
-### or by:
441
-###     Sgdf2( )
442
-.make_igraph_from_Sgdf <- function(sgdf, gene_id=NA)
443
-{
444
-    if (!is(sgdf, "DataFrame"))
445
-        stop("'sgdf' must be a DataFrame")
446
-    d <- .precook_igraph_edges_from_Sgdf(sgdf)
447
-    if (!("label" %in% colnames(d)))
448
-        d$label <- sapply(d$tx_id, paste, collapse=",")
449
-    d$tx_id <- NULL
450
-    ## Turning 'd' into an ordinary data.frame. (Looks like 'as.data.frame()'
451
-    ## on a DataFrame ignores the 'stringsAsFactors' arg so we use
452
-    ## 'data.frame(as.list())' instead.)
453
-    d <- data.frame(as.list(d), stringsAsFactors=FALSE)
454
-    .make_igraph(d)
455
-}
456
-
457
-
458
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
459
-### Sgraph() accessor
460
-###
461
-### Returns the splicing graph in an igraph object.
462
-###
463
-
464
-setGeneric("Sgraph", signature="x",
465
-    function(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
466
-        standardGeneric("Sgraph")
467
-)
468
-
469
-setMethod("Sgraph", "ANY",
470
-    function(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
471
-    {
472
-        sgdf <- Sgdf(x, gene_id=gene_id, keep.dup.edges=keep.dup.edges)
473
-        Sgraph(sgdf, as.igraph=as.igraph)
474
-    }
475
-)
476
-
477
-setMethod("Sgraph", "data.frame",
478
-    function(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
479
-    {
480
-        if (!identical(gene_id, NA))
481
-            stop("the 'gene_id' arg is not supported ",
482
-                 "when 'x' is a data.frame")
483
-        if (!identical(keep.dup.edges, FALSE))
484
-            stop("the 'keep.dup.edges' arg is not supported ",
485
-                 "when 'x' is a data.frame")
486
-        igraph <- .make_igraph_from_Sgdf0(x)
487
-        Sgraph(igraph, as.igraph=as.igraph)
488
-    }
489
-)
490
-
491
-setMethod("Sgraph", "DataFrame",
492
-    function(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
493
-    {
494
-        if (!identical(gene_id, NA))
495
-            stop("the 'gene_id' arg is not supported ",
496
-                 "when 'x' is a DataFrame")
497
-        if (!identical(keep.dup.edges, FALSE))
498
-            stop("the 'keep.dup.edges' arg is not supported ",
499
-                 "when 'x' is a DataFrame")
500
-        igraph <- .make_igraph_from_Sgdf(x)
501
-        Sgraph(igraph, as.igraph=as.igraph)
502
-    }
503
-)
504
-
505
-setMethod("Sgraph", "igraph",
506
-    function(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
507
-    {
508
-        if (!identical(gene_id, NA))
509
-            stop("the 'gene_id' arg is not supported ",
510
-                 "when 'x' is an igraph object")
511
-        if (!identical(keep.dup.edges, FALSE))
512
-            stop("the 'keep.dup.edges' arg is not supported ",
513
-                 "when 'x' is an igraph object")
514
-        if (!isTRUEorFALSE(as.igraph))
515
-            stop("'as.igraph' must be TRUE or FALSE")
516
-        if (as.igraph) {
517
-            ## Need to load the igraph package so the user can display, plot,
518
-            ## and manipulate the returned object.
519
-            library(igraph)
520
-            return(x)  # no-op
521
-        }
522
-        make_Ragraph_from_igraph(x)
523
-    }
524
-)
525
-
526
-
527
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
528
-### uninformativeSSids() accessor
529
-###
530
-
531
-setGeneric("uninformativeSSids", signature="x",
532
-    function(x, gene_id=NA) standardGeneric("uninformativeSSids")
533
-)
534
-
535
-setMethod("uninformativeSSids", "ANY",
536
-    function(x, gene_id=NA)
537
-    {
538
-        x <- Sgdf(x, gene_id=gene_id)
539
-        uninformativeSSids(x)
540
-    }
541
-)
542
-
543
-setMethod("uninformativeSSids", "DataFrame",
544
-    function(x, gene_id=NA)
545
-    {
546
-        if (!identical(gene_id, NA))
547
-            stop("the 'gene_id' arg is not supported ",
548
-                 "when 'x' is a DataFrame")
549
-        from <- x[ , "from"]
550
-        to <- x[ , "to"]
551
-        from1_SSids <- setdiff(from, from[duplicated(from)])
552
-        to1_SSids <- setdiff(to, to[duplicated(to)])
553
-        intersect(from1_SSids, to1_SSids)
554
-    }
555
-)
556
-
557
-
558
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
559
-### Sgdf2() accessor
560
-###
561
-### Same as Sgdf() except that uninformative nodes (i.e. SSids) are removed.
562
-###
563
-
564
-### 'sgdf' must be a DataFrame as returned by:
565
-###     Sgdf( , keep.dup.edges=FALSE)
566
-.remove_uninformative_SSids <- function(sgdf)
567
-{
568
-    ex_or_in <- sgdf[ , "ex_or_in"]
569
-    ex_or_in_levels <- levels(ex_or_in)
570
-    if (!identical(ex_or_in_levels, .EX_OR_IN_LEVELS))
571
-        stop("Malformed input.\n",
572
-             "  In the input data.frame (or DataFrame) representing the ",
573
-             "original splicing graph, the \"ex_or_in\" column has invalid ",
574
-             "levels. Could it be that it was obtained by a previous call ",
575
-             "to Sgdf2()?")
576
-    levels(ex_or_in) <- .EX_OR_IN_LEVELS2
577
-    uninformative_SSids <- uninformativeSSids(sgdf)
578
-    if (length(uninformative_SSids) == 0L)
579
-        return(sgdf)
580
-    from <- sgdf[ , "from"]
581
-    to <- sgdf[ , "to"]
582
-    tx_id <- sgdf[ , "tx_id"]
583
-    idx1 <- match(uninformative_SSids, from)
584
-    idx2 <- match(uninformative_SSids, to)
585
-    ## 2 sanity checks.
586
-    if (!identical(unname(tx_id[idx1]), unname(tx_id[idx2])))
587
-        stop("Malformed input.\n",
588
-             "  In the input data.frame (or DataFrame) representing the ",
589
-             "original splicing graph, the 2 rows containing a given ",
590
-             "uninformative splicing site id must contain the same tx_id.",
591
-             "Could it be that the \"tx_id\" column was manually altered ",
592
-             "before the data.frame (or DataFrame) was passed to ",
593
-             "Sgdf2()?")
594
-    if (!all(idx1 == idx2 + 1L))
595
-        stop("Malformed input.\n",
596
-             "  In the input data.frame (or DataFrame) representing the ",
597
-             "original splicing graph, each uninformative splicing site ",
598
-             "id must appear in 2 consecutive rows (first in the \"to\" ",
599
-             "column, then in the \"from\" column. Could it be that the ",
600
-             "rows were subsetted before the data.frame (or DataFrame) ",
601
-             "was passed to Sgdf2()?")
602
-    from <- from[-idx1]
603
-    to <- to[-idx2]
604
-    ex_or_in[idx1] <- .EX_OR_IN_LEVELS2[4L]
605
-    ex_or_in <- ex_or_in[-idx2]
606
-    tx_id <- tx_id[-idx1]
607
-    DataFrame(from=from, to=to, ex_or_in=ex_or_in, tx_id=tx_id)
608
-}
609
-
610
-Sgdf2 <- function(x, gene_id=NA)
611
-{
612
-    if (!is(x, "DataFrame"))
613
-        x <- Sgdf(x, gene_id=gene_id)
614
-    .remove_uninformative_SSids(x)
615
-}
616
-
617
-
618
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
619
-### Sgraph2() accessor
620
-###
621
-### Same as Sgraph() except that uninformative nodes (i.e. SSids) are removed.
622
-###
623
-
624
-Sgraph2 <- function(x, gene_id=NA, as.igraph=FALSE)
625
-{
626
-    Sgraph(Sgdf2(x, gene_id=gene_id), as.igraph=as.igraph)
627
-}
628
-
629
-
630 48
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
631 49
 ### SplicingGraphs() constructor
632 50
 ###
... ...
@@ -711,18 +129,18 @@ Sgraph2 <- function(x, gene_id=NA, as.igraph=FALSE)
711 129
     if (!is.null(tx_id))
712 130
         exbytx_mcols$tx_id <- tx_id
713 131
 
714
-    ## Set Spath metadata col.
715
-    if ("Spath" %in% colnames(exbytx_mcols))
716
-        stop("'exbytx' already has metadata column Spath")
132
+    ## Set spath metadata col.
133
+    if ("spath" %in% colnames(exbytx_mcols))
134
+        stop("'exbytx' already has metadata column spath")
717 135
     if (on.minus.strand) {
718
-        Spath <- rbind(SSids$end_SSid, SSids$start_SSid)
136
+        spath <- rbind(SSids$end_SSid, SSids$start_SSid)
719 137
     } else {
720
-        Spath <- rbind(SSids$start_SSid, SSids$end_SSid)
138
+        spath <- rbind(SSids$start_SSid, SSids$end_SSid)
721 139
     }
722
-    Spath_partitioning <- PartitioningByEnd(end(PartitioningByEnd(exbytx)) * 2L)
723
-    names(Spath_partitioning) <- tx_id
724
-    Spath <- splitAsList(as.vector(Spath), Spath_partitioning)
725
-    exbytx_mcols$Spath <- Spath
140
+    spath_partitioning <- PartitioningByEnd(end(PartitioningByEnd(exbytx)) * 2L)
141
+    names(spath_partitioning) <- tx_id
142
+    spath <- splitAsList(as.vector(spath), spath_partitioning)
143
+    exbytx_mcols$spath <- spath
726 144
 
727 145
     mcols(exbytx) <- exbytx_mcols
728 146
     exbytx
... ...
@@ -899,33 +317,3 @@ SplicingGraphs <- function(exbytx, grouping=NULL, check.introns=TRUE)
899 317
     new("SplicingGraphs", tx=ans_tx)
900 318
 }
901 319
 
902
-
903
-### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
904
-### "plot" method.
905
-###
906
-
907
-setMethod("plot", c("SplicingGraphs", "ANY"),
908
-    function(x, y, gene_id=NA)
909
-    {
910
-        if (missing(gene_id)) {
911
-            if (missing(y)) {
912
-                gene_id <- NA
913
-            } else {
914
-                gene_id <- y
915
-            }
916
-        } else {
917
-            if (!missing(y))
918
-                warning("'y' is ignored when plotting a SplicingGraphs ",
919
-                        "object and 'gene_id' is supplied")
920
-        }
921
-        if (!isSingleStringOrNA(gene_id))
922
-            stop("the supplied gene id must be a single string (or NA)")
923
-        x_names <- names(x)
924
-        if (!is.null(x_names) && is.na(gene_id))
925
-            stop("You need to specify a gene id when 'x' has names ",
926
-                 "e.g. 'plot(sg, \"some gene id\")'. Get all valid ",
927
-                 "gene ids with 'unique(names(sg))'.")
928
-        plot(Sgraph(x, gene_id=gene_id))
929
-    }
930
-)
931
-
932 320
new file mode 100644
... ...
@@ -0,0 +1,397 @@
1
+### =========================================================================
2
+### sgdf (and related) methods
3
+### -------------------------------------------------------------------------
4
+
5
+
6
+EX_OR_IN_LEVELS2 <- c("ex", "in", "", "mixed")
7
+EX_OR_IN_LEVELS <- EX_OR_IN_LEVELS2[-4L]
8
+
9
+
10
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
11
+### spath() accessor
12
+###
13
+### Gets all the splicing paths for the specified gene.
14
+### Returns them in a named IntegerList with 1 top-level element per
15
+### transcript in the specified gene. Each top-level element 'spath[[i]]'
16
+### contains the splicing site ids for the i-th transcript.
17
+###
18
+
19
+setGeneric("spath", signature="x",
20
+    function(x, gene_id=NA) standardGeneric("spath")
21
+)
22
+
23
+### Should return a CompressedIntegerList.
24
+setMethod("spath", "SplicingGraphs",
25
+    function(x, gene_id=NA)
26
+    {
27
+        if (!isSingleStringOrNA(gene_id))
28
+            stop("'gene_id' must be a single string (or NA)")
29
+        if (length(x) == 0L)
30
+            stop("'x' must be of length >= 1")
31
+        x_names <- names(x)
32
+        ans <- mcols(x@tx)[ , "spath"]
33
+        if (is.null(x_names)) {
34
+            if (!is.na(gene_id))
35
+                stop("the 'gene_id' arg is not supported ",
36
+                     "when 'x' is unnamed (in which case all its elements ",
37
+                     "(i.e. transcripts) are considered to belong to the ",
38
+                     "same gene)")
39
+            return(ans) 
40
+        }
41
+        if (is.na(gene_id))
42
+            stop("'gene_id' must be supplied when 'x' has names")
43
+        ans <- ans[x_names == gene_id]
44
+        if (length(ans) == 0L)
45
+            stop("invalid 'gene_id'")
46
+        ans
47
+    }
48
+)
49
+
50
+
51
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
52
+### UATXHcount() accessor
53
+###
54
+
55
+setGeneric("UATXHcount", signature="x",
56
+    function(x, gene_id=NA) standardGeneric("UATXHcount")
57
+)
58
+
59
+### Should return an integer vector or a NULL.
60
+setMethod("UATXHcount", "SplicingGraphs",
61
+    function(x, gene_id=NA)
62
+    {
63
+        if (!isSingleStringOrNA(gene_id))
64
+            stop("'gene_id' must be a single string (or NA)")
65
+        if (length(x) == 0L)
66
+            stop("'x' must be of length >= 1")
67
+        x_names <- names(x)
68
+        ans <- mcols(x@tx)[["UATXHcount"]]
69
+        if (is.null(x_names)) {
70
+            if (!is.na(gene_id))
71
+                stop("the 'gene_id' arg is not supported ",
72
+                     "when 'x' is unnamed (in which case all its elements ",
73
+                     "(i.e. transcripts) are considered to belong to the ",
74
+                     "same gene)")
75
+            return(ans) 
76
+        }
77
+        if (is.na(gene_id))
78
+            stop("'gene_id' must be supplied when 'x' has names")
79
+        if (is.null(ans))
80
+            return(ans)
81
+        ans <- ans[x_names == gene_id]
82
+        if (length(ans) == 0L)
83
+            stop("invalid 'gene_id'")
84
+        ans
85
+    }
86
+)
87
+
88
+
89
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
90
+### .hits() accessor (not exported)
91
+###
92
+
93
+setGeneric(".hits", signature="x",
94
+    function(x, gene_id=NA) standardGeneric(".hits")
95
+)
96
+
97
+### Should return a CompressedCharacterList or a NULL.
98
+setMethod(".hits", "GRangesList",
99
+    function(x, gene_id=NA)
100
+    {
101
+        if (!isSingleStringOrNA(gene_id))
102
+            stop("'gene_id' must be a single string (or NA)")
103
+        if (length(x) == 0L)
104
+            stop("'x' must be of length >= 1")
105
+        x_names <- names(x)
106
+        if (is.null(x_names)) {
107
+            if (!is.na(gene_id))
108
+                stop("the 'gene_id' arg is not supported ",
109
+                     "when 'x' is unnamed (in which case all its elements ",
110
+                     "(i.e. transcripts) are considered to belong to the ",
111
+                     "same gene)")
112
+            ans <- mcols(unlist(x, use.names=FALSE))[["hits"]]
113
+            return(ans) 
114
+        }
115
+        if (is.na(gene_id))
116
+            stop("'gene_id' must be supplied when 'x' has names")
117
+        x <- x[x_names == gene_id]
118
+        if (length(x) == 0L)
119
+            stop("invalid 'gene_id'")
120
+        ans <- mcols(unlist(x, use.names=FALSE))[["hits"]]
121
+        ans
122
+    }
123
+)
124
+
125
+
126
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
127
+### sgdf() extractor
128
+###
129
+### Returns the splicing graph in a DataFrame with 1 row per edge.
130
+###
131
+
132
+### 'spath' must be an IntegerList containing all the splicing paths for a
133
+### given gene. Should have been obtained thru the spath() accessor.
134
+### Returns a 4-col (or 5-col if 'UATXHcount' is supplied) data.frame
135
+### representing the splicing graph.
136
+.make_sgdf0_from_spath <- function(spath, UATXHcount=NULL)
137
+{
138
+    if (!is.null(UATXHcount)) {
139
+        if (!is.integer(UATXHcount))
140
+            stop("'UATXHcount' must be an integer vector or NULL")
141
+        if (length(UATXHcount) != length(spath))
142
+            stop("when not NULL, 'UATXHcount' must have ",
143
+                 "the same length as 'spath'")
144
+    }
145
+    sgdf0s <- lapply(seq_along(spath),
146
+                     function(i) {
147
+                         SSids <- spath[[i]]
148
+                         from <- c("R", SSids)
149
+                         to <- c(SSids, "L")
150
+                         nb_SSids <- length(SSids)
151
+                         if (nb_SSids %% 2L != 0L)
152
+                             stop("some splicing paths in 'spath' go thru an ",
153
+                                  "odd number of splicing site ids")
154
+                         nexons <- nb_SSids %/% 2L
155
+                         if (nexons == 0L) {
156
+                             ex_or_in <- EX_OR_IN_LEVELS[3L]
157
+                         } else {
158
+                             nintrons <- nexons - 1L
159
+                             ex_or_in <- c(EX_OR_IN_LEVELS[3L],
160
+                                           rep.int(EX_OR_IN_LEVELS[1:2],
161
+                                                   nintrons),
162
+                                           EX_OR_IN_LEVELS[1L],
163
+                                           EX_OR_IN_LEVELS[3L])
164
+                         }
165
+                         ex_or_in <- factor(ex_or_in,
166
+                                            levels=EX_OR_IN_LEVELS)
167
+                         data.frame(from=from,
168
+                                    to=to,
169
+                                    ex_or_in=ex_or_in,
170
+                                    stringsAsFactors=FALSE)
171
+                     })
172
+    nedges_per_tx <- sapply(sgdf0s, nrow)
173
+    sgdf0 <- do.call(rbind, sgdf0s)
174
+    tx_id <- names(spath)
175
+    if (is.null(tx_id))
176
+        tx_id <- seq_along(spath)
177
+    tx_id <- rep.int(factor(tx_id, levels=tx_id), nedges_per_tx)
178
+    sgdf0$tx_id <- tx_id
179
+    if (!is.null(UATXHcount))
180
+        sgdf0$UATXHcount <- rep.int(UATXHcount, nedges_per_tx)
181
+    sgdf0
182
+}
183
+
184
+### Collapse the duplicated edges in 'sgdf0' into a DataFrame.
185
+### We use a DataFrame instead of a data.frame because we want to store
186
+### the tx_id col in a CompressedFactorList (even though this container
187
+### doesn't formally exist and a CompressedIntegerList is actually what's
188
+### being used).
189
+.make_sgdf_from_sgdf0 <- function(sgdf0, ex_hits=NULL, in_hits=NULL)
190
+{
191
+    from <- sgdf0[ , "from"]
192
+    to <- sgdf0[ , "to"]
193
+    ex_or_in <- sgdf0[ , "ex_or_in"]
194
+    tx_id <- sgdf0[ , "tx_id"]
195
+    edges <- paste(from, to, sep="~")
196
+    sm <- match(edges, edges)
197
+    if (!all(ex_or_in == ex_or_in[sm]))
198
+        stop("invalid splicing graph")
199
+    is_not_dup <- sm == seq_along(sm)
200
+    sgdf <- DataFrame(sgdf0[is_not_dup, , drop=FALSE])
201
+    sgdf$tx_id <- splitAsList(tx_id, sm)
202
+    UATXHcount <- sgdf$UATXHcount
203
+    if (!is.null(UATXHcount))
204
+        sgdf$UATXHcount <- sum(splitAsList(sgdf0$UATXHcount, sm))
205
+    if (is.null(ex_hits) && is.null(in_hits))
206
+        return(sgdf)
207
+    hits <- relist(character(0), PartitioningByEnd(NG=length(sm)))
208
+    if (!is.null(ex_hits)) {
209
+        if (!is(ex_hits, "CharacterList"))
210
+            stop("'ex_hits' must be a CharacterList object")
211
+        ex_idx <- which(ex_or_in == "ex")
212
+        if (length(ex_idx) != length(ex_hits))
213
+            stop("'ex_hits' is incompatible with 'sgdf0'")
214
+        hits[ex_idx] <- ex_hits
215
+    }
216
+    if (!is.null(in_hits)) {
217
+        if (!is(in_hits, "CharacterList"))
218
+            stop("'in_hits' must be a CharacterList object")
219
+        in_idx <- which(ex_or_in == "in")
220
+        if (length(in_idx) != length(in_hits))
221
+            stop("'in_hits' is incompatible with 'sgdf0'")
222
+        hits[in_idx] <- in_hits
223
+    }
224
+    ## TODO: This is quite inefficient. Improve it.
225
+    for (i in which(!is_not_dup))
226
+        hits[[sm[i]]] <- unique(hits[[sm[i]]], hits[[i]])
227
+    sgdf$hits <- hits[is_not_dup]
228
+    sgdf$nhits <- elementLengths(sgdf$hits)
229
+    sgdf
230
+}
231
+
232
+setGeneric("sgdf", signature="x",
233
+    function(x, gene_id=NA, UATXHcount=NULL, inbytx=NULL, keep.dup.edges=FALSE)
234
+        standardGeneric("sgdf")
235
+)
236
+
237
+setMethod("sgdf", "ANY",
238
+    function(x, gene_id=NA, UATXHcount=NULL, inbytx=NULL, keep.dup.edges=FALSE)
239
+    {
240
+        spath <- spath(x, gene_id=gene_id)
241
+        if (is.null(UATXHcount))
242
+            UATXHcount <- UATXHcount(x, gene_id=gene_id)
243
+        if (is.null(inbytx))
244
+            return(sgdf(spath, UATXHcount=UATXHcount,
245
+                               keep.dup.edges=keep.dup.edges))
246
+        if (!is(inbytx, "GRangesList"))
247
+            stop("'inbytx' must be NULL or a GRangesList object")
248
+        if (!is(x, "SplicingGraphs"))
249
+            stop("'x' must be a SplicingGraphs object ",
250
+                 "when 'inbytx' is a GRangesList object")
251
+        if (length(inbytx) != length(x))
252
+            stop("'inbytx' must have the same length as 'x'")
253
+        if (!identical(elementLengths(inbytx) + 1L, elementLengths(x)))
254
+            stop("the shape of 'inbytx' is not compatible ",
255
+                 "with the shape of 'x'")
256
+        if (!identical(keep.dup.edges, FALSE))
257
+            stop("'keep.dup.edges' must be FALSE when 'inbytx' is supplied")
258
+        sgdf0 <- sgdf(spath, UATXHcount=UATXHcount, keep.dup.edges=TRUE)
259
+        ex_or_in <- sgdf0[ , "ex_or_in"]
260
+        ex_hits <- .hits(x@tx, gene_id=gene_id)
261
+        if (is.null(ex_hits))
262
+            stop("'x' must have a \"hits\" inner metadata column ",
263
+                 "when 'inbytx' is a GRangesList object. May be ",
264
+                 "you forgot to pass it thru assignSubfeatureHits()?")
265
+        in_hits <- .hits(inbytx, gene_id=gene_id)
266
+        if (is.null(in_hits))
267
+            stop("'inbytx' has no \"hits\" inner metadata column. May be ",
268
+                 "you forgot to pass it thru assignSubfeatureHits()?")
269
+        .make_sgdf_from_sgdf0(sgdf0, ex_hits=ex_hits, in_hits=in_hits)
270
+    }
271
+)
272
+
273
+setMethod("sgdf", "IntegerList",
274
+    function(x, gene_id=NA, UATXHcount=NULL, inbytx=NULL, keep.dup.edges=FALSE)
275
+    {
276
+        if (!identical(gene_id, NA))
277
+            stop("the 'gene_id' arg is not supported ",
278
+                 "when 'x' is an IntegerList")
279
+        if (!is.null(inbytx))
280
+            stop("the 'inbytx' arg is not supported ",
281
+                 "when 'x' is an IntegerList")
282
+        sgdf0 <- .make_sgdf0_from_spath(x, UATXHcount=UATXHcount)
283
+        sgdf(sgdf0, keep.dup.edges=keep.dup.edges)
284
+    }
285
+)
286
+
287
+setMethod("sgdf", "data.frame",
288
+    function(x, gene_id=NA, UATXHcount=NULL, inbytx=NULL, keep.dup.edges=FALSE)
289
+    {
290
+        if (!identical(gene_id, NA))
291
+            stop("the 'gene_id' arg is not supported ",
292
+                 "when 'x' is a data.frame")
293
+        if (!is.null(UATXHcount))
294
+            stop("the 'UATXHcount' arg is not supported ",
295
+                 "when 'x' is a data.frame")
296
+        if (!is.null(inbytx))
297
+            stop("the 'inbytx' arg is not supported ",
298
+                 "when 'x' is a data.frame")
299
+        if (!isTRUEorFALSE(keep.dup.edges))
300
+            stop("'keep.dup.edges' must be TRUE or FALSE")
301
+        if (keep.dup.edges)
302
+            return(x)  # no-op
303
+        .make_sgdf_from_sgdf0(x)
304
+    }
305
+)
306
+
307
+
308
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
309
+### uninformativeSSids() extractor
310
+###
311
+
312
+setGeneric("uninformativeSSids", signature="x",
313
+    function(x, gene_id=NA) standardGeneric("uninformativeSSids")
314
+)
315
+
316
+setMethod("uninformativeSSids", "ANY",
317
+    function(x, gene_id=NA)
318
+    {
319
+        x <- sgdf(x, gene_id=gene_id)
320
+        uninformativeSSids(x)
321
+    }
322
+)
323
+
324
+setMethod("uninformativeSSids", "DataFrame",
325
+    function(x, gene_id=NA)
326
+    {
327
+        if (!identical(gene_id, NA))
328
+            stop("the 'gene_id' arg is not supported ",
329
+                 "when 'x' is a DataFrame")
330
+        from <- x[ , "from"]
331
+        to <- x[ , "to"]
332
+        from1_SSids <- setdiff(from, from[duplicated(from)])
333
+        to1_SSids <- setdiff(to, to[duplicated(to)])
334
+        intersect(from1_SSids, to1_SSids)
335
+    }
336
+)
337
+
338
+
339
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
340
+### sgdf2() extractor
341
+###
342
+### Same as sgdf() except that uninformative nodes (i.e. SSids) are removed.
343
+###
344
+
345
+### 'sgdf' must be a DataFrame as returned by:
346
+###     sgdf( , keep.dup.edges=FALSE)
347
+.remove_uninformative_SSids <- function(sgdf)
348
+{
349
+    ex_or_in <- sgdf[ , "ex_or_in"]
350
+    ex_or_in_levels <- levels(ex_or_in)
351
+    if (!identical(ex_or_in_levels, EX_OR_IN_LEVELS))
352
+        stop("Malformed input.\n",
353
+             "  In the input data.frame (or DataFrame) representing the ",
354
+             "original splicing graph, the \"ex_or_in\" column has invalid ",
355
+             "levels. Could it be that it was obtained by a previous call ",
356
+             "to sgdf2()?")
357
+    levels(ex_or_in) <- EX_OR_IN_LEVELS2
358
+    uninformative_SSids <- uninformativeSSids(sgdf)
359
+    if (length(uninformative_SSids) == 0L)
360
+        return(sgdf)
361
+    from <- sgdf[ , "from"]
362
+    to <- sgdf[ , "to"]
363
+    tx_id <- sgdf[ , "tx_id"]
364
+    idx1 <- match(uninformative_SSids, from)
365
+    idx2 <- match(uninformative_SSids, to)
366
+    ## 2 sanity checks.
367
+    if (!identical(unname(tx_id[idx1]), unname(tx_id[idx2])))
368
+        stop("Malformed input.\n",
369
+             "  In the input data.frame (or DataFrame) representing the ",
370
+             "original splicing graph, the 2 rows containing a given ",
371
+             "uninformative splicing site id must contain the same tx_id.",
372
+             "Could it be that the \"tx_id\" column was manually altered ",
373
+             "before the data.frame (or DataFrame) was passed to ",
374
+             "sgdf2()?")
375
+    if (!all(idx1 == idx2 + 1L))
376
+        stop("Malformed input.\n",
377
+             "  In the input data.frame (or DataFrame) representing the ",
378
+             "original splicing graph, each uninformative splicing site ",
379
+             "id must appear in 2 consecutive rows (first in the \"to\" ",
380
+             "column, then in the \"from\" column. Could it be that the ",
381
+             "rows were subsetted before the data.frame (or DataFrame) ",
382
+             "was passed to sgdf2()?")
383
+    from <- from[-idx1]
384
+    to <- to[-idx2]
385
+    ex_or_in[idx1] <- EX_OR_IN_LEVELS2[4L]
386
+    ex_or_in <- ex_or_in[-idx2]
387
+    tx_id <- tx_id[-idx1]
388
+    DataFrame(from=from, to=to, ex_or_in=ex_or_in, tx_id=tx_id)
389
+}
390
+
391
+sgdf2 <- function(x, gene_id=NA)
392
+{
393
+    if (!is(x, "DataFrame"))
394
+        x <- sgdf(x, gene_id=gene_id)
395
+    .remove_uninformative_SSids(x)
396
+}
397
+
0 398
new file mode 100644
... ...
@@ -0,0 +1,226 @@
1
+### =========================================================================
2
+### sgraph (and related) methods
3
+### -------------------------------------------------------------------------
4
+
5
+
6
+setOldClass("igraph")
7
+
8
+.EDGE_WEIGHTS <- c(1, 0.2, 0.1, 0.4)
9
+
10
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
11
+### .make_igraph_from_sgdf()
12
+###
13
+
14
+### 'sgdf' must be a data.frame as returned by:
15
+###     sgdf( , keep.dup.edges=TRUE)
16
+### or a DataFrame as returned by:
17
+###     sgdf( , keep.dup.edges=FALSE)
18
+### Valid extra cols are: "label", "label.color", "lty", "color", "width"
19
+### and "UATXHcount". They are used to set graphical parameters on the edges.
20
+.precook_igraph_edges_from_sgdf <- function(sgdf)
21
+{
22
+    required_colnames <- c("from", "to", "ex_or_in", "tx_id")
23
+    extra_colnames <- c("label", "label.color", "lty", "color",
24
+                        "width", "UATXHcount")
25
+    extract_colnames <- c(required_colnames,
26
+                          intersect(extra_colnames, colnames(sgdf)))
27
+    ans <- sgdf[ , extract_colnames, drop=FALSE]
28
+    ex_or_in <- ans[ , "ex_or_in"]
29
+    ex_or_in_levels <- levels(ex_or_in)
30
+    if (!identical(ex_or_in_levels, EX_OR_IN_LEVELS2)
31
+     && !identical(ex_or_in_levels, EX_OR_IN_LEVELS))
32
+        stop("\"ex_or_in\" column has invalid levels")
33
+    if (!("label.color" %in% extract_colnames))
34
+        ans$label.color <- "blue"
35
+    if (!("lty" %in% extract_colnames))
36
+        ans$lty <- c("solid", "solid", "dashed", "solid")[ex_or_in]
37
+    if (!("color" %in% extract_colnames))
38
+        ans$color <- c("green3", "darkgrey", "grey", "black")[ex_or_in]
39
+    if (!("width" %in% extract_colnames)
40
+     && "UATXHcount" %in% extract_colnames) {
41
+        min_UATXHcount <- min(ans$UATXHcount)
42
+        if (min_UATXHcount < 0L) {
43
+            warning("'UATXHcount' column contains negative values. Cannot use ",
44
+                    "it to set the widths of the edges.")
45
+        } else {
46
+            max_UATXHcount <- max(ans$UATXHcount)
47
+            if (max_UATXHcount <= 0L) {
48
+                warning("'UATXHcount' column has no positive values. Cannot use ",
49
+                        "it to set the widths of the edges.")
50
+            } else {
51
+                ans$width <- 20.0 * ans$UATXHcount / max(ans$UATXHcount)
52
+            }
53
+        }
54
+    }
55
+    ans
56
+}
57
+
58
+.make_igraph <- function(d)
59
+{
60
+    ## Prepare the 'vertices' argument to pass to graph.data.frame().
61
+    from <- d[ , "from"]
62
+    to <- d[ , "to"]
63
+    nodes <- unique(c(from, to))
64
+    nodes <- sort(as.integer(setdiff(nodes, c("R", "L"))))
65
+    nodes <- c("R", as.character(nodes), "L")
66
+    color <- c("gray", rep.int("white", length(nodes)-2L), "gray")
67
+    label.color <- "black"
68
+    vertices <- data.frame(name=nodes, color=color, label.color=label.color)
69
+
70
+    ## Make the igraph object.
71
+    g <- graph.data.frame(d, vertices=vertices)
72
+    layout.kamada.kawai.deterministic <- function(...)
73
+    {
74
+        set.seed(33L)
75
+        layout.kamada.kawai(...)
76
+    }
77
+
78
+    ## Set its layout attribute.
79
+    g$layout <- layout.kamada.kawai.deterministic
80
+    #g$layout <- layout.sgraph
81
+
82
+    g
83
+}
84
+
85
+### 'sgdf0' must be a data.frame as returned by:
86
+###     sgdf( , keep.dup.edges=TRUE)
87
+.make_igraph_from_sgdf0 <- function(sgdf0, gene_id=NA)
88
+{
89
+    if (!is.data.frame(sgdf0))
90
+        stop("'sgdf0' must be a data.frame")
91
+    d <- .precook_igraph_edges_from_sgdf(sgdf0)
92
+    if (!("label" %in% colnames(d)))
93
+        d$label <- d$tx_id
94
+    .make_igraph(d)
95
+}
96
+
97
+### 'sgdf' must be a DataFrame as returned by:
98
+###     sgdf( , keep.dup.edges=FALSE)
99
+### or by:
100
+###     sgdf2( )
101
+.make_igraph_from_sgdf <- function(sgdf, gene_id=NA)
102
+{
103
+    if (!is(sgdf, "DataFrame"))
104
+        stop("'sgdf' must be a DataFrame")
105
+    d <- .precook_igraph_edges_from_sgdf(sgdf)
106
+    if (!("label" %in% colnames(d)))
107
+        d$label <- sapply(d$tx_id, paste, collapse=",")
108
+    d$tx_id <- NULL
109
+    ## Turning 'd' into an ordinary data.frame. (Looks like 'as.data.frame()'
110
+    ## on a DataFrame ignores the 'stringsAsFactors' arg so we use
111
+    ## 'data.frame(as.list())' instead.)
112
+    d <- data.frame(as.list(d), stringsAsFactors=FALSE)
113
+    .make_igraph(d)
114
+}
115
+
116
+
117
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
118
+### sgraph() extractor
119
+###
120
+### Returns the splicing graph in an Ragraph object.
121
+###
122
+
123
+setGeneric("sgraph", signature="x",
124
+    function(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
125
+        standardGeneric("sgraph")
126
+)
127
+
128
+setMethod("sgraph", "ANY",
129
+    function(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
130
+    {
131
+        sgdf <- sgdf(x, gene_id=gene_id, keep.dup.edges=keep.dup.edges)
132
+        sgraph(sgdf, as.igraph=as.igraph)
133
+    }
134
+)
135
+
136
+setMethod("sgraph", "data.frame",
137
+    function(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
138
+    {
139
+        if (!identical(gene_id, NA))
140
+            stop("the 'gene_id' arg is not supported ",
141
+                 "when 'x' is a data.frame")
142
+        if (!identical(keep.dup.edges, FALSE))
143
+            stop("the 'keep.dup.edges' arg is not supported ",
144
+                 "when 'x' is a data.frame")
145
+        igraph <- .make_igraph_from_sgdf0(x)
146
+        sgraph(igraph, as.igraph=as.igraph)
147
+    }
148
+)
149
+
150
+setMethod("sgraph", "DataFrame",
151
+    function(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
152
+    {
153
+        if (!identical(gene_id, NA))
154
+            stop("the 'gene_id' arg is not supported ",
155
+                 "when 'x' is a DataFrame")
156
+        if (!identical(keep.dup.edges, FALSE))
157
+            stop("the 'keep.dup.edges' arg is not supported ",
158
+                 "when 'x' is a DataFrame")
159
+        igraph <- .make_igraph_from_sgdf(x)
160
+        sgraph(igraph, as.igraph=as.igraph)
161
+    }
162
+)
163
+
164
+setMethod("sgraph", "igraph",
165
+    function(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
166
+    {
167
+        if (!identical(gene_id, NA))
168
+            stop("the 'gene_id' arg is not supported ",
169
+                 "when 'x' is an igraph object")
170
+        if (!identical(keep.dup.edges, FALSE))
171
+            stop("the 'keep.dup.edges' arg is not supported ",
172
+                 "when 'x' is an igraph object")
173
+        if (!isTRUEorFALSE(as.igraph))
174
+            stop("'as.igraph' must be TRUE or FALSE")
175
+        if (as.igraph) {
176
+            ## Need to load the igraph package so the user can display, plot,
177
+            ## and manipulate the returned object.
178
+            library(igraph)
179
+            return(x)  # no-op
180
+        }
181
+        make_Ragraph_from_igraph(x)
182
+    }
183
+)
184
+
185
+
186
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
187
+### sgraph2() extractor
188
+###
189
+### Same as sgraph() except that uninformative nodes (i.e. SSids) are removed.
190
+###
191
+
192
+sgraph2 <- function(x, gene_id=NA, as.igraph=FALSE)
193
+{
194
+    sgraph(sgdf2(x, gene_id=gene_id), as.igraph=as.igraph)
195
+}
196
+
197
+
198
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
199
+### "plot" method.
200
+###
201
+
202
+setMethod("plot", c("SplicingGraphs", "ANY"),
203
+    function(x, y, gene_id=NA)
204
+    {
205
+        if (missing(gene_id)) {
206
+            if (missing(y)) {
207
+                gene_id <- NA
208
+            } else {
209
+                gene_id <- y
210
+            }
211
+        } else {
212
+            if (!missing(y))
213
+                warning("'y' is ignored when plotting a SplicingGraphs ",
214
+                        "object and 'gene_id' is supplied")
215
+        }
216
+        if (!isSingleStringOrNA(gene_id))
217
+            stop("the supplied gene id must be a single string (or NA)")
218
+        x_names <- names(x)
219
+        if (!is.null(x_names) && is.na(gene_id))
220
+            stop("You need to specify a gene id when 'x' has names ",
221
+                 "e.g. 'plot(sg, \"some gene id\")'. Get all valid ",
222
+                 "gene ids with 'unique(names(sg))'.")
223
+        plot(sgraph(x, gene_id=gene_id))
224
+    }
225
+)
226
+
... ...
@@ -1,9 +1,9 @@
1 1
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2
-### A layout function for igraph objects.
2
+### A layout function for a splicing graph represented as an igraph object.
3 3
 ###
4 4
 
5 5
 ### Experimental. Not ready yet!
6
-layout.Sgraph <- function(graph)
6
+layout.sgraph <- function(graph)
7 7
 {
8 8
     ## Compute the 'x' col.
9 9
     vertices <- get.data.frame(graph, what="vertices")
... ...
@@ -37,7 +37,7 @@ makeSgdfWithHits <- function(grl, sg)
37 37
     sg@tx <- assignSubfeatureHits(grl, sg@tx, ov1, ignore.strand=TRUE)
38 38
     in_by_tx2 <- psetdiff(range(sg@tx), sg@tx)
39 39
     in_by_tx2 <- assignSubfeatureHits(grl, in_by_tx2, ov1, ignore.strand=TRUE)
40
-    Sgdf(sg, inbytx=in_by_tx2)
40
+    sgdf(sg, inbytx=in_by_tx2)
41 41
 }
42 42
 
43 43
 makeTSPCsgdf <- function(subdir_path)
... ...
@@ -56,7 +56,7 @@ makeTSPCsgdf <- function(subdir_path)
56 56
 
57 57
     ## Compute the splicing graph.
58 58
     sg <- SplicingGraphs(ex_by_tx)
59
-    ans <- Sgdf(sg)
59
+    ans <- sgdf(sg)
60 60
 
61 61
     ## Find the BAM files.
62 62
     suffixes <- substr(filenames, filenames_nchar-3L, filenames_nchar)
... ...
@@ -9,61 +9,24 @@
9 9
 \alias{elementLengths,SplicingGraphs-method}
10 10
 \alias{show,SplicingGraphs-method}
11 11
 
12
-\alias{Spath}
13
-\alias{Spath,SplicingGraphs-method}
14
-
15
-\alias{UATXHcount}
16
-\alias{UATXHcount,SplicingGraphs-method}
17
-
18
-\alias{Sgdf}
19
-\alias{Sgdf,ANY-method}
20
-\alias{Sgdf,IntegerList-method}
21
-\alias{Sgdf,data.frame-method}
22
-
23
-\alias{Sgraph}
24
-\alias{Sgraph,ANY-method}
25
-\alias{Sgraph,data.frame-method}
26
-\alias{Sgraph,DataFrame-method}
27
-\alias{Sgraph,igraph-method}
28
-
29
-\alias{uninformativeSSids}
30
-\alias{uninformativeSSids,ANY-method}
31
-\alias{uninformativeSSids,DataFrame-method}
32
-
33
-\alias{Sgdf2}
34
-
35
-\alias{Sgraph2}
36
-
37
-\alias{plot,SplicingGraphs,ANY-method}
38
-
39 12
 
40 13
 \title{
41
-  Tools for creating and plotting splicing graphs from transcript annotations
14
+  SplicingGraphs objects
42 15
 }
43 16
 
44 17
 \description{
45
-  TODO
18
+  The SplicingGraphs class is a container for splicing graphs created from
19
+  transcript annotations.
46 20
 }
47 21
 
48 22
 \usage{
49 23
 SplicingGraphs(exbytx, grouping=NULL, check.introns=TRUE)
50 24
 
51
-## Basic accessors
25
+## Basic accessors:
26
+
52 27
 \S4method{length}{SplicingGraphs}(x)
53 28
 \S4method{names}{SplicingGraphs}(x)
54 29
 \S4method{elementLengths}{SplicingGraphs}(x)
55
-
56
-## Other accessors
57
-Spath(x, gene_id=NA)
58
-UATXHcount(x, gene_id=NA)
59
-Sgdf(x, gene_id=NA, UATXHcount=NULL, inbytx=NULL, keep.dup.edges=FALSE)
60
-Sgraph(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
61
-uninformativeSSids(x, gene_id=NA)
62
-Sgdf2(x, gene_id=NA)
63
-Sgraph2(x, gene_id=NA, as.igraph=FALSE)
64
-
65
-## PLotting
66
-\S4method{plot}{SplicingGraphs,ANY}(x, y, gene_id=NA)
67 30
 }
68 31
 
69 32
 \arguments{
... ...
@@ -82,24 +45,6 @@ Sgraph2(x, gene_id=NA, as.igraph=FALSE)
82 45
   \item{x}{
83 46
     TODO
84 47
   }
85
-  \item{gene_id}{
86
-    TODO
87
-  }
88
-  \item{UATXHcount}{
89
-    TODO
90
-  }
91
-  \item{inbytx}{
92
-    TODO
93
-  }
94
-  \item{keep.dup.edges}{
95
-    TODO
96
-  }
97
-  \item{as.igraph}{
98
-    TODO
99
-  }
100
-  \item{y}{
101
-    TODO
102
-  }
103 48
 }
104 49
 
105 50
 \details{
... ...
@@ -146,7 +91,7 @@ Sgraph2(x, gene_id=NA, as.igraph=FALSE)
146 91
 }
147 92
 
148 93
 \value{
149
-TODO
94
+  TODO
150 95
 }
151 96
 
152 97
 \author{
... ...
@@ -175,10 +120,16 @@ TODO
175 120
           \code{\link[GenomicFeatures]{transcriptsBy}}, and the
176 121
           \link[GenomicFeatures]{TranscriptDb} class in the GenomicFeatures
177 122
           package.
123
+
178 124
     \item The \link[GenomicRanges]{GRangesList} class in the GenomicRanges
179 125
           package.
126
+
180 127
     \item The \link[IRanges]{IntegerList}, \link[IRanges]{CharacterList},
181 128
           and \link[IRanges]{DataFrame} classes in the IRanges package.
129
+
130
+    \item \code{\link{sgdf}} and \code{\link{sgraph}} for extracting
131
+          a splicing graph as a data frame or as a plottable graph-like
132
+          object.
182 133
   }
183 134
 }
184 135
 
... ...
@@ -207,28 +158,16 @@ sg <- SplicingGraphs(ex_by_tx, tx_by_gn)
207 158
 ## Note that 'sg' can also be created directly from the
208 159
 ## TranscriptDb object:
209 160
 #sg <- SplicingGraphs(toy_genes_txdb)  # not ready yet
161
+sg
210 162
 
211 163
 ## 'sg' has 1 element per transcript, and each transcript is
212 164
 ## assigned a name that is the id of the gene it belongs to. All the
213
-## transcripts belonging to a given gene are guaranteed to be
165
+## transcripts belonging to the same gene are guaranteed to be
214 166
 ## consecutive elements in 'sg'.
215 167
 names(sg)
216 168
 
217 169
 ## ---------------------------------------------------------------------
218
-## 3. Create the splicing graph data frame for geneA, and plot it
219
-## ---------------------------------------------------------------------
220
-
221
-sgdfA <- Sgdf(sg, gene_id="geneA")
222
-sgdfA
223
-
224
-if (interactive()) {
225
-  ## Edges are labeled with the transcript ids (or names), in blue.
226
-  ## The green arrows are edges corresponding to exons.
227
-  plot(Sgraph(sgdfA))
228
-}
229
-
230
-## ---------------------------------------------------------------------
231
-## 4. Extract information from the splicing graph
170
+## 3. Extract information from the SplicingGraphs object
232 171
 ## ---------------------------------------------------------------------
233 172
 
234 173
 if (FALSE) {
... ...
@@ -252,15 +191,4 @@ if (FALSE) {
252 191
   ## assignment of edges to bubbles
253 192
   mcols(sgA)
254 193
 }
255
-
256
-## ---------------------------------------------------------------------
257
-## 5. Plotting the other toy genes
258
-## ---------------------------------------------------------------------
259
-
260
-if (interactive()) {
261
-  plot(sg, "geneB")
262
-  plot(sg, "geneC")
263
-  plot(sg, "geneD")
264
-  plot(sg, "geneE")
265
-}
266 194
 }
... ...
@@ -38,13 +38,13 @@ dim(TREM2sgdf)
38 38
 
39 39
 ## Plot the splicing graphs:
40 40
 library(Rgraphviz)
41
-plot(Sgraph(BAI1sgdf))
42
-plot(Sgraph(CYB561sgdf))
43
-plot(Sgraph(DAPL1sgdf))
44
-plot(Sgraph(ITGB8sgdf))
45
-plot(Sgraph(KIAA0319Lsgdf))
46
-plot(Sgraph(LGSNsgdf))
47
-plot(Sgraph(MKRN3sgdf))
48
-plot(Sgraph(ST14sgdf))
49
-plot(Sgraph(TREM2sgdf))
41
+plot(sgraph(BAI1sgdf))
42
+plot(sgraph(CYB561sgdf))
43
+plot(sgraph(DAPL1sgdf))
44
+plot(sgraph(ITGB8sgdf))
45
+plot(sgraph(KIAA0319Lsgdf))
46
+plot(sgraph(LGSNsgdf))
47
+plot(sgraph(MKRN3sgdf))
48
+plot(sgraph(ST14sgdf))
49
+plot(sgraph(TREM2sgdf))
50 50
 }
... ...
@@ -106,5 +106,5 @@ sg@tx <- assignSubfeatureHits(grl, sg@tx, ov1, ignore.strand=TRUE)
106 106
 in_by_tx2 <- psetdiff(range(sg@tx), sg@tx)
107 107
 in_by_tx2 <- assignSubfeatureHits(grl, in_by_tx2, ov1, ignore.strand=TRUE)
108 108
 
109
-Sgdf(sg, gene_id="geneA", inbytx=in_by_tx2)
109
+sgdf(sg, gene_id="geneA", inbytx=in_by_tx2)
110 110
 }
111 111
new file mode 100644
... ...
@@ -0,0 +1,93 @@
1
+\name{sgdf-methods}
2
+
3
+\alias{sgdf-methods}
4
+
5
+\alias{spath}
6
+\alias{spath,SplicingGraphs-method}
7
+
8
+\alias{UATXHcount}
9
+\alias{UATXHcount,SplicingGraphs-method}
10
+
11
+\alias{sgdf}
12
+\alias{sgdf,ANY-method}
13
+\alias{sgdf,IntegerList-method}
14
+\alias{sgdf,data.frame-method}
15
+
16
+\alias{uninformativeSSids}
17
+\alias{uninformativeSSids,ANY-method}
18
+\alias{uninformativeSSids,DataFrame-method}
19
+
20
+\alias{sgdf2}
21
+
22
+
23
+\title{
24
+  Extract a splicing graph as a data frame
25
+}
26
+
27
+\description{
28
+  Extract the splicing graph for a given gene from a \link{SplicingGraphs}
29
+  object and return it as a \link[IRanges]{DataFrame}.
30
+}
31
+
32
+\usage{
33
+sgdf(x, gene_id=NA, UATXHcount=NULL, inbytx=NULL, keep.dup.edges=FALSE)
34
+sgdf2(x, gene_id=NA)
35
+
36
+## Related utilities:
37
+
38
+spath(x, gene_id=NA)
39
+UATXHcount(x, gene_id=NA)
40
+uninformativeSSids(x, gene_id=NA)
41
+}
42
+
43
+\arguments{
44
+  \item{x}{
45
+    TODO
46
+  }
47
+  \item{gene_id}{
48
+    TODO
49
+  }
50
+  \item{UATXHcount}{
51
+    TODO
52
+  }
53
+  \item{inbytx}{
54
+    TODO
55
+  }
56
+  \item{keep.dup.edges}{
57
+    TODO
58
+  }
59
+}
60
+
61
+\details{
62
+  TODO
63
+}
64
+
65
+\value{
66
+  TODO
67
+}
68
+
69
+\author{
70
+  H. Pages
71
+}
72
+
73
+\seealso{
74
+  \itemize{
75
+    \item The \link{SplicingGraphs} class.
76
+
77
+    \item \code{\link{sgraph}} for extracting a splicing graph as a
78
+          plottable graph-like object.
79
+  }
80
+}
81
+
82
+\examples{
83
+example(SplicingGraphs)  # create SplicingGraphs object 'sg'
84
+sg
85
+
86
+## 'sg' has 1 element per transcript, and each transcript is
87
+## assigned a name that is the id of the gene it belongs to. All the
88
+## transcripts belonging to the same gene are guaranteed to be
89
+## consecutive elements in 'sg'.
90
+names(sg)
91
+
92
+sgdf(sg, gene_id="geneA")
93
+}
0 94
new file mode 100644
... ...
@@ -0,0 +1,96 @@
1
+\name{sgraph-methods}
2
+
3
+\alias{sgraph-methods}
4
+
5
+\alias{sgraph}
6
+\alias{sgraph,ANY-method}
7
+\alias{sgraph,data.frame-method}
8
+\alias{sgraph,DataFrame-method}
9
+\alias{sgraph,igraph-method}
10
+
11
+\alias{sgraph2}
12
+
13
+\alias{plot,SplicingGraphs,ANY-method}
14
+
15
+
16
+\title{
17
+  Extract a splicing graph as a plottable graph-like object
18
+}
19
+
20
+\description{
21
+  Extract the splicing graph for a given gene from a \link{SplicingGraphs}
22
+  object and return it as a plottable graph-like object.
23
+}
24
+
25
+\usage{
26
+sgraph(x, gene_id=NA, keep.dup.edges=FALSE, as.igraph=FALSE)
27
+sgraph2(x, gene_id=NA, as.igraph=FALSE)
28
+
29
+## PLotting:
30
+
31
+\S4method{plot}{SplicingGraphs,ANY}(x, y, gene_id=NA)
32
+}
33
+
34
+\arguments{
35
+  \item{x}{
36
+    TODO
37
+  }
38
+  \item{gene_id}{
39
+    TODO
40
+  }
41
+  \item{keep.dup.edges}{
42
+    TODO
43
+  }
44
+  \item{as.igraph}{
45
+    TODO
46
+  }
47
+  \item{y}{
48
+    TODO
49
+  }
50
+}
51
+
52
+\details{
53
+  TODO
54
+}
55
+
56
+\value{
57
+  TODO
58
+}
59
+
60
+\author{
61
+  H. Pages
62
+}
63
+
64
+\seealso{
65
+  \itemize{
66
+    \item The \link{SplicingGraphs} class.
67
+
68
+    \item \code{\link{sgdf}} for extracting a splicing graph as a
69
+          data frame.
70
+  }
71
+}
72
+
73
+\examples{
74
+example(SplicingGraphs)  # create SplicingGraphs object 'sg'
75
+sg
76
+
77
+## 'sg' has 1 element per transcript, and each transcript is
78
+## assigned a name that is the id of the gene it belongs to. All the
79
+## transcripts belonging to the same gene are guaranteed to be
80
+## consecutive elements in 'sg'.
81
+names(sg)
82
+
83
+sgA <- sgraph(sg, gene_id="geneA")
84
+
85
+if (interactive()) {
86
+  ## Edges are labeled with the transcript ids (or names), in blue.
87
+  ## The green arrows are edges corresponding to exons.
88
+  plot(sgA)
89
+
90
+  ## Note that plot() works directly on the SplicingGraphs object 'sg'.
91
+  plot(sg, "geneB")
92
+  plot(sg, "geneC")
93
+  plot(sg, "geneD")
94
+  plot(sg, "geneE")
95
+}
96
+}