Bioconductor Code: SplicingGraphs

Browse code

- Change the order of the edge-level metadata cols in the GRangesList object returned by sgedgesByTranscript() and sgedgesByGene(): the "sgedge_id" col (global splicing graph edge id) now is in 3rd position (instead of 1st), after the "from" and "to" cols.

- By default, the output of sgedges() now also includes the "sgedge_id" col,
so it has the same cols as the edge-level metadata cols returned by default
by sgedgesByTranscript() and sgedgesByGene().

- Change the order of the edge-level metadata cols in the GRangesList object
returned by rsgedgesByGene(): the "rsgedge_id" col (global reduced splicing
graph edge id) now is also in 3rd position (instead of 1st), after the
"from" and "to" cols.

- By default, the output of rsgedges() now also includes the "rsgedge_id" col,
so it has the same cols as the edge-level metadata cols returned by default
by rsgedgesByGene().

- The "tx_id" col in the output of all the above functions now is a
CharacterList instead of a broken IntegerList that was holding a factor in
its unlistData slot.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@75803 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 19/04/2013 10:13:26
Showing 8 changed files

R/rsgedgesByGene-methods.R index 6e7a757..814e99e 100644
R/sgedges-methods.R index ebe42b7..07f9ff3 100644
R/sgedgesByGene-methods.R index 372719f..7c456ac 100644
R/utils.R index 8dfc8b5..0d7e1c8 100644
inst/doc/SplicingGraphs.Rnw index a6d400e..0a7d6a5 100644
man/countReads.Rd index 18027bd..2f13701 100644
man/rsgedgesByGene-methods.Rd index bf7a9d6..e2fa3e5 100644
man/sgedges-methods.Rd index b6fd07c..8d67ca1 100644

R/rsgedgesByGene-methods.R

History View file @ e3f9cd9

@@ -6,8 +6,8 @@
                      ### *reduced* splicing graphs.
                      ###
                     -### Return the uninformative fully qualified nodes.
                     -.uninformative_fqnodes <- function(sg)
                     +### Return the fully qualified uninformative nodes.
                     +.get_fq_uninfnodes <- function(sg)
+                     {
                          txpath <- txpath(unlist(sg))
                          skeleton2 <- PartitioningByEnd(end(PartitioningByEnd(txpath)) * 2L)
@@ -25,65 +25,84 @@
                          gene_id <- gene_id[c(TRUE, FALSE)]
                          from <- tmp[c(TRUE, FALSE)]
                          to <- tmp[c(FALSE, TRUE)]
                     -    sgedge_id <- paste0(gene_id, ":", from, ",", to)
                     +    sgedge_id <- make_global_sgedge_id(gene_id, from, to)
                          keep_idx <- which(!duplicated(sgedge_id))
                     -    fqfrom <- paste0(gene_id, ":", from)[keep_idx]  # fully qualified ids
                     -    fqto <- paste0(gene_id, ":", to)[keep_idx]  # fully qualified ids
                     -    uninformative_sgnodes(fqfrom, fqto)
                     +    fq_from <- paste(gene_id, from, sep=":")[keep_idx]  # fully qualified ids
                     +    fq_to <- paste(gene_id, to, sep=":")[keep_idx]  # fully qualified ids
                     +    uninformative_sgnodes(fq_from, fq_to)
+                     }
                     -.pmerge <- function(x, y)
                     +### 'f': factor. The "reverse factor" of 'f' is the named list of integer
                     +### vectors that maps each level of 'f' to the positions in 'f' where that
                     +### level is used. It can quickly be computed with:
                     +###
                     +###     revfactor <- split(seq_along(f), f)
                     +###
                     +### 'f' can be rebuilt from 'revfactor' with:
                     +###
                     +###     f2 <- .make_factor_from_revfactor(revfactor, length(f))
                     +###     stopifnot(identical(f2, f))
                     +###
                     +.make_factor_from_revfactor <- function(revfactor, f_len)
+                     {
                     -    x_partitioning <- PartitioningByEnd(x)
                     -    y_partitioning <- PartitioningByEnd(y)
                     -    stopifnot(identical(x_partitioning, y_partitioning))
                     -    starts <- start(x_partitioning)
                     -    ends <- end(x_partitioning)
                     -    unlisted_x <- unlist(x, use.names=FALSE)
                     -    unlisted_y <- unlist(y, use.names=FALSE)
                     -    stopifnot(identical(unlisted_x[-starts], unlisted_y[-ends]))
                     -    y <- as(unlisted_y[ends], "List")
                     -    xy <- c(x, y)
                     -    f <- rep.int(seq_along(x), 2L)
                     -    ans <- unlistAndSplit(xy, f)
                     -    names(ans) <- names(x_partitioning)
                     -    ans
                     +    f_levels <- names(revfactor)
                     +    idx <- integer(f_len)
                     +    idx[] <- NA_integer_
                     +    idx[unlist(revfactor, use.names=FALSE)] <- rep.int(seq_along(revfactor),
                     +                                                   elementLengths(revfactor))
                     +    factor(f_levels[idx], levels=f_levels)
                     +}
+                    +
                     +### 'from' and 'to' must have the same length N (nb of unique edges in the
                     +### SplicingGraphs object before reduction).
                     +.make_revfactor_from_uninfnodes <- function(uninfnodes, from, to)
                     +{
                     +    from_idx <- match(uninfnodes, from)
                     +    to_idx <- match(uninfnodes, to)
                     +    keep_idx <- which(!(is.na(from_idx) | is.na(to_idx)))
                     +    from_idx <- from_idx[keep_idx]
                     +    to_idx <- to_idx[keep_idx]
                     +    stopifnot(all(from_idx == to_idx + 1L))  # sanity check
                     +    if (length(from_idx) == 0L) {
                     +        group1 <- integer(0)
                     +    } else {
                     +        group1 <- cumsum(c(TRUE, diff(from_idx) != 1L))
                     +    }
                     +    split_to_idx <- unname(splitAsList(to_idx, group1))
                     +    split_from_idx <- unname(splitAsList(from_idx, group1))
                     +    fancy_punion(split_to_idx, split_from_idx)
+                     }
                     -### 'gene_id', 'from', and 'to', must have the same length N (nb of unique
                     -### edges in the SplicingGraphs object befor reduction).
                     +### 'revfactor' must be a "reverse factor" as returned by
                     +### .make_revfactor_from_uninfnodes().
                      ### Returns a character vector of length N containing the global rsgedge id
                      ### (global reduced splicing graph edge id) corresponding to each input edge.
                     -.build_sgedge2rsgedge_map <- function(gene_id, from, to, ui_fqnodes)
                     +.build_sgedge2rsgedge_map_from_revfactor <- function(revfactor,
                     +                                                     gene_id, from, to)
+                     {
                     -    ans <- paste0(gene_id, ":", from, ",", to)
+                    -
                     -    fqfrom <- paste0(gene_id, ":", from)  # fully qualified ids
                     -    fqto <- paste0(gene_id, ":", to)  # fully qualified ids
                     -    idx1 <- match(ui_fqnodes, fqfrom)
                     -    idx2 <- match(ui_fqnodes, fqto)
                     -    keep_idx <- which(!(is.na(idx1) | is.na(idx2)))
                     -    idx1 <- idx1[keep_idx]
                     -    idx2 <- idx2[keep_idx]
                     -    stopifnot(all(idx1 == idx2 + 1L))  # sanity check
                     -    group1 <- cumsum(c(TRUE, diff(idx1) != 1L))
                     -    split_idx2 <- unname(splitAsList(idx2, group1))
                     -    split_idx1 <- unname(splitAsList(idx1, group1))
                     -    idx <- .pmerge(split_idx2, split_idx1)
                     -    alter_idx <- idx@unlistData
+                    -
                     -    from_list <- relist(from[alter_idx], idx)
                     -    to_list <- relist(to[alter_idx], idx)
                     -    nodes_list <- .pmerge(from_list, to_list)
                     +    ans <- make_global_sgedge_id(gene_id, from, to)
+                    +
                     +    unlisted_revfactor <- unlist(revfactor, use.names=FALSE)
                     +    from_list <- relist(from[unlisted_revfactor], revfactor)
                     +    to_list <- relist(to[unlisted_revfactor], revfactor)
                     +    nodes_list <- fancy_punion(from_list, to_list)
                          rsgedge_id <- sapply(nodes_list, paste0, collapse=",")
                     -    rsgedge_id <- rep.int(rsgedge_id, elementLengths(idx))
                     -    rsgedge_id <- paste(gene_id[alter_idx], rsgedge_id, sep=":")
                     +    rsgedge_id <- rep.int(rsgedge_id, elementLengths(revfactor))
                     +    rsgedge_id <- paste(gene_id[unlisted_revfactor], rsgedge_id, sep=":")
                     -    ans[alter_idx] <- rsgedge_id
                     +    ans[unlisted_revfactor] <- rsgedge_id
                          ans
+                     }
                     +.build_sgedge2rsgedge_map <- function(uninfnodes, gene_id, from, to)
                     +{
                     +    fq_from <- paste(gene_id, from, sep=":")  # fully qualified ids
                     +    fq_to <- paste(gene_id, to, sep=":")  # fully qualified ids
                     +    revfactor <- .make_revfactor_from_uninfnodes(uninfnodes, fq_from, fq_to)
                     +    .build_sgedge2rsgedge_map_from_revfactor(revfactor, gene_id, from, to)
                     +}
+                    +
                      ### 'edges' must be a GRanges object.
                      .reduce_edges <- function(edges, f)
+                     {
@@ -136,9 +155,9 @@
                          stopifnot(identical(edges_tx_id, edges_tx_id[sm]))  # sanity check
                          ans_tx_id <- edges_tx_id[keep_idx]
                     -    ans_mcols <- DataFrame(rsgedge_id=levels(f),
                     -                           from=ans_from,
                     +    ans_mcols <- DataFrame(from=ans_from,
                                                 to=ans_to,
                     +                           rsgedge_id=levels(f),
                                                 ex_or_in=ans_ex_or_in,
                                                 tx_id=ans_tx_id)
@@ -244,9 +263,9 @@ setMethod("rsgedgesByGene", "SplicingGraphs",
                              gene_id <- names(edges0)
                              from <- edges0_mcols[ , "from"]
                              to <- edges0_mcols[ , "to"]
                     -        ui_fqnodes <- .uninformative_fqnodes(x)
                     -        sgedge2rsgedge_map <- .build_sgedge2rsgedge_map(gene_id, from, to,
                     -                                                        ui_fqnodes)
                     +        uninfnodes <- .get_fq_uninfnodes(x)
                     +        sgedge2rsgedge_map <- .build_sgedge2rsgedge_map(uninfnodes,
                     +                                                        gene_id, from, to)
                              f <- factor(sgedge2rsgedge_map, levels=unique(sgedge2rsgedge_map))
                              ans_flesh <- .reduce_edges(edges0, f)
                              ans_flesh_names <- Rle(names(ans_flesh))
@@ -261,7 +280,7 @@ setMethod("rsgedgesByGene", "SplicingGraphs",
                      ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      ### rsgedges() extractor
                      ###
                     -### Same as sgedges() except that uninformative nodes (i.e. SSids) are removed.
                     +### Same as sgedges() except that uninformative nodes are removed.
                      ###
                      ### 'sgedges' must be a DataFrame as returned by:
@@ -278,15 +297,47 @@ setMethod("rsgedgesByGene", "SplicingGraphs",
                                   "to rsgedges()?")
                          levels(ex_or_in) <- EX_OR_IN_LEVELS2
                          uninformative_SSids <- uninformativeSSids(sgedges)
                     -    if (length(uninformative_SSids) == 0L)
                     +    if (length(uninformative_SSids) == 0L) {
                     +        col_idx <- match("sgedge_id", colnames(sgedges))
                     +        colnames(sgedges)[col_idx] <- "rsgedge_id"
                     +        sgedges$ex_or_in <- ex_or_in
                              return(sgedges)
                     +    }
+                    +
                          from <- sgedges[ , "from"]
                          to <- sgedges[ , "to"]
                     +    from_idx <- match(uninformative_SSids, from)
                     +    to_idx <- match(uninformative_SSids, to)
                     +    if (!all(from_idx == to_idx + 1L))
                     +        stop("Malformed input.\n",
                     +             "  In the input data.frame (or DataFrame) representing the ",
                     +             "original splicing graph, each uninformative splicing site ",
                     +             "id must appear in 2 consecutive rows (first in the \"to\" ",
                     +             "column, then in the \"from\" column. Could it be that the ",
                     +             "rows were subsetted before the data.frame (or DataFrame) ",
                     +             "was passed to rsgedges()?")
+                    +
                     +    ## Reduce "from" and "to" cols.
                     +    ans_from <- from[-from_idx]
                     +    ans_to <- to[-to_idx]
+                    +
                     +    ## Reduce "sgedge_id" col.
                     +    sgedges_id <- sgedges[ , "sgedge_id"]
                     +    tmp <- unlist(strsplit(sgedges_id, ":", fixed=TRUE), use.names=FALSE)
                     +    gene_id <- tmp[c(TRUE, FALSE)]
                     +    revfactor <- .make_revfactor_from_uninfnodes(uninformative_SSids, from, to)
                     +    sgedge2rsgedge_map <- .build_sgedge2rsgedge_map_from_revfactor(revfactor,
                     +                                                             gene_id, from, to)
                     +    f <- factor(sgedge2rsgedge_map, levels=unique(sgedge2rsgedge_map))
                     +    ans_rsgedge_id <- levels(f)
+                    +
                     +    ## Reduce "ex_or_in" col.
                     +    ex_or_in[from_idx] <- EX_OR_IN_LEVELS2[4L]
                     +    ans_ex_or_in <- ex_or_in[-to_idx]
+                    +
                     +    ## Reduce "tx_id" col.
                          tx_id <- sgedges[ , "tx_id"]
                     -    idx1 <- match(uninformative_SSids, from)
                     -    idx2 <- match(uninformative_SSids, to)
                     -    ## 2 sanity checks.
                     -    if (!identical(unname(tx_id[idx1]), unname(tx_id[idx2])))
                     +    if (!identical(unname(tx_id[from_idx]), unname(tx_id[to_idx])))
                              stop("Malformed input.\n",
                                   "  In the input data.frame (or DataFrame) representing the ",
                                   "original splicing graph, the 2 rows containing a given ",
@@ -294,20 +345,13 @@ setMethod("rsgedgesByGene", "SplicingGraphs",
                                   "Could it be that the \"tx_id\" column was manually altered ",
                                   "before the data.frame (or DataFrame) was passed to ",
                                   "rsgedges()?")
                     -    if (!all(idx1 == idx2 + 1L))
                     -        stop("Malformed input.\n",
                     -             "  In the input data.frame (or DataFrame) representing the ",
                     -             "original splicing graph, each uninformative splicing site ",
                     -             "id must appear in 2 consecutive rows (first in the \"to\" ",
                     -             "column, then in the \"from\" column. Could it be that the ",
                     -             "rows were subsetted before the data.frame (or DataFrame) ",
                     -             "was passed to rsgedges()?")
                     -    from <- from[-idx1]
                     -    to <- to[-idx2]
                     -    ex_or_in[idx1] <- EX_OR_IN_LEVELS2[4L]
                     -    ex_or_in <- ex_or_in[-idx2]
                     -    tx_id <- tx_id[-idx1]
                     -    DataFrame(from=from, to=to, ex_or_in=ex_or_in, tx_id=tx_id)
                     +    ans_tx_id <- tx_id[-from_idx]
+                    +
                     +    DataFrame(from=ans_from,
                     +              to=ans_to,
                     +              rsgedge_id=ans_rsgedge_id,
                     +              ex_or_in=ans_ex_or_in,
                     +              tx_id=ans_tx_id)
+                     }
                      rsgedges <- function(x)

R/sgedges-methods.R

History View file @ e3f9cd9

@@ -22,7 +22,7 @@
                      ### transcript) for a given gene. Should have been obtained thru the txpath()
                      ### accessor. Returns a 5-col (or 6-col if 'txweight' is supplied) data.frame
                      ### representing the splicing graph.
                     -.make_sgedges0_from_txpath <- function(txpath, txweight=NULL)
                     +.make_sgedges0_from_txpath <- function(txpath, gene_id, txweight=NULL)
+                     {
                          if (!is.null(txweight)) {
                              if (!is.numeric(txweight))
@@ -40,6 +40,8 @@
                                                           "an odd number of splicing site ids")
                                                  from <- c("R", txpath_i)
                                                  to <- c(txpath_i, "L")
                     +                            sgedge_id <- make_global_sgedge_id(gene_id,
                     +                                                               from, to)
                                                  nexons <- txpath_i_len %/% 2L
                                                  if (nexons == 0L) {
                                                      ex_or_in <- EX_OR_IN_LEVELS[3L]
@@ -55,6 +57,7 @@
                                                                     levels=EX_OR_IN_LEVELS)
                                                  data.frame(from=from,
                                                             to=to,
                     +                                       sgedge_id=sgedge_id,
                                                             ex_or_in=ex_or_in,
                                                             stringsAsFactors=FALSE)
                                              })
@@ -63,7 +66,7 @@
                          tx_id <- names(txpath)
                          if (is.null(tx_id))
                              tx_id <- seq_along(txpath)
                     -    tx_id <- rep.int(factor(tx_id, levels=tx_id), nedges_per_tx)
                     +    tx_id <- rep.int(tx_id, nedges_per_tx)
                          sgedges0$tx_id <- tx_id
                          if (!is.null(txweight))
                              sgedges0$txweight <- rep.int(txweight, nedges_per_tx)
@@ -72,17 +75,15 @@
                      ### Collapse the duplicated edges in 'sgedges0' into a DataFrame.
                      ### We use a DataFrame instead of a data.frame because we want to store
                     -### the tx_id col in a CompressedFactorList (even though this container
                     -### doesn't formally exist and a CompressedIntegerList is actually what's
                     -### being used).
                     +### the "tx_id" col in a CharacterList.
                      .make_sgedges_from_sgedges0 <- function(sgedges0, ex_hits=NULL, in_hits=NULL)
+                     {
                          from <- sgedges0[ , "from"]
                          to <- sgedges0[ , "to"]
                     +    sgedge_id <- sgedges0[ , "sgedge_id"]
                          ex_or_in <- sgedges0[ , "ex_or_in"]
                          tx_id <- sgedges0[ , "tx_id"]
                     -    edges <- paste(from, to, sep="~")
                     -    sm <- match(edges, edges)
                     +    sm <- match(sgedge_id, sgedge_id)
                          if (!all(ex_or_in == ex_or_in[sm]))
                              stop("invalid splicing graph")
                          is_not_dup <- sm == seq_along(sm)
@@ -178,14 +179,14 @@ setMethod("sgedges", "SplicingGraphs",
+                         {
                              if (!isTRUEorFALSE(keep.dup.edges))
                                  stop("'keep.dup.edges' must be TRUE or FALSE")
                     -        txpath <- txpath(x)
                     +        txpath <- txpath(x)  # fails if length(x) != 1
                     +        gene_id <- names(x)
                              if (is.null(txweight))
                                  txweight <- txweight(x)
                     +        sgedges0 <- .make_sgedges0_from_txpath(txpath, gene_id,
                     +                                               txweight=txweight)
                              if (keep.dup.edges)
                     -            return(sgedges(txpath, txweight=txweight,
                     -                                   keep.dup.edges=keep.dup.edges))
                     -        sgedges0 <- sgedges(txpath, txweight=txweight,
                     -                                    keep.dup.edges=TRUE)
                     +            return(sgedges0)
                              exon_hits <- .extract_sgedges_exon_hits(x)
                              intron_hits <- .extract_sgedges_intron_hits(x)
                              ## FIXME: Once .extract_sgedges_intron_hits() is fixed, merge the
@@ -198,28 +199,6 @@ setMethod("sgedges", "SplicingGraphs",
+                         }
+                     )
                     -setMethod("sgedges", "IntegerList",
                     -    function(x, txweight=NULL, keep.dup.edges=FALSE)
                     -    {
                     -        sgedges0 <- .make_sgedges0_from_txpath(x, txweight=txweight)
                     -        sgedges(sgedges0, keep.dup.edges=keep.dup.edges)
                     -    }
                     -)
+                    -
                     -setMethod("sgedges", "data.frame",
                     -    function(x, txweight=NULL, keep.dup.edges=FALSE)
                     -    {
                     -        if (!is.null(txweight))
                     -            stop("the 'txweight' arg is not supported ",
                     -                 "when 'x' is a data.frame")
                     -        if (!isTRUEorFALSE(keep.dup.edges))
                     -            stop("'keep.dup.edges' must be TRUE or FALSE")
                     -        if (keep.dup.edges)
                     -            return(x)  # no-op
                     -        .make_sgedges_from_sgedges0(x)
                     -    }
                     -)
+                    -
                      ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      ### sgnodes() accessor
@@ -229,7 +208,7 @@ setGeneric("sgnodes", signature="x",
                          function(x) standardGeneric("sgnodes")
+                     )
                     -setMethod("sgnodes", "ANY",
                     +setMethod("sgnodes", "SplicingGraphs",
                          function(x)
+                         {
                              txpath <- txpath(x)
@@ -270,7 +249,8 @@ setMethod("outdeg", "DataFrame",
                          function(x)
+                         {
                              sgnodes <- sgnodes(x)
                     -        ans <- countMatches(sgnodes, x[ , "from"])
                     +        m <- match(x[ , "from"], sgnodes)
                     +        ans <- tabulate(m, nbins=length(sgnodes))
                              names(ans) <- sgnodes
                              ans
+                         }
@@ -292,7 +272,8 @@ setMethod("indeg", "DataFrame",
                          function(x)
+                         {
                              sgnodes <- sgnodes(x)
                     -        ans <- countMatches(sgnodes, x[ , "to"])
                     +        m <- match(x[ , "to"], sgnodes)
                     +        ans <- tabulate(m, nbins=length(sgnodes))
                              names(ans) <- sgnodes
                              ans
+                         }

R/sgedgesByGene-methods.R

History View file @ e3f9cd9

@@ -82,7 +82,6 @@ setMethod("sgedgesByTranscript", "SplicingGraphs",
                              in_prepend_mcols$ex_or_in <- ex_or_in
                              tx_id <- mcols(ex_by_tx)[ , "tx_id"]
                     -        tx_id <- factor(tx_id, levels=unique(tx_id))
                              ex_prepend_mcols$tx_id <- rep.int(tx_id, nex_by_tx)
                              in_prepend_mcols$tx_id <- rep.int(tx_id, nin_by_tx)
@@ -131,11 +130,14 @@ setMethod("sgedgesByTranscript", "SplicingGraphs",
                              stopifnot(identical(ans_unlistData_end[minus_introns_idx] + 1L,
                                                  ans_unlistData_start[minus_introns_idx - 1L]))
                     -        ## Add "sgedge_id" metadata col.
                     -        sgedge_id <- paste0(rep.int(gene_ids, width(ans_partitioning)), ":",
                     -                            from, ",", to)
                     -        ans_unlistData_mcols <- cbind(DataFrame(sgedge_id=sgedge_id),
                     -                                      ans_unlistData_mcols)
                     +        ## Insert "sgedge_id" metadata col after first 2 metadata cols
                     +        ## ("from" and "to").
                     +        sgedge_id <- make_global_sgedge_id(
                     +                         rep.int(gene_ids, width(ans_partitioning)),
                     +                         from, to)
                     +        ans_unlistData_mcols <- c(ans_unlistData_mcols[1:2],
                     +                                  DataFrame(sgedge_id=sgedge_id),
                     +                                  ans_unlistData_mcols[-(1:2)])
                              check_all_edge_mcolnames(colnames(ans_unlistData_mcols))
                              ## Drop unwanted columns.

R/utils.R

History View file @ e3f9cd9

@@ -98,13 +98,13 @@ commonStrand.GRangesList <- function(x)
                      EXON_MCOLS <- c("exon_id", "exon_name", "exon_rank", "start_SSid", "end_SSid")
                      ### All edge metadata columns.
                     -ALL_EDGE_MCOLS <- c("sgedge_id", "from", "to", "ex_or_in", "tx_id", EXON_MCOLS)
                     +ALL_EDGE_MCOLS <- c("from", "to", "sgedge_id", "ex_or_in", "tx_id", EXON_MCOLS)
                      ### Subset of 'ALL_EDGE_MCOLS' made of those columns that are considered
                      ### invariant i.e. the values in them associated with the same sgedge_id
                      ### (global edge id) should be the same. Note that we also include the
                      ### "sgedge_id" col itself.
                     -INVARIANT_EDGE_MCOLS <- c("sgedge_id", "from", "to", "ex_or_in",
                     +INVARIANT_EDGE_MCOLS <- c("from", "to", "sgedge_id", "ex_or_in",
                                                "start_SSid", "end_SSid")
                      EX_OR_IN_LEVELS2 <- c("ex", "in", "", "mixed")
@@ -155,6 +155,23 @@ get_index_of_invariant_edge_mcols <- function(colnames)
+                     }
                     +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                     +### make_global_sgedge_id()
                     +###
                     +### Returns the "global splicing graph edge ids", of the form "gene_id:from,to".
                     +###
+                    +
                     +make_global_sgedge_id <- function(gene_id, from, to)
                     +{
                     +    ans_len <- length(from)
                     +    stopifnot(length(to) == ans_len)
                     +    stopifnot(length(gene_id) == 1L || length(gene_id) == ans_len)
                     +    if (ans_len == 0L)
                     +        return(character(0))
                     +    paste0(gene_id, ":", from, ",", to)
                     +}
+                    +
+                    +
                      ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      ### unlistAndSplit()
                      ###
@@ -184,6 +201,55 @@ unlistAndSplit <- function(x, f, drop=FALSE)
+                     }
                     +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                     +### fancy_punion() -- fancy parallel union
                     +###
                     +### Expects 'x' and 'y' to be 2 list-like objects such that:
                     +###   (a) all the list elements in 'x' and 'y' are vector-like objects of the
                     +###       same class;
                     +###   (b) 'x' and 'y' have the same length and names;
                     +###   (c) 'x' and 'y' have the same shape i.e. for any valid 'i', 'x[[i]]'
                     +###       and 'y[[i]]' have the same length;
                     +###   (d) 'x' and 'y' have no zero-length list elements;
                     +###   (e) for any valid index 'i', 'y[[i]][-L_i]' is identical to 'x[[i]][-1]',
                     +###       where L_i is the length of 'y[[i]]' (and 'x[[i]]').
                     +### Performs an optimized 'mendoapply(union, x, y)'.
                     +###
                     +### Example:
                     +###
                     +###   > x <- IntegerList(c(12, 4, 9), 5, c(8, -2))
                     +###   > y <- IntegerList(c(4, 9, 8), 0, c(-2, 10))
                     +###   > fancy_punion(x, y)
                     +###   IntegerList of length 3
                     +###   [[1]] 12 4 9 8
                     +###   [[2]] 5 0
                     +###   [[3]] 8 -2 10
                     +###
                     +fancy_punion <- function(x, y)
                     +{
                     +    x_partitioning <- PartitioningByEnd(x)
                     +    y_partitioning <- PartitioningByEnd(y)
                     +    if (!identical(x_partitioning, y_partitioning))
                     +        stop("'x' and 'y' must have the same length, names, and shape")
                     +    starts <- start(x_partitioning)
                     +    ends <- end(x_partitioning)
                     +    if (any(ends - starts == -1L))
                     +        stop("'x' and 'y' have zero-length list elements")
                     +    x_flesh <- unlist(x, use.names=FALSE)
                     +    y_flesh <- unlist(y, use.names=FALSE)
                     +    if (!identical(x_flesh[-starts], y_flesh[-ends]))
                     +        stop("for any valid index 'i', 'y[[i]][-length(y[[i]])]' ",
                     +             "must be identical to 'x[[i]][-1]'")
                     +    ans_breakpoints <- ends + seq_along(ends)
                     +    ans_flesh <- c(x_flesh, y_flesh[ends])
                     +    ans_flesh[-ans_breakpoints] <- x_flesh
                     +    ans_flesh[ans_breakpoints] <- y_flesh[ends]
                     +    ans_skeleton <- PartitioningByEnd(ans_breakpoints,
                     +                                      names=names(x_partitioning))
                     +    relist(ans_flesh, ans_skeleton)
                     +}
+                    +
+                    +
                      ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      ### uninformative_sgnodes()
                      ###

inst/doc/SplicingGraphs.Rnw

History View file @ e3f9cd9

@@ -543,6 +543,8 @@ The \Rclass{DataFrame} object returned by \Rfunction{sgedges} has the
                      following columns:
                      \begin{itemize}
                        \item \Rcode{from}, \Rcode{to}: The 2 nodes connected by the edge.
                     +  \item \Rcode{sgedge\_id}: A \textit{global edge id} of the form
                     +        \Rcode{gene\_id:from,to}.
                        \item \Rcode{ex\_or\_in}: The type of the edge, i.e., exon, intron, or
                              no type if it's an artificial edge.
                        \item \Rcode{tx\_id}: The ids of the transcripts that support the edge.
@@ -566,17 +568,11 @@ edges_by_gene[["107328"]]
+                     @
                      \end{small}
                     -The edge-level metadata columns are the following:
                     -\begin{itemize}
                     -  \item \Rcode{sgedge\_id}: A \textit{global edge id} of the form
                     -        \Rcode{gene\_id:from,to}.
                     -  \item \Rcode{from}, \Rcode{to}, \Rcode{ex\_or\_in}, \Rcode{tx\_id}:
                     -        See the columns of the \Rclass{DataFrame} object returned
                     -        by \Rfunction{sgedges}.
                     -\end{itemize}
+                    -
                     -The artificial edges (i.e., edges starting from the root
                     -node (\Rcode{R}) or ending at the leaf node (\Rcode{L})) are omitted!
                     +The edge-level metadata columns are the same as the columns of the
                     +\Rclass{DataFrame} object returned by \Rfunction{sgedges}.
                     +An important difference though is that the artificial edges (i.e., edges
                     +starting from the root node (\Rcode{R}) or ending at the leaf node
                     +(\Rcode{L})) are omitted!
                      Finally, to plot a given splicing graph:

man/countReads.Rd

History View file @ e3f9cd9

@@ -99,10 +99,19 @@ countReads(x, by=c("sgedge", "rsgedge"))
                        the reads assigned to it.
                        For \code{countReads}: a \link[IRanges]{DataFrame} object with one row
                     -  per unique splicing graph edge and one column of counts per sample.
                     -  Two additional columns (\code{"sgedge_id"} and \code{"ex_or_in"}) contain
                     -  the splicing graph edge ids and the type of edge (exon or intron),
                     -  respectively.
                     +  per unique splicing graph edge if \code{by="sgedge"}, or one row per
                     +  unique reduced splicing graph edge if \code{by="rsgedge"}.
                     +  There is one column of counts per sample, and the following two
                     +  additional columns:
                     +  \enumerate{
                     +    \item \code{"sgedge_id"} if \code{by="sgedge"} or \code{"rsgedge_id"}
                     +          if \code{by="rsgedge"}: contains the \emph{global splicing graph
                     +          edge ids} if \code{by="sgedge"}, or the \emph{global reduced
                     +          splicing graph edge ids} if \code{by="rsgedge"}.
                     +    \item \code{"ex_or_in"}: the type of edge. This can be exon or intron
                     +          if \code{by="sgedge"}, or exon, intron, or mixed if
                     +          \code{by="rsgedge"}.
                     +  }
+                     }
                      \author{

man/rsgedgesByGene-methods.Rd

History View file @ e3f9cd9

@@ -119,6 +119,8 @@ edges_by_gene[["geneA"]]
                      ## "geneA:2,4" (intron), and "geneA:4,5" (exon), during the graph
                      ## reduction.
                     +stopifnot(identical(edges_by_gene["geneB"], rsgedgesByGene(sg["geneB"])))
+                    +
                      ## ---------------------------------------------------------------------
                      ## 3. sgedgesByTranscript()
                      ## ---------------------------------------------------------------------

man/sgedges-methods.Rd

History View file @ e3f9cd9

@@ -4,11 +4,9 @@
                      \alias{sgedges}
                      \alias{sgedges,SplicingGraphs-method}
                     -\alias{sgedges,IntegerList-method}
                     -\alias{sgedges,data.frame-method}
                      \alias{sgnodes}
                     -\alias{sgnodes,ANY-method}
                     +\alias{sgnodes,SplicingGraphs-method}
                      \alias{sgnodes,IntegerList-method}
                      \alias{sgnodes,data.frame-method}
                      \alias{sgnodes,DataFrame-method}
@@ -94,7 +92,6 @@ check_way1_vs_way2 <- function(res1, res2)
+                     {
                          edges1 <- res1[res1$ex_or_in != "", ]  # remove artificial edges
                          edges2 <- mcols(unlist(res2, use.names=FALSE))
                     -    edges2 <- edges2[ , -1]  # remove "sgedge_id" col (global edge id)
                          stopifnot(identical(edges1, edges2))
+                     }