Bioconductor Code: SplicingGraphs

Browse code

Change SplicingGraphs class definition and API so SplicingGraphs objects are now list-like objects with 1 gene per list element.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@74180 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 11/03/2013 21:18:32
Showing 13 changed files

DESCRIPTION index 219d4fe..67c9a1f 100644
NAMESPACE index e093fa5..85187ab 100644
R/SplicingGraphs-class.R index 5f8cb69..61a936b 100644
R/countReads.R index 22e63b9..a71e9cb 100644
R/sgedges-methods.R index cc5f597..69eab56 100644
R/sgraph-methods.R index 193dde6..a1e1b1e 100644
inst/scripts/TSPC-utils.R index ec28210..dced7e8 100644
inst/scripts/create-vignette-plots.R index c4b3af0..d8e1ffe 100644
man/SplicingGraphs-class.Rd index 634b003..7bd39b8 100644
man/bubbles-methods.Rd index acee469..c38e42e 100644
man/countReads.Rd index 2d57043..00a1cc1 100644
man/sgedges-methods.Rd index 8734959..5a9a79e 100644
man/sgraph-methods.Rd index e951066..40c38b0 100644

DESCRIPTION

History View file @ 7774286

@@ -1,6 +1,6 @@
                      Package: SplicingGraphs
                      Title: Tools for creating splicing graphs from annotations and RNA-Seq data
                     -Version: 0.5.0
                     +Version: 0.5.1
                      Author: D. Bindreither, M. Carlson, M. Morgan, H. Pages
                      License: Artistic-2.0
                      Description: This package provides tools for creating splicing graphs based on

NAMESPACE

History View file @ 7774286

@@ -25,12 +25,7 @@ exportClasses(SplicingGraphs)
                      ###
                      exportMethods(
                     -    length,
                     -    names,
                     -    elementLengths,
                     -    plot,
                     -    findOverlaps,
                     -    encodeOverlaps
                     +    plot
+                     )

R/SplicingGraphs-class.R

History View file @ 7774286

@@ -3,26 +3,49 @@
                      ### -------------------------------------------------------------------------
                     -### We deliberately choose to not extend GRangesList to make SplicingGraphs
                     -### objects read-only and with a very restricted API (opaque objects).
                      setClass("SplicingGraphs",
                     +    contains="CompressedList",
                          representation(
                     -        tx="GRangesList"
                     +        unlistData="GRangesList",
                     +        elementMetadata="DataFrame"
                     +    ),
                     +    prototype(
                     +        elementType="GRangesList"
+                         )
+                     )
                      ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                     -### Basic accessors.
                     -###
                     -### We support only a very small subset of getters from the GRangesList API.
                     +### Validity.
                      ###
                     -setMethod("length", "SplicingGraphs", function(x) length(x@tx))
                     +.valid.SplicingGraphs.names <- function(x)
                     +{
                     +    x_names <- names(x)
                     +    if (is.null(x_names)) {
                     +        if (length(x) == 1L)
                     +            return(NULL)
                     +        return("'x' must have names")
                     +    }
                     +    if (anyDuplicated(x_names))
                     +        return("'x' has duplicated names")
                     +    NULL
                     +}
+                    +
                     +.valid.SplicingGraphs.unlistData <- function(x)
                     +{
                     +    x_unlistData <- x@unlistData
                     +    if (!is.null(names(x_unlistData)))
                     +        return("'x@unlistData' must be unnamed")
                     +    NULL
                     +}
                     -setMethod("names", "SplicingGraphs", function(x) names(x@tx))
                     +.valid.SplicingGraphs <- function(x)
                     +{
                     +    c(.valid.SplicingGraphs.names(x), .valid.SplicingGraphs.unlistData(x))
                     +}
                     -setMethod("elementLengths", "SplicingGraphs", function(x) elementLengths(x@tx))
                     +setValidity2("SplicingGraphs", .valid.SplicingGraphs)
                      ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -32,14 +55,9 @@ setMethod("elementLengths", "SplicingGraphs", function(x) elementLengths(x@tx))
                      setMethod("show", "SplicingGraphs",
                          function(object)
+                         {
                     -        ntx <- length(object)
                     -        object_names <- names(object)
                     -        if (is.null(object_names)) {
                     -            ngenes <- ifelse(ntx == 0L, 0L, 1L)
                     -        } else {
                     -            ngenes <- length(unique(object_names))
                     -        }
                     -        cat(class(object), " object with ", ngenes, " gene(s) ",
                     +        ngene <- length(object)
                     +        ntx <- length(unlist(object, use.names=FALSE))
                     +        cat(class(object), " object with ", ngene, " gene(s) ",
                                  "and ", ntx, " transcript(s)\n", sep="")
+                         }
+                     )
@@ -270,9 +288,9 @@ setMethod("show", "SplicingGraphs",
                      ### TODO: Improve handling of invalid genes i.e. provide more details about
                      ### which genes were considered invalid and why.
                     -.make_SplicingGraphs_from_GRangesList <- function(x, grouping=NULL,
                     -                                                  min.ntx=2, max.ntx=NA,
                     -                                                  check.introns=TRUE)
                     +.make_unlisted_SplicingGraphs_from_GRangesList <- function(x, grouping=NULL,
                     +                                                      min.ntx=2, max.ntx=NA,
                     +                                                      check.introns=TRUE)
+                     {
                          if (!is(x, "GRangesList"))
                              stop("'x' must be a GRangesList object")
@@ -349,10 +367,20 @@ setGeneric("SplicingGraphs", signature="x",
                      setMethod("SplicingGraphs", "GRangesList",
                          function(x, grouping=NULL, min.ntx=2, max.ntx=NA, check.introns=TRUE)
+                         {
                     -        ans_tx <- .make_SplicingGraphs_from_GRangesList(x,
                     -                      grouping=grouping, min.ntx=min.ntx, max.ntx=max.ntx,
                     -                      check.introns=check.introns)
                     -        new("SplicingGraphs", tx=ans_tx)
                     +        ans_unlistData <- .make_unlisted_SplicingGraphs_from_GRangesList(x,
                     +                            grouping=grouping, min.ntx=min.ntx, max.ntx=max.ntx,
                     +                            check.introns=check.introns)
                     +        ans_unlistData_names <- names(ans_unlistData)
                     +        if (is.null(ans_unlistData_names)) {
                     +            ans_partitioning <- PartitioningByEnd(length(ans_unlistData))
                     +        } else {
                     +            names(ans_unlistData) <- NULL
                     +            ans_end <- end(Rle(ans_unlistData_names))
                     +            ans_names <- ans_unlistData_names[ans_end]
                     +            ans_partitioning <- PartitioningByEnd(ans_end, names=ans_names)
                     +        }
                     +        IRanges:::newCompressedList0("SplicingGraphs",
                     +                                     ans_unlistData, ans_partitioning)
+                         }
+                     )

R/countReads.R

History View file @ 7774286

@@ -3,27 +3,6 @@
                      ### compatible hits per exon or per intron
                      ### -------------------------------------------------------------------------
                     -setMethod("findOverlaps", c("GRangesList", "SplicingGraphs"),
                     -    function(query, subject, maxgap=0L, minoverlap=1L,
                     -             type=c("any", "start", "end", "within", "equal"),
                     -             select=c("all", "first", "last", "arbitrary"),
                     -             ignore.strand=ignore.strand)
                     -    {
                     -        findOverlaps(query, subject@tx,
                     -                     maxgap=maxgap, minoverlap=minoverlap,
                     -                     type=match.arg(type), select=match.arg(select),
                     -                     ignore.strand=ignore.strand)
                     -    }
                     -)
+                    -
                     -setMethod("encodeOverlaps", c("GRangesList", "SplicingGraphs"),
                     -    function(query, subject, hits=NULL, flip.query.if.wrong.strand=FALSE)
                     -    {
                     -        encodeOverlaps(query, subject@tx,
                     -                       hits=hits,
                     -                       flip.query.if.wrong.strand=flip.query.if.wrong.strand)
                     -    }
                     -)
                      ### 'query': a named GRangesList object containing gapped reads.
                      ### 'subject': a GRangesList object containing some subfeature (e.g. exons

R/sgedges-methods.R

History View file @ 7774286

@@ -58,19 +58,17 @@ setMethod("txpaths", "SplicingGraphs",
                                  stop("'as.matrix' must be TRUE or FALSE")
                              if (length(x) == 0L)
                                  stop("'x' must be of length >= 1")
                     -        x_names <- names(x)
                     -        ans <- mcols(x@tx)[ , "txpaths"]
                     -        if (!is.null(x_names)) {
                     +        unlisted_x <- unlist(x)
                     +        unlisted_names <- names(unlisted_x)
                     +        ans <- mcols(unlisted_x)[ , "txpaths"]
                     +        if (!is.null(unlisted_names)) {
                                  if (is.na(gene_id))
                                      stop("'gene_id' must be supplied when 'x' has names")
                     -            ans <- ans[x_names == gene_id]
                     +            ans <- ans[unlisted_names == gene_id]
                                  if (length(ans) == 0L)
                                      stop("invalid 'gene_id'")
                              } else if (!is.na(gene_id)) {
                     -            stop("the 'gene_id' arg is not supported ",
                     -                 "when 'x' is unnamed (in which case all its elements ",
                     -                 "(i.e. transcripts) are considered to belong to the ",
                     -                 "same gene)")
                     +            stop("the 'gene_id' arg is not supported when 'x' is unnamed")
+                             }
                              if (as.matrix)
                                  ans <- make_matrix_from_txpaths(ans)
@@ -95,21 +93,19 @@ setMethod("UATXHcount", "SplicingGraphs",
                                  stop("'gene_id' must be a single string (or NA)")
                              if (length(x) == 0L)
                                  stop("'x' must be of length >= 1")
                     -        x_names <- names(x)
                     -        ans <- mcols(x@tx)[["UATXHcount"]]
                     -        if (!is.null(x_names)) {
                     +        unlisted_x <- unlist(x)
                     +        unlisted_names <- names(unlisted_x)
                     +        ans <- mcols(unlisted_x)[["UATXHcount"]]
                     +        if (!is.null(unlisted_names)) {
                                  if (is.na(gene_id))
                                      stop("'gene_id' must be supplied when 'x' has names")
                                  if (is.null(ans))
                                      return(ans)
                     -            ans <- ans[x_names == gene_id]
                     +            ans <- ans[unlisted_names == gene_id]
                                  if (length(ans) == 0L)
                                      stop("invalid 'gene_id'")
                              } else if (!is.na(gene_id)) {
                     -            stop("the 'gene_id' arg is not supported ",
                     -                 "when 'x' is unnamed (in which case all its elements ",
                     -                 "(i.e. transcripts) are considered to belong to the ",
                     -                 "same gene)")
                     +            stop("the 'gene_id' arg is not supported when 'x' is unnamed")
+                             }
                              ans
+                         }
@@ -277,17 +273,19 @@ setMethod("sgedges", "ANY",
                              if (!is(x, "SplicingGraphs"))
                                  stop("'x' must be a SplicingGraphs object ",
                                       "when 'in_by_tx' is a GRangesList object")
                     -        if (length(in_by_tx) != length(x))
                     -            stop("'in_by_tx' must have the same length as 'x'")
                     -        if (!identical(elementLengths(in_by_tx) + 1L, elementLengths(x)))
                     +        unlisted_x <- unlist(x)
                     +        if (length(in_by_tx) != length(unlisted_x))
                     +            stop("'in_by_tx' must have the same length as 'unlist(x)'")
                     +        if (!identical(elementLengths(in_by_tx) + 1L,
                     +                       elementLengths(unlisted_x)))
                                  stop("the shape of 'in_by_tx' is not compatible ",
                     -                 "with the shape of 'x'")
                     +                 "with the shape of 'unlist(x)'")
                              if (!identical(keep.dup.edges, FALSE))
                                  stop("'keep.dup.edges' must be FALSE when 'in_by_tx' is supplied")
                              sgedges0 <- sgedges(txpaths, UATXHcount=UATXHcount,
                                                           keep.dup.edges=TRUE)
                              ex_or_in <- sgedges0[ , "ex_or_in"]
                     -        ex_hits <- .hits(x@tx, gene_id=gene_id)
                     +        ex_hits <- .hits(unlisted_x, gene_id=gene_id)
                              if (is.null(ex_hits))
                                  stop("'x' must have a \"hits\" inner metadata column ",
                                       "when 'in_by_tx' is a GRangesList object. May be ",

R/sgraph-methods.R

History View file @ 7774286

@@ -248,9 +248,11 @@ slideshow <- function(x)
+                     {
                          if (!is(x, "SplicingGraphs"))
                              stop("'x' must be a SplicingGraphs object")
                     -    for (gene_id in unique(names(x))) {
                     -        ntx <- sum(names(x) == gene_id)
                     -        cat("Plotting gene ", gene_id, " (", ntx, " transcripts). ", sep="")
                     +    x_eltlen <- elementLengths(x)
                     +    for (gene_id in names(x)) {
                     +        ntx <- x_eltlen[[gene_id]]
                     +        cat("Plotting splicing graph for gene \"", gene_id, "\" ",
                     +            "(", ntx, " transcript(s)). ", sep="")
                              plot(x, gene_id)
                              cat("Press <Enter> for next...")
                              readLines(n=1)

inst/scripts/TSPC-utils.R

History View file @ 7774286

@@ -29,13 +29,15 @@ loadModels <- function(models_path, check.transcripts=TRUE)
                      ### I guess not...
                      makeSgedgesWithHits <- function(grl, sg)
+                     {
                     -    ov0 <- findOverlaps(grl, sg@tx, ignore.strand=TRUE)
                     -    ovenc0 <- encodeOverlaps(grl, sg@tx, hits=ov0,
                     +    unlisted_sg <- unlist(sg)
                     +    ov0 <- findOverlaps(grl, unlisted_sg, ignore.strand=TRUE)
                     +    ovenc0 <- encodeOverlaps(grl, unlisted_sg, hits=ov0,
                                                   flip.query.if.wrong.strand=TRUE)
                          ov0_is_comp <- isCompatibleWithSplicing(ovenc0)
                          ov1 <- ov0[ov0_is_comp]
                     -    sg@tx <- assignSubfeatureHits(grl, sg@tx, ov1, ignore.strand=TRUE)
                     -    in_by_tx <- psetdiff(range(sg@tx), sg@tx)
                     +    sg@unlistData <- unname(assignSubfeatureHits(grl, unlisted_sg, ov1,
                     +                                                 ignore.strand=TRUE))
                     +    in_by_tx <- psetdiff(range(unlisted_sg), unlisted_sg)
                          in_by_tx <- assignSubfeatureHits(grl, in_by_tx, ov1, ignore.strand=TRUE)
                          sgedges(sg, in_by_tx=in_by_tx)
+                     }

inst/scripts/create-vignette-plots.R

History View file @ 7774286

@@ -51,7 +51,7 @@ library(Gviz)
                      # Plotting to the PDF device produces an incomplete plot (ax_track is missing)!
                      # Looks like a bug in Gviz.
                      ax_track <- GenomeAxisTrack()
                     -grl <- sg@tx[names(sg@tx) == "117286"]
                     +grl <- sg[["117286"]]
                      ### We create 1 track per transcript.
                      tx_tracks <- lapply(seq_along(grl),
                                          function(i) {
@@ -85,7 +85,7 @@ library(Gviz)
                      # Plotting to the PDF device produces an incomplete plot (ax_track is missing)!
                      # Looks like a bug in Gviz.
                      ax_track <- GenomeAxisTrack()
                     -grl <- sg@tx[names(sg@tx) == "126017"]
                     +grl <- sg[["126017"]]
                      ### We create 1 track per transcript.
                      tx_tracks <- lapply(seq_along(grl),
                                          function(i) {

man/SplicingGraphs-class.Rd

History View file @ 7774286

@@ -7,9 +7,6 @@
                      \alias{SplicingGraphs,GRangesList-method}
                      \alias{SplicingGraphs,TranscriptDb-method}
                     -\alias{length,SplicingGraphs-method}
                     -\alias{names,SplicingGraphs-method}
                     -\alias{elementLengths,SplicingGraphs-method}
                      \alias{show,SplicingGraphs-method}
@@ -24,23 +21,13 @@
                      \usage{
                      SplicingGraphs(x, grouping=NULL, min.ntx=2, max.ntx=NA, check.introns=TRUE)
+                    -
                     -## Basic accessors:
+                    -
                     -\S4method{length}{SplicingGraphs}(x)
                     -\S4method{names}{SplicingGraphs}(x)
                     -\S4method{elementLengths}{SplicingGraphs}(x)
+                     }
                      \arguments{
                        \item{x}{
                     -    For \code{SplicingGraphs}: A \link[GenomicRanges]{GRangesList} object
                     -    containing the exons of one or more genes grouped by transcripts.
                     -    Alternatively, \code{x} can be a \link[GenomicFeatures]{TranscriptDb}
                     -    object. See Details section below.
+                    -
                     -    For the \code{length}, \code{names}, and \code{elementLengths} methods
                     -    for SplicingGraphs objects: A SplicingGraphs object.
                     +    A \link[GenomicRanges]{GRangesList} object containing the exons of one
                     +    or more genes grouped by transcripts. Alternatively, \code{x} can be a
                     +    \link[GenomicFeatures]{TranscriptDb} object. See Details section below.
+                       }
                        \item{grouping}{
                          An optional object that represents the grouping by gene of the top-level
@@ -183,12 +170,17 @@ sg2 <- SplicingGraphs(toy_genes_txdb)
                      stopifnot(identical(sg2, sg))
                      sg
                     -## 'sg' has 1 element per transcript, and each transcript is
                     -## assigned a name that is the id of the gene it belongs to. All the
                     -## transcripts belonging to the same gene are guaranteed to be
                     -## consecutive elements in 'sg'.
                     +## 'sg' has 1 element per gene. 'names(sg)' are the gene ids.
                      names(sg)
                     +## The transcripts of a given gene can be extracted as an *unnamed*
                     +## GRangesList object with [[:
                     +sg[["geneB"]]
+                    +
                     +## The transcripts of all the genes can be extracted as a *named*
                     +## GRangesList object with unlist():
                     +unlist(sg)
+                    +
                      ## ---------------------------------------------------------------------
                      ## 3. Extract information from the SplicingGraphs object
                      ## ---------------------------------------------------------------------

man/bubbles-methods.Rd

History View file @ 7774286

@@ -57,10 +57,7 @@ bubbles(x, gene_id=NA)
                      example(SplicingGraphs)  # create SplicingGraphs object 'sg'
                      sg
                     -## 'sg' has 1 element per transcript, and each transcript is
                     -## assigned a name that is the id of the gene it belongs to. All the
                     -## transcripts belonging to the same gene are guaranteed to be
                     -## consecutive elements in 'sg'.
                     +## 'sg' has 1 element per gene. 'names(sg)' are the gene ids.
                      names(sg)
                      plot(sgraph(sg, gene_id="geneA", tx_id.as.edge.label=TRUE))

man/countReads.Rd

History View file @ 7774286

@@ -69,7 +69,7 @@ gal
                      ## Find the overlaps between the reads and the transcripts:
                      grl <- grglist(gal, order.as.in.query=TRUE)
                     -ov0 <- findOverlaps(grl, sg, ignore.strand=TRUE)
                     +ov0 <- findOverlaps(grl, unlist(sg), ignore.strand=TRUE)
                      ## Nb of hits:
                      head(countQueryHits(ov0))  # per read
@@ -77,7 +77,7 @@ head(countSubjectHits(ov0))  # per transcript
                      ## Keep only overlaps that are "compatible" with the splicing of the
                      ## transcripts:
                     -ovenc0 <- encodeOverlaps(grl, sg, hits=ov0,
                     +ovenc0 <- encodeOverlaps(grl, unlist(sg), hits=ov0,
                                               flip.query.if.wrong.strand=TRUE)
                      ov0_is_comp <- isCompatibleWithSplicing(ovenc0)
                      ov1 <- ov0[ov0_is_comp]
@@ -88,7 +88,7 @@ ov1 <- ov0[ov0_is_comp]
                      ov2 <- ov1[queryHits(ov1) \%in\% which(countQueryHits(ov1) == 1L)]
                      UATXHcount <- countSubjectHits(ov2)
                     -mcols(sg@tx)$UATXHcount <- UATXHcount
                     +mcols(sg@unlistData)$UATXHcount <- UATXHcount
                      names(sg)  # valid gene ids
                      plot(sg, "geneA")
@@ -100,10 +100,12 @@ plot(sg, "geneA")
                      ## ---------------------------------------------------------------------
                      ## Assign compatible hits for each exon:
                     -sg@tx <- assignSubfeatureHits(grl, sg@tx, ov1, ignore.strand=TRUE)
                     +unlisted_sg <- unlist(sg)
                     +sg@unlistData <- unname(assignSubfeatureHits(grl, unlisted_sg, ov1,
                     +                                             ignore.strand=TRUE))
                      ## Assign compatible hits for each intron:
                     -in_by_tx <- psetdiff(range(sg@tx), sg@tx)
                     +in_by_tx <- psetdiff(range(unlisted_sg), unlisted_sg)
                      in_by_tx <- assignSubfeatureHits(grl, in_by_tx, ov1, ignore.strand=TRUE)
                      sgedges(sg, gene_id="geneA", in_by_tx=in_by_tx)

man/sgedges-methods.Rd

History View file @ 7774286

@@ -107,10 +107,7 @@ uninformativeSSids(x, gene_id=NA)
                      example(SplicingGraphs)  # create SplicingGraphs object 'sg'
                      sg
                     -## 'sg' has 1 element per transcript, and each transcript is
                     -## assigned a name that is the id of the gene it belongs to. All the
                     -## transcripts belonging to the same gene are guaranteed to be
                     -## consecutive elements in 'sg'.
                     +## 'sg' has 1 element per gene. 'names(sg)' are the gene ids.
                      names(sg)
                      sgedges(sg, gene_id="geneD")
@@ -119,5 +116,5 @@ outdeg(sg, gene_id="geneD")
                      indeg(sg, gene_id="geneD")
                      txpaths(sg, gene_id="geneD")
                     -txpaths(sg, gene_id="geneD", as.matrix=TRUE)
                     +txpaths(sg, gene_id="geneD", as.matrix=TRUE)  # splicing matrix
+                     }

man/sgraph-methods.Rd

History View file @ 7774286

@@ -83,10 +83,7 @@ slideshow(x)
                      example(SplicingGraphs)  # create SplicingGraphs object 'sg'
                      sg
                     -## 'sg' has 1 element per transcript, and each transcript is
                     -## assigned a name that is the id of the gene it belongs to. All the
                     -## transcripts belonging to the same gene are guaranteed to be
                     -## consecutive elements in 'sg'.
                     +## 'sg' has 1 element per gene. 'names(sg)' are the gene ids.
                      names(sg)
                      sgA <- sgraph(sg, gene_id="geneA", tx_id.as.edge.label=TRUE)