Bioconductor Code: SplicingGraphs

Browse code

Add getReads(). Similar to countReads() but returns right before the final counting step, that is, the returned DataFrame contains the reads instead of their counts.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@76127 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 29/04/2013 18:02:30
Showing 5 changed files

NAMESPACE index 7a7299a..771f70e 100644
R/countReads-methods.R index 7795b9a..b5d454b 100644
TODO index 63f97b3..e1338ba 100644
man/SplicingGraphs-package.Rd index 6831df3..099e2d6 100644
man/countReads-methods.Rd index aa1843c..bf784f1 100644

NAMESPACE

History View file @ 05410e1

@@ -102,6 +102,7 @@ export(
                          bubbles,
                          ## countReads-methods.R:
                     +    getReads,
                          countReads
+                     )
@@ -121,6 +122,7 @@ exportMethods(
                          rsgedgesByTranscript,
                          rsgedgesByGene,
                          bubbles,
                     +    getReads,
                          countReads
+                     )

R/countReads-methods.R

History View file @ 05410e1

@@ -1,35 +1,37 @@
                      ### =========================================================================
                     -### Summarizing the reads assigned to the edges of a SplicingGraphs object
                     +### Summarizing the reads assigned to a SplicingGraphs object
                      ### -------------------------------------------------------------------------
                     -.countReads_by_sgedge <- function(x)
                     +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                     +### getReads()
                     +###
+                    +
                     +.getReads_by_sgedge <- function(x)
+                     {
                          edges_by_gene <- sgedgesByGene(x, with.hits.mcols=TRUE)
                          edge_data <- mcols(unlist(edges_by_gene, use.names=FALSE))
                          edge_data_colnames <- colnames(edge_data)
                          hits_mcol_idx <- grep("\\.hits$", edge_data_colnames)
                     -    ans <- endoapply(edge_data[hits_mcol_idx], elementLengths)
                     -    colnames(ans) <- sub("\\.hits$", "", colnames(ans))
                     +    hits_cols <- edge_data[hits_mcol_idx]
                          left_mcolnames <- c("sgedge_id", "ex_or_in")
                          left_cols <- edge_data[left_mcolnames]
                     -    cbind(left_cols, ans)
                     +    cbind(left_cols, hits_cols)
+                     }
                     -.countReads_by_rsgedge <- function(x)
                     +.getReads_by_rsgedge <- function(x)
+                     {
                          edges_by_gene <- rsgedgesByGene(x, with.hits.mcols=TRUE)
                          edge_data <- mcols(unlist(edges_by_gene, use.names=FALSE))
                          edge_data_colnames <- colnames(edge_data)
                          hits_mcol_idx <- grep("\\.hits$", edge_data_colnames)
                     -    ans <- endoapply(edge_data[hits_mcol_idx], elementLengths)
                     -    colnames(ans) <- sub("\\.hits$", "", colnames(ans))
                     +    hits_cols <- edge_data[hits_mcol_idx]
                          left_mcolnames <- c("rsgedge_id", "ex_or_in")
                          left_cols <- edge_data[left_mcolnames]
                     -    cbind(left_cols, ans)
                     +    cbind(left_cols, hits_cols)
+                     }
                     -.countReads_by_tx <- function(x)
                     +.getReads_by_tx <- function(x)
+                     {
                          ex_by_tx <- unlist(x)
                          tx_id <- mcols(ex_by_tx)[ , "tx_id"]
@@ -44,20 +46,18 @@
                          ## a function 'FUN' that modifies the nb of rows. Furthermore, the
                          ## returned object passes validation despite being broken! Fix it
                          ## in IRanges.
                     -    ans <- endoapply(edge_data[hits_mcol_idx],
                     -                     function(hits)
                     -                       elementLengths(unique(regroup(hits,
                     -                                                     edge_data_breakpoints))))
                     +    hits_cols <- endoapply(edge_data[hits_mcol_idx],
                     +                           function(hits)
                     +                             unique(regroup(hits, edge_data_breakpoints)))
                          ## Fix the broken DataFrame returned by endoapply().
                     -    ans@nrows <- length(tx_id)
                     -    ans@rownames <- NULL
                     +    hits_cols@nrows <- length(tx_id)
                     +    hits_cols@rownames <- NULL
                     -    colnames(ans) <- sub("\\.hits$", "", colnames(ans))
                     -    cbind(DataFrame(tx_id=tx_id, gene_id=gene_id), ans)
                     +    cbind(DataFrame(tx_id=tx_id, gene_id=gene_id), hits_cols)
+                     }
                     -.countReads_by_gene <- function(x)
                     +.getReads_by_gene <- function(x)
+                     {
                          edges_by_gene <- sgedgesByGene(x, with.hits.mcols=TRUE)
                          edge_data <- mcols(unlist(edges_by_gene, use.names=FALSE))
@@ -69,21 +69,43 @@
                          ## a function 'FUN' that modifies the nb of rows. Furthermore, the
                          ## returned object passes validation despite being broken! Fix it
                          ## in IRanges.
                     -    ans <- endoapply(edge_data[hits_mcol_idx],
                     +    hits_cols <- endoapply(edge_data[hits_mcol_idx],
                                           function(hits)
                     -                       elementLengths(unique(regroup(hits,
                     -                                                     edge_data_breakpoints))))
                     +                       unique(regroup(hits, edge_data_breakpoints)))
                          ## Fix the broken DataFrame returned by endoapply().
                     -    ans@nrows <- length(edges_by_gene)
                     -    ans@rownames <- NULL
                     +    hits_cols@nrows <- length(edges_by_gene)
                     +    hits_cols@rownames <- NULL
                     -    colnames(ans) <- sub("\\.hits$", "", colnames(ans))
                          gene_id <- names(edges_by_gene)
                          tx_id <- unique(regroup(edge_data[ , "tx_id"], edge_data_breakpoints))
                     -    cbind(DataFrame(gene_id=gene_id, tx_id=tx_id), ans)
                     +    cbind(DataFrame(gene_id=gene_id, tx_id=tx_id), hits_cols)
+                     }
                     +setGeneric("getReads", signature="x",
                     +    function(x, by=c("sgedge", "rsgedge", "tx", "gene"))
                     +        standardGeneric("getReads")
                     +)
+                    +
                     +### Return a DataFrame with 1 row per splicing graph edge, reduced splicing
                     +### graph edge, transcript, or gene (depending on 'by'), and 1 column per
                     +### sample, preceded by 2 leading columns.
                     +setMethod("getReads", "SplicingGraphs",
                     +    function(x, by=c("sgedge", "rsgedge", "tx", "gene"))
                     +    {
                     +        by <- match.arg(by)
                     +        switch(by,
                     +            sgedge=.getReads_by_sgedge(x),
                     +            rsgedge=.getReads_by_rsgedge(x),
                     +            tx=.getReads_by_tx(x),
                     +            gene=.getReads_by_gene(x))
                     +    }
                     +)
+                    +
+                    +
                     +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                     +### countReads()
                     +###
+                    +
                      setGeneric("countReads", signature="x",
                          function(x, by=c("sgedge", "rsgedge", "tx", "gene"))
                              standardGeneric("countReads")
@@ -94,12 +116,13 @@ setGeneric("countReads", signature="x",
                      setMethod("countReads", "SplicingGraphs",
                          function(x, by=c("sgedge", "rsgedge", "tx", "gene"))
+                         {
                     -        by <- match.arg(by)
                     -        switch(by,
                     -            sgedge=.countReads_by_sgedge(x),
                     -            rsgedge=.countReads_by_rsgedge(x),
                     -            tx=.countReads_by_tx(x),
                     -            gene=.countReads_by_gene(x))
                     +        assigned_reads <- getReads(x, by=by)
                     +        hits_col_idx <- grep("\\.hits$", colnames(assigned_reads))
                     +        if (length(hits_col_idx) == 0L)
                     +            return(assigned_reads)
                     +        read_counts <- endoapply(assigned_reads[hits_col_idx], elementLengths)
                     +        colnames(read_counts) <- sub("\\.hits$", "", colnames(read_counts))
                     +        cbind(assigned_reads[-hits_col_idx], read_counts)
+                         }
+                     )

TODO

History View file @ 05410e1

@@ -2,19 +2,10 @@ o Replace the RNA-seq data used in the vignette by data that actually
                        contains junction reads. The BAM files in TBX20BamSubset don't contain
                        any!
                     -o Move vignette to vignettes/
+                    -
                     -o Implement rsgedgesByTranscript().
+                    -
                      o Complete vignette (get rid of all TODO from it).
                      o Complete man pages (get rid of all TODO from them).
                     -o Fix issue with calls to plotting functions requiring 1 more key-stroke than
                     -  necessary before they actually start to plot something.
+                    -
                     -o Add by="gene" to countReads() for counting of unique reads per gene.
+                    -
                      o Add 'drop.ambiguous.hits' arg to countReads() for dropping reads that are
                        assigned to multiple edges (when by="sgedge"), to multiple reduced edges
                        (when by="rsgedge"), to multiple transcripts (when by="tx"), or to
@@ -30,3 +21,11 @@ o Add 'drop.ambiguous.hits' arg to countReads() for dropping reads that are
                        countReads(sg, by="tx", drop.ambiguous.hits=TRUE) and compare. Should
                        produce the same result.
                     +o Fix issue with calls to plotting functions requiring 1 more key-stroke than
                     +  necessary before they actually start to plot something.
+                    +
                     +o Implement rsgedgesByTranscript().
+                    +
                     +o Move vignette to vignettes/
+                    +
+                    +

man/SplicingGraphs-package.Rd

History View file @ 05410e1

@@ -74,7 +74,7 @@
                                SplicingGraphs object.
                          \item \code{\link{countReads}} for summarizing the reads assigned to
                     -          the edges of a SplicingGraphs object.
                     +          a SplicingGraphs object.
                          \item \code{\link{toy_genes_gff}} for details about the toy data included
                                in this package.

man/countReads-methods.Rd

History View file @ 05410e1

@@ -7,15 +7,21 @@
                      \title{
                     -  Summarize the reads assigned to the edges of a SplicingGraphs object
                     +  Summarize the reads assigned to a SplicingGraphs object
+                     }
                      \description{
                     -  \code{countReads} returns a summarized count of the reads assigned to
                     -  the edges of a SplicingGraphs object.
                     +  \code{getReads} returns the reads assigned to a SplicingGraphs object,
                     +  summarized either by splicing graph edge, \emph{reduced} splicing graph
                     +  edge, transcript, or gene.
+                    +
                     +  \code{countReads} counts the reads assigned to a SplicingGraphs object.
                     +  The counting can be done by splicing graph edge, \emph{reduced} splicing
                     +  graph edge, transcript, or gene.
+                     }
                      \usage{
                     +getReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
                      countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
+                     }
@@ -24,9 +30,9 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
                          A \link{SplicingGraphs} object.
+                       }
                        \item{by}{
                     -    Summarize by splicing graph edge (\code{by="sgedge"}), by \emph{reduced}
                     -    splicing graph edge (\code{by="rsgedge"}), by transcript (\code{by="tx"}),
                     -    or by gene (\code{by="gene"}).
                     +    Summarize/count by splicing graph edge (\code{by="sgedge"}), by
                     +    \emph{reduced} splicing graph edge (\code{by="rsgedge"}), by transcript
                     +    (\code{by="tx"}), or by gene (\code{by="gene"}).
+                       }
+                     }
@@ -43,8 +49,9 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
                          \item gene if \code{by="gene"}.
+                       }
                     -  And with one column per sample (containing the counts for that sample),
                     -  plus the two following additional leading columns:
                     +  And with one column per sample (containing the reads for that sample for
                     +  \code{getReads}, and the counts for that sample for \code{countReads}),
                     +  plus the following two additional leading columns:
                        \itemize{
                          \item if \code{by="sgedge"}: \code{"sgedge_id"}, containing the
                                \emph{global splicing graph edge ids}, and \code{"ex_or_in"},
@@ -76,16 +83,32 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
                      example(assignReads)
                      ## ---------------------------------------------------------------------
                     -## 2. Summarize the reads assigned to 'sg'
                     +## 2. Summarize the reads by splicing graph edge
                     +## ---------------------------------------------------------------------
                     +getReads(sg)
                     +countReads(sg)
+                    +
                     +## ---------------------------------------------------------------------
                     +## 3. Summarize the reads by reduced splicing graph edge
                     +## ---------------------------------------------------------------------
                     +getReads(sg, by="rsgedge")
                     +countReads(sg, by="rsgedge")
+                    +
                     +## ---------------------------------------------------------------------
                     +## 4. Summarize the reads by transcript
                     +## ---------------------------------------------------------------------
                     +getReads(sg, by="tx")
                     +countReads(sg, by="tx")
+                    +
                     +## ---------------------------------------------------------------------
                     +## 5. Summarize the reads by gene
                      ## ---------------------------------------------------------------------
                     -countReads(sg)  # nb of reads per splicing graph edge
                     -countReads(sg, by="rsgedge")  # ... per reduced splicing graph edge
                     -countReads(sg, by="tx")  # ... per transcript
                     -countReads(sg, by="gene")  # ... per gene
                     +getReads(sg, by="gene")
                     +countReads(sg, by="gene")
                      ## ---------------------------------------------------------------------
                     -## 3. Remove the reads from 'sg'.
                     +## 6. Remove the reads from 'sg'.
                      ## ---------------------------------------------------------------------
                     -removeReads(sg)
                     +sg <- removeReads(sg)
                      countReads(sg)
+                     }