Bioconductor Code: SplicingGraphs

Browse code

countReads() now allows counting by reduced splicing graph edge in addition to counting by splicing graph edge. This is controlled via the new 'by' argument.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@75609 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 12/04/2013 17:59:17
Showing 6 changed files

DESCRIPTION index e1220e5..5e3dfaa 100644
NAMESPACE index 7a4a118..81806a2 100644
R/countReads.R index 1573fe4..f8a88d5 100644
R/rsgedgesByGene-methods.R index ee2ca80..d3dae83 100644
R/sgedges-methods.R index 1be4e87..30baf7b 100644
man/countReads.Rd index f56ad5f..3c506e7 100644

DESCRIPTION

History View file @ c10df4d

@@ -1,6 +1,6 @@
                      Package: SplicingGraphs
                      Title: Creation, manipulation, visualization of splicing graphs
                     -Version: 1.1.1
                     +Version: 1.1.2
                      Author: D. Bindreither, M. Carlson, M. Morgan, H. Pages
                      License: Artistic-2.0
                      Description: This package allows the user to create, manipulate, and visualize

NAMESPACE

History View file @ c10df4d

@@ -56,7 +56,6 @@ export(
                          ## countReads.R:
                          assignReads,
                     -    countReads,
                          ## toy_data.R:
                          toy_genes_gff,
@@ -100,7 +99,10 @@ export(
                          ## rsgedgesByGene-methods.R:
                          rsgedgesByTranscript,
                     -    rsgedgesByGene
                     +    rsgedgesByGene,
+                    +
                     +    ## countReads.R:
                     +    countReads
+                     )
                      ### Exactly the same list as above.
@@ -118,6 +120,7 @@ exportMethods(
                          sgraph,
                          bubbles,
                          rsgedgesByTranscript,
                     -    rsgedgesByGene
                     +    rsgedgesByGene,
                     +    countReads
+                     )

R/countReads.R

History View file @ c10df4d

@@ -136,20 +136,34 @@ assignReads <- function(sg, reads, sample.name=NA)
                      ### countReads()
                      ###
                     -### Return a DataFrame with 1 row per unique splicing graph edge and 1 column
                     -### per sample.
                     -countReads <- function(sg)
                     -{
                     -    if (!is(sg, "SplicingGraphs"))
                     -        stop("'sg' must be a SplicingGraphs object")
                     -    edges_by_gene <- sgedgesByGene(sg, with.hits.mcols=TRUE)
                     -    edges0 <- unlist(edges_by_gene, use.names=FALSE)
                     -    edges0_mcols <- mcols(edges0)
                     -    edges0_mcolnames <- colnames(edges0_mcols)
                     -    hits_idx <- grep("\\.hits$", edges0_mcolnames)
                     -    ans <- endoapply(edges0_mcols[hits_idx], elementLengths)
                     -    colnames(ans) <- sub("\\.hits$", "", colnames(ans))
                     -    left_cols <- edges0_mcols[ , c("sgedge_id", "ex_or_in")]
                     -    cbind(left_cols, ans)
                     -}
                     +setGeneric("countReads", signature="x",
                     +    function(x, by=c("sgedge", "rsgedge")) standardGeneric("countReads")
                     +)
+                    +
                     +### Return a DataFrame with 1 row per splicing graph edge (or reduced
                     +### splicing graph edge), and 1 column per sample.
                     +setMethod("countReads", "SplicingGraphs",
                     +    function(x, by=c("sgedge", "rsgedge"))
                     +    {
                     +        by <- match.arg(by)
                     +        if (by == "sgedge") {
                     +            edges_by_gene <- sgedgesByGene(x, with.hits.mcols=TRUE)
                     +        } else {
                     +            edges_by_gene <- rsgedgesByGene(x, with.hits.mcols=TRUE)
                     +        }
                     +        edges0 <- unlist(edges_by_gene, use.names=FALSE)
                     +        edges0_mcols <- mcols(edges0)
                     +        edges0_mcolnames <- colnames(edges0_mcols)
                     +        hits_mcol_idx <- grep("\\.hits$", edges0_mcolnames)
                     +        ans <- endoapply(edges0_mcols[hits_mcol_idx], elementLengths)
                     +        colnames(ans) <- sub("\\.hits$", "", colnames(ans))
                     +        if (by == "sgedge") {
                     +            left_mcolnames <- c("sgedge_id", "ex_or_in")
                     +        } else {
                     +            left_mcolnames <- c("rsgedge_id", "ex_or_in")
                     +        }
                     +        left_cols <- edges0_mcols[left_mcolnames]
                     +        cbind(left_cols, ans)
                     +    }
                     +)

R/rsgedgesByGene-methods.R

History View file @ c10df4d

@@ -21,7 +21,6 @@
                          names(txpath3) <- names(txpath)
                          gene_id <- rep.int(names(txpath3), elementLengths(txpath3))
                     -    #tmp <- paste(gene_id, txpath3@unlistData, sep=":")
                          tmp <- txpath3@unlistData
                          gene_id <- gene_id[c(TRUE, FALSE)]
                          from <- tmp[c(TRUE, FALSE)]
@@ -146,9 +145,19 @@
                          ## Reduce hits metadata cols.
                          hits_mcol_idx <- grep("hits$", colnames(edges_mcols))
                          if (length(hits_mcol_idx) != 0L) {
                     +        ## FIXME: endoapply() on a DataFrame object is broken when applying
                     +        ## a function 'FUN' that modifies the nb of rows. Furthermore, the
                     +        ## returned object passes validation despite being broken! Fix it
                     +        ## in IRanges.
                              hits_mcols <- endoapply(edges_mcols[hits_mcol_idx],
                                                      function(hits)
                                                        unname(unique(unlistAndSplit(hits, f))))
+                    +
                     +        ## Fix the broken DataFrame returned by endoapply().
                     +        hits_mcols@nrows <- nlevels(f)
                     +        hits_mcols@rownames <- NULL
+                    +
                     +        ## Combine with 'ans_mcols'.
                              ans_mcols <- cbind(ans_mcols, hits_mcols)
+                         }
@@ -192,16 +201,16 @@ setMethod("rsgedgesByGene", "SplicingGraphs",
                              if (keep.dup.edges)
                                  stop("'keep.dup.edges=TRUE' is not supported yet, sorry")
                              edges_by_gene <- sgedgesByGene(x, with.hits.mcols=with.hits.mcols)
                     -        all_edges <- unlist(edges_by_gene)
                     -        all_edges_mcols <- mcols(all_edges)
                     -        gene_id <- names(all_edges)
                     -        from <- all_edges_mcols[ , "from"]
                     -        to <- all_edges_mcols[ , "to"]
                     +        edges0 <- unlist(edges_by_gene)
                     +        edges0_mcols <- mcols(edges0)
                     +        gene_id <- names(edges0)
                     +        from <- edges0_mcols[ , "from"]
                     +        to <- edges0_mcols[ , "to"]
                              ui_fqnodes <- .uninformative_fqnodes(x)
                              sgedge2rsgedge_map <- .build_sgedge2rsgedge_map(gene_id, from, to,
                                                                              ui_fqnodes)
                              f <- factor(sgedge2rsgedge_map, levels=unique(sgedge2rsgedge_map))
                     -        ans_flesh <- .reduce_edges(all_edges, f)
                     +        ans_flesh <- .reduce_edges(edges0, f)
                              ans_flesh_names <- Rle(names(ans_flesh))
                              breakpoints <- cumsum(runLength(ans_flesh_names))
                              ans_names <- runValue(ans_flesh_names)

R/sgedges-methods.R

History View file @ c10df4d

@@ -133,11 +133,11 @@
                              from_to_colnames <- c("end_SSid", "start_SSid")
+                         }
                          ex_mcols <- mcols(exons)
                     -    ex_colnames <- colnames(ex_mcols)
                     -    hits_idx <- grep("hits$", ex_colnames)
                     -    hits_colnames <- ex_colnames[hits_idx]
                     -    hits_colnames <- c(from_to_colnames, hits_colnames)
                     -    exon_hits <- ex_mcols[ , hits_colnames, drop=FALSE]
                     +    ex_mcolnames <- colnames(ex_mcols)
                     +    hits_mcol_idx <- grep("hits$", ex_mcolnames)
                     +    hits_mcolnames <- ex_mcolnames[hits_mcol_idx]
                     +    hits_mcolnames <- c(from_to_colnames, hits_mcolnames)
                     +    exon_hits <- ex_mcols[ , hits_mcolnames, drop=FALSE]
                          colnames(exon_hits)[1:2] <- c("from", "to")
                          exon_hits
+                     }
@@ -160,10 +160,10 @@
                          #}
                          in_mcols <- mcols(introns)
                          in_colnames <- colnames(in_mcols)
                     -    hits_idx <- grep("hits$", in_colnames)
                     -    hits_colnames <- in_colnames[hits_idx]
                     -    #hits_colnames <- c(from_to_colnames, hits_colnames)
                     -    intron_hits <- in_mcols[ , hits_colnames, drop=FALSE]
                     +    hits_mcol_idx <- grep("hits$", in_colnames)
                     +    hits_mcolnames <- in_colnames[hits_mcol_idx]
                     +    #hits_mcolnames <- c(from_to_colnames, hits_mcolnames)
                     +    intron_hits <- in_mcols[hits_mcolnames]
                          #colnames(intron_hits)[1:2] <- c("from", "to")
                          intron_hits
+                     }

man/countReads.Rd

History View file @ c10df4d

@@ -16,7 +16,7 @@
                      \usage{
                      assignReads(sg, reads, sample.name=NA)
                     -countReads(sg)
                     +countReads(sg, by=c("sgedge", "rsgedge"))
+                     }
                      \arguments{
@@ -36,6 +36,10 @@ countReads(sg)
                          A single string containing the name of the sample where the reads
                          are coming from.
+                       }
                     +  \item{by}{
                     +    Summarize by splicing graph edge (\code{"sgedge"}) or by \emph{reduced}
                     +    splicing graph edge (\code{"rsgedge"}).
                     +  }
+                     }
                      \details{
@@ -153,11 +157,27 @@ gal
                      ## junction read with 1 gap can be assigned to 2 exons and 1 intron).
                      sg <- assignReads(sg, gal, sample.name="TOYREADS")
                     +## See the assignments to the splicing graph edges.
                      edge_by_tx <- sgedgesByTranscript(sg, with.hits.mcols=TRUE)
                     -mcols(unlist(edge_by_tx))
                     +edge_data <- mcols(unlist(edge_by_tx))
                     +colnames(edge_data)
                     +head(edge_data)
                     +edge_data[ , c("sgedge_id", "TOYREADS.hits")]
+                    +
                     +edge_by_gene <- sgedgesByGene(sg, with.hits.mcols=TRUE)
                     +mcols(unlist(edge_by_gene))
+                    +
                     +## See the assignments to the reduced splicing graph edges.
                     +redge_by_gene <- rsgedgesByGene(sg, with.hits.mcols=TRUE)
                     +mcols(unlist(redge_by_gene))
                      ## ---------------------------------------------------------------------
                      ## 4. Count the number of reads per splicing graph edge
                      ## ---------------------------------------------------------------------
                      countReads(sg)
+                    +
                     +## ---------------------------------------------------------------------
                     +## 5. Count the number of reads per reduced splicing graph edge
                     +## ---------------------------------------------------------------------
                     +countReads(sg, by="rsgedge")
+                     }

countReads() now allows counting by *reduced* splicing graph edge in addition to counting by splicing graph edge. This is controlled via the new 'by' argument.

countReads() now allows counting by reduced splicing graph edge in addition to counting by splicing graph edge. This is controlled via the new 'by' argument.