Browse code

countReads() now allows counting by *reduced* splicing graph edge in addition to counting by splicing graph edge. This is controlled via the new 'by' argument.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@75609 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 12/04/2013 17:59:17
Showing 6 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: SplicingGraphs
2 2
 Title: Creation, manipulation, visualization of splicing graphs
3
-Version: 1.1.1
3
+Version: 1.1.2
4 4
 Author: D. Bindreither, M. Carlson, M. Morgan, H. Pages
5 5
 License: Artistic-2.0
6 6
 Description: This package allows the user to create, manipulate, and visualize
... ...
@@ -56,7 +56,6 @@ export(
56 56
 
57 57
     ## countReads.R:
58 58
     assignReads,
59
-    countReads,
60 59
 
61 60
     ## toy_data.R:
62 61
     toy_genes_gff,
... ...
@@ -100,7 +99,10 @@ export(
100 99
 
101 100
     ## rsgedgesByGene-methods.R:
102 101
     rsgedgesByTranscript,
103
-    rsgedgesByGene
102
+    rsgedgesByGene,
103
+
104
+    ## countReads.R:
105
+    countReads
104 106
 )
105 107
 
106 108
 ### Exactly the same list as above.
... ...
@@ -118,6 +120,7 @@ exportMethods(
118 120
     sgraph,
119 121
     bubbles,
120 122
     rsgedgesByTranscript,
121
-    rsgedgesByGene
123
+    rsgedgesByGene,
124
+    countReads
122 125
 )
123 126
 
... ...
@@ -136,20 +136,34 @@ assignReads <- function(sg, reads, sample.name=NA)
136 136
 ### countReads()
137 137
 ###
138 138
 
139
-### Return a DataFrame with 1 row per unique splicing graph edge and 1 column
140
-### per sample.
141
-countReads <- function(sg)
142
-{
143
-    if (!is(sg, "SplicingGraphs"))
144
-        stop("'sg' must be a SplicingGraphs object")
145
-    edges_by_gene <- sgedgesByGene(sg, with.hits.mcols=TRUE)
146
-    edges0 <- unlist(edges_by_gene, use.names=FALSE)
147
-    edges0_mcols <- mcols(edges0)
148
-    edges0_mcolnames <- colnames(edges0_mcols)
149
-    hits_idx <- grep("\\.hits$", edges0_mcolnames)
150
-    ans <- endoapply(edges0_mcols[hits_idx], elementLengths)
151
-    colnames(ans) <- sub("\\.hits$", "", colnames(ans))
152
-    left_cols <- edges0_mcols[ , c("sgedge_id", "ex_or_in")]
153
-    cbind(left_cols, ans)
154
-}
139
+setGeneric("countReads", signature="x",
140
+    function(x, by=c("sgedge", "rsgedge")) standardGeneric("countReads")
141
+)
142
+
143
+### Return a DataFrame with 1 row per splicing graph edge (or reduced
144
+### splicing graph edge), and 1 column per sample.
145
+setMethod("countReads", "SplicingGraphs",
146
+    function(x, by=c("sgedge", "rsgedge"))
147
+    {
148
+        by <- match.arg(by)
149
+        if (by == "sgedge") {
150
+            edges_by_gene <- sgedgesByGene(x, with.hits.mcols=TRUE)
151
+        } else {
152
+            edges_by_gene <- rsgedgesByGene(x, with.hits.mcols=TRUE)
153
+        }
154
+        edges0 <- unlist(edges_by_gene, use.names=FALSE)
155
+        edges0_mcols <- mcols(edges0)
156
+        edges0_mcolnames <- colnames(edges0_mcols)
157
+        hits_mcol_idx <- grep("\\.hits$", edges0_mcolnames)
158
+        ans <- endoapply(edges0_mcols[hits_mcol_idx], elementLengths)
159
+        colnames(ans) <- sub("\\.hits$", "", colnames(ans))
160
+        if (by == "sgedge") {
161
+            left_mcolnames <- c("sgedge_id", "ex_or_in")
162
+        } else {
163
+            left_mcolnames <- c("rsgedge_id", "ex_or_in")
164
+        }
165
+        left_cols <- edges0_mcols[left_mcolnames]
166
+        cbind(left_cols, ans)
167
+    }
168
+)
155 169
 
... ...
@@ -21,7 +21,6 @@
21 21
     names(txpath3) <- names(txpath)
22 22
 
23 23
     gene_id <- rep.int(names(txpath3), elementLengths(txpath3))
24
-    #tmp <- paste(gene_id, txpath3@unlistData, sep=":")
25 24
     tmp <- txpath3@unlistData
26 25
     gene_id <- gene_id[c(TRUE, FALSE)]
27 26
     from <- tmp[c(TRUE, FALSE)]
... ...
@@ -146,9 +145,19 @@
146 145
     ## Reduce hits metadata cols.
147 146
     hits_mcol_idx <- grep("hits$", colnames(edges_mcols))
148 147
     if (length(hits_mcol_idx) != 0L) {
148
+        ## FIXME: endoapply() on a DataFrame object is broken when applying
149
+        ## a function 'FUN' that modifies the nb of rows. Furthermore, the
150
+        ## returned object passes validitation despite being broken! Fix it
151
+        ## in IRanges.
149 152
         hits_mcols <- endoapply(edges_mcols[hits_mcol_idx],
150 153
                                 function(hits)
151 154
                                   unname(unique(unlistAndSplit(hits, f))))
155
+
156
+        ## Fix the broken DataFrame returned by endoapply().
157
+        hits_mcols@nrows <- nlevels(f)
158
+        hits_mcols@rownames <- NULL
159
+
160
+        ## Combine with 'ans_mcols'.
152 161
         ans_mcols <- cbind(ans_mcols, hits_mcols)
153 162
     }
154 163
 
... ...
@@ -192,16 +201,16 @@ setMethod("rsgedgesByGene", "SplicingGraphs",
192 201
         if (keep.dup.edges)
193 202
             stop("'keep.dup.edges=TRUE' is not supported yet, sorry")
194 203
         edges_by_gene <- sgedgesByGene(x, with.hits.mcols=with.hits.mcols)
195
-        all_edges <- unlist(edges_by_gene)
196
-        all_edges_mcols <- mcols(all_edges)
197
-        gene_id <- names(all_edges)
198
-        from <- all_edges_mcols[ , "from"]
199
-        to <- all_edges_mcols[ , "to"]
204
+        edges0 <- unlist(edges_by_gene)
205
+        edges0_mcols <- mcols(edges0)
206
+        gene_id <- names(edges0)
207
+        from <- edges0_mcols[ , "from"]
208
+        to <- edges0_mcols[ , "to"]
200 209
         ui_fqnodes <- .uninformative_fqnodes(x)
201 210
         sgedge2rsgedge_map <- .build_sgedge2rsgedge_map(gene_id, from, to,
202 211
                                                         ui_fqnodes)
203 212
         f <- factor(sgedge2rsgedge_map, levels=unique(sgedge2rsgedge_map))
204
-        ans_flesh <- .reduce_edges(all_edges, f)
213
+        ans_flesh <- .reduce_edges(edges0, f)
205 214
         ans_flesh_names <- Rle(names(ans_flesh))
206 215
         breakpoints <- cumsum(runLength(ans_flesh_names))
207 216
         ans_names <- runValue(ans_flesh_names)
... ...
@@ -133,11 +133,11 @@
133 133
         from_to_colnames <- c("end_SSid", "start_SSid")
134 134
     }
135 135
     ex_mcols <- mcols(exons)
136
-    ex_colnames <- colnames(ex_mcols)
137
-    hits_idx <- grep("hits$", ex_colnames)
138
-    hits_colnames <- ex_colnames[hits_idx]
139
-    hits_colnames <- c(from_to_colnames, hits_colnames)
140
-    exon_hits <- ex_mcols[ , hits_colnames, drop=FALSE]
136
+    ex_mcolnames <- colnames(ex_mcols)
137
+    hits_mcol_idx <- grep("hits$", ex_mcolnames)
138
+    hits_mcolnames <- ex_mcolnames[hits_mcol_idx]
139
+    hits_mcolnames <- c(from_to_colnames, hits_mcolnames)
140
+    exon_hits <- ex_mcols[ , hits_mcolnames, drop=FALSE]
141 141
     colnames(exon_hits)[1:2] <- c("from", "to")
142 142
     exon_hits
143 143
 }
... ...
@@ -160,10 +160,10 @@
160 160
     #}
161 161
     in_mcols <- mcols(introns)
162 162
     in_colnames <- colnames(in_mcols)
163
-    hits_idx <- grep("hits$", in_colnames)
164
-    hits_colnames <- in_colnames[hits_idx]
165
-    #hits_colnames <- c(from_to_colnames, hits_colnames)
166
-    intron_hits <- in_mcols[ , hits_colnames, drop=FALSE]
163
+    hits_mcol_idx <- grep("hits$", in_colnames)
164
+    hits_mcolnames <- in_colnames[hits_mcol_idx]
165
+    #hits_mcolnames <- c(from_to_colnames, hits_mcolnames)
166
+    intron_hits <- in_mcols[hits_mcolnames]
167 167
     #colnames(intron_hits)[1:2] <- c("from", "to")
168 168
     intron_hits
169 169
 }
... ...
@@ -16,7 +16,7 @@
16 16
 
17 17
 \usage{
18 18
 assignReads(sg, reads, sample.name=NA)
19
-countReads(sg)
19
+countReads(sg, by=c("sgedge", "rsgedge"))
20 20
 }
21 21
 
22 22
 \arguments{
... ...
@@ -36,6 +36,10 @@ countReads(sg)
36 36
     A single string containing the name of the sample where the reads
37 37
     are coming from.
38 38
   }
39
+  \item{by}{
40
+    Summarize by splicing graph edge (\code{"sgedge"}) or by \emph{reduced}
41
+    splicing graph edge (\code{"rsgedge"}).
42
+  }
39 43
 }
40 44
 
41 45
 \details{
... ...
@@ -153,11 +157,27 @@ gal
153 157
 ## junction read with 1 gap can be assigned to 2 exons and 1 intron).
154 158
 sg <- assignReads(sg, gal, sample.name="TOYREADS")
155 159
 
160
+## See the assignments to the splicing graph edges.
156 161
 edge_by_tx <- sgedgesByTranscript(sg, with.hits.mcols=TRUE)
157
-mcols(unlist(edge_by_tx))
162
+edge_data <- mcols(unlist(edge_by_tx))
163
+colnames(edge_data)
164
+head(edge_data)
165
+edge_data[ , c("sgedge_id", "TOYREADS.hits")]
166
+
167
+edge_by_gene <- sgedgesByGene(sg, with.hits.mcols=TRUE)
168
+mcols(unlist(edge_by_gene))
169
+
170
+## See the assignments to the reduced splicing graph edges.
171
+redge_by_gene <- rsgedgesByGene(sg, with.hits.mcols=TRUE)
172
+mcols(unlist(redge_by_gene))
158 173
 
159 174
 ## ---------------------------------------------------------------------
160 175
 ## 4. Count the number of reads per splicing graph edge
161 176
 ## ---------------------------------------------------------------------
162 177
 countReads(sg)
178
+
179
+## ---------------------------------------------------------------------
180
+## 5. Count the number of reads per reduced splicing graph edge
181
+## ---------------------------------------------------------------------
182
+countReads(sg, by="rsgedge")
163 183
 }