git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@75609 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: SplicingGraphs |
2 | 2 |
Title: Creation, manipulation, visualization of splicing graphs |
3 |
-Version: 1.1.1 |
|
3 |
+Version: 1.1.2 |
|
4 | 4 |
Author: D. Bindreither, M. Carlson, M. Morgan, H. Pages |
5 | 5 |
License: Artistic-2.0 |
6 | 6 |
Description: This package allows the user to create, manipulate, and visualize |
... | ... |
@@ -56,7 +56,6 @@ export( |
56 | 56 |
|
57 | 57 |
## countReads.R: |
58 | 58 |
assignReads, |
59 |
- countReads, |
|
60 | 59 |
|
61 | 60 |
## toy_data.R: |
62 | 61 |
toy_genes_gff, |
... | ... |
@@ -100,7 +99,10 @@ export( |
100 | 99 |
|
101 | 100 |
## rsgedgesByGene-methods.R: |
102 | 101 |
rsgedgesByTranscript, |
103 |
- rsgedgesByGene |
|
102 |
+ rsgedgesByGene, |
|
103 |
+ |
|
104 |
+ ## countReads.R: |
|
105 |
+ countReads |
|
104 | 106 |
) |
105 | 107 |
|
106 | 108 |
### Exactly the same list as above. |
... | ... |
@@ -118,6 +120,7 @@ exportMethods( |
118 | 120 |
sgraph, |
119 | 121 |
bubbles, |
120 | 122 |
rsgedgesByTranscript, |
121 |
- rsgedgesByGene |
|
123 |
+ rsgedgesByGene, |
|
124 |
+ countReads |
|
122 | 125 |
) |
123 | 126 |
|
... | ... |
@@ -136,20 +136,34 @@ assignReads <- function(sg, reads, sample.name=NA) |
136 | 136 |
### countReads() |
137 | 137 |
### |
138 | 138 |
|
139 |
-### Return a DataFrame with 1 row per unique splicing graph edge and 1 column |
|
140 |
-### per sample. |
|
141 |
-countReads <- function(sg) |
|
142 |
-{ |
|
143 |
- if (!is(sg, "SplicingGraphs")) |
|
144 |
- stop("'sg' must be a SplicingGraphs object") |
|
145 |
- edges_by_gene <- sgedgesByGene(sg, with.hits.mcols=TRUE) |
|
146 |
- edges0 <- unlist(edges_by_gene, use.names=FALSE) |
|
147 |
- edges0_mcols <- mcols(edges0) |
|
148 |
- edges0_mcolnames <- colnames(edges0_mcols) |
|
149 |
- hits_idx <- grep("\\.hits$", edges0_mcolnames) |
|
150 |
- ans <- endoapply(edges0_mcols[hits_idx], elementLengths) |
|
151 |
- colnames(ans) <- sub("\\.hits$", "", colnames(ans)) |
|
152 |
- left_cols <- edges0_mcols[ , c("sgedge_id", "ex_or_in")] |
|
153 |
- cbind(left_cols, ans) |
|
154 |
-} |
|
139 |
+setGeneric("countReads", signature="x", |
|
140 |
+ function(x, by=c("sgedge", "rsgedge")) standardGeneric("countReads") |
|
141 |
+) |
|
142 |
+ |
|
143 |
+### Return a DataFrame with 1 row per splicing graph edge (or reduced |
|
144 |
+### splicing graph edge), and 1 column per sample. |
|
145 |
+setMethod("countReads", "SplicingGraphs", |
|
146 |
+ function(x, by=c("sgedge", "rsgedge")) |
|
147 |
+ { |
|
148 |
+ by <- match.arg(by) |
|
149 |
+ if (by == "sgedge") { |
|
150 |
+ edges_by_gene <- sgedgesByGene(x, with.hits.mcols=TRUE) |
|
151 |
+ } else { |
|
152 |
+ edges_by_gene <- rsgedgesByGene(x, with.hits.mcols=TRUE) |
|
153 |
+ } |
|
154 |
+ edges0 <- unlist(edges_by_gene, use.names=FALSE) |
|
155 |
+ edges0_mcols <- mcols(edges0) |
|
156 |
+ edges0_mcolnames <- colnames(edges0_mcols) |
|
157 |
+ hits_mcol_idx <- grep("\\.hits$", edges0_mcolnames) |
|
158 |
+ ans <- endoapply(edges0_mcols[hits_mcol_idx], elementLengths) |
|
159 |
+ colnames(ans) <- sub("\\.hits$", "", colnames(ans)) |
|
160 |
+ if (by == "sgedge") { |
|
161 |
+ left_mcolnames <- c("sgedge_id", "ex_or_in") |
|
162 |
+ } else { |
|
163 |
+ left_mcolnames <- c("rsgedge_id", "ex_or_in") |
|
164 |
+ } |
|
165 |
+ left_cols <- edges0_mcols[left_mcolnames] |
|
166 |
+ cbind(left_cols, ans) |
|
167 |
+ } |
|
168 |
+) |
|
155 | 169 |
|
... | ... |
@@ -21,7 +21,6 @@ |
21 | 21 |
names(txpath3) <- names(txpath) |
22 | 22 |
|
23 | 23 |
gene_id <- rep.int(names(txpath3), elementLengths(txpath3)) |
24 |
- #tmp <- paste(gene_id, txpath3@unlistData, sep=":") |
|
25 | 24 |
tmp <- txpath3@unlistData |
26 | 25 |
gene_id <- gene_id[c(TRUE, FALSE)] |
27 | 26 |
from <- tmp[c(TRUE, FALSE)] |
... | ... |
@@ -146,9 +145,19 @@ |
146 | 145 |
## Reduce hits metadata cols. |
147 | 146 |
hits_mcol_idx <- grep("hits$", colnames(edges_mcols)) |
148 | 147 |
if (length(hits_mcol_idx) != 0L) { |
148 |
+ ## FIXME: endoapply() on a DataFrame object is broken when applying |
|
149 |
+ ## a function 'FUN' that modifies the nb of rows. Furthermore, the |
|
150 |
+ ## returned object passes validitation despite being broken! Fix it |
|
151 |
+ ## in IRanges. |
|
149 | 152 |
hits_mcols <- endoapply(edges_mcols[hits_mcol_idx], |
150 | 153 |
function(hits) |
151 | 154 |
unname(unique(unlistAndSplit(hits, f)))) |
155 |
+ |
|
156 |
+ ## Fix the broken DataFrame returned by endoapply(). |
|
157 |
+ hits_mcols@nrows <- nlevels(f) |
|
158 |
+ hits_mcols@rownames <- NULL |
|
159 |
+ |
|
160 |
+ ## Combine with 'ans_mcols'. |
|
152 | 161 |
ans_mcols <- cbind(ans_mcols, hits_mcols) |
153 | 162 |
} |
154 | 163 |
|
... | ... |
@@ -192,16 +201,16 @@ setMethod("rsgedgesByGene", "SplicingGraphs", |
192 | 201 |
if (keep.dup.edges) |
193 | 202 |
stop("'keep.dup.edges=TRUE' is not supported yet, sorry") |
194 | 203 |
edges_by_gene <- sgedgesByGene(x, with.hits.mcols=with.hits.mcols) |
195 |
- all_edges <- unlist(edges_by_gene) |
|
196 |
- all_edges_mcols <- mcols(all_edges) |
|
197 |
- gene_id <- names(all_edges) |
|
198 |
- from <- all_edges_mcols[ , "from"] |
|
199 |
- to <- all_edges_mcols[ , "to"] |
|
204 |
+ edges0 <- unlist(edges_by_gene) |
|
205 |
+ edges0_mcols <- mcols(edges0) |
|
206 |
+ gene_id <- names(edges0) |
|
207 |
+ from <- edges0_mcols[ , "from"] |
|
208 |
+ to <- edges0_mcols[ , "to"] |
|
200 | 209 |
ui_fqnodes <- .uninformative_fqnodes(x) |
201 | 210 |
sgedge2rsgedge_map <- .build_sgedge2rsgedge_map(gene_id, from, to, |
202 | 211 |
ui_fqnodes) |
203 | 212 |
f <- factor(sgedge2rsgedge_map, levels=unique(sgedge2rsgedge_map)) |
204 |
- ans_flesh <- .reduce_edges(all_edges, f) |
|
213 |
+ ans_flesh <- .reduce_edges(edges0, f) |
|
205 | 214 |
ans_flesh_names <- Rle(names(ans_flesh)) |
206 | 215 |
breakpoints <- cumsum(runLength(ans_flesh_names)) |
207 | 216 |
ans_names <- runValue(ans_flesh_names) |
... | ... |
@@ -133,11 +133,11 @@ |
133 | 133 |
from_to_colnames <- c("end_SSid", "start_SSid") |
134 | 134 |
} |
135 | 135 |
ex_mcols <- mcols(exons) |
136 |
- ex_colnames <- colnames(ex_mcols) |
|
137 |
- hits_idx <- grep("hits$", ex_colnames) |
|
138 |
- hits_colnames <- ex_colnames[hits_idx] |
|
139 |
- hits_colnames <- c(from_to_colnames, hits_colnames) |
|
140 |
- exon_hits <- ex_mcols[ , hits_colnames, drop=FALSE] |
|
136 |
+ ex_mcolnames <- colnames(ex_mcols) |
|
137 |
+ hits_mcol_idx <- grep("hits$", ex_mcolnames) |
|
138 |
+ hits_mcolnames <- ex_mcolnames[hits_mcol_idx] |
|
139 |
+ hits_mcolnames <- c(from_to_colnames, hits_mcolnames) |
|
140 |
+ exon_hits <- ex_mcols[ , hits_mcolnames, drop=FALSE] |
|
141 | 141 |
colnames(exon_hits)[1:2] <- c("from", "to") |
142 | 142 |
exon_hits |
143 | 143 |
} |
... | ... |
@@ -160,10 +160,10 @@ |
160 | 160 |
#} |
161 | 161 |
in_mcols <- mcols(introns) |
162 | 162 |
in_colnames <- colnames(in_mcols) |
163 |
- hits_idx <- grep("hits$", in_colnames) |
|
164 |
- hits_colnames <- in_colnames[hits_idx] |
|
165 |
- #hits_colnames <- c(from_to_colnames, hits_colnames) |
|
166 |
- intron_hits <- in_mcols[ , hits_colnames, drop=FALSE] |
|
163 |
+ hits_mcol_idx <- grep("hits$", in_colnames) |
|
164 |
+ hits_mcolnames <- in_colnames[hits_mcol_idx] |
|
165 |
+ #hits_mcolnames <- c(from_to_colnames, hits_mcolnames) |
|
166 |
+ intron_hits <- in_mcols[hits_mcolnames] |
|
167 | 167 |
#colnames(intron_hits)[1:2] <- c("from", "to") |
168 | 168 |
intron_hits |
169 | 169 |
} |
... | ... |
@@ -16,7 +16,7 @@ |
16 | 16 |
|
17 | 17 |
\usage{ |
18 | 18 |
assignReads(sg, reads, sample.name=NA) |
19 |
-countReads(sg) |
|
19 |
+countReads(sg, by=c("sgedge", "rsgedge")) |
|
20 | 20 |
} |
21 | 21 |
|
22 | 22 |
\arguments{ |
... | ... |
@@ -36,6 +36,10 @@ countReads(sg) |
36 | 36 |
A single string containing the name of the sample where the reads |
37 | 37 |
are coming from. |
38 | 38 |
} |
39 |
+ \item{by}{ |
|
40 |
+ Summarize by splicing graph edge (\code{"sgedge"}) or by \emph{reduced} |
|
41 |
+ splicing graph edge (\code{"rsgedge"}). |
|
42 |
+ } |
|
39 | 43 |
} |
40 | 44 |
|
41 | 45 |
\details{ |
... | ... |
@@ -153,11 +157,27 @@ gal |
153 | 157 |
## junction read with 1 gap can be assigned to 2 exons and 1 intron). |
154 | 158 |
sg <- assignReads(sg, gal, sample.name="TOYREADS") |
155 | 159 |
|
160 |
+## See the assignments to the splicing graph edges. |
|
156 | 161 |
edge_by_tx <- sgedgesByTranscript(sg, with.hits.mcols=TRUE) |
157 |
-mcols(unlist(edge_by_tx)) |
|
162 |
+edge_data <- mcols(unlist(edge_by_tx)) |
|
163 |
+colnames(edge_data) |
|
164 |
+head(edge_data) |
|
165 |
+edge_data[ , c("sgedge_id", "TOYREADS.hits")] |
|
166 |
+ |
|
167 |
+edge_by_gene <- sgedgesByGene(sg, with.hits.mcols=TRUE) |
|
168 |
+mcols(unlist(edge_by_gene)) |
|
169 |
+ |
|
170 |
+## See the assignments to the reduced splicing graph edges. |
|
171 |
+redge_by_gene <- rsgedgesByGene(sg, with.hits.mcols=TRUE) |
|
172 |
+mcols(unlist(redge_by_gene)) |
|
158 | 173 |
|
159 | 174 |
## --------------------------------------------------------------------- |
160 | 175 |
## 4. Count the number of reads per splicing graph edge |
161 | 176 |
## --------------------------------------------------------------------- |
162 | 177 |
countReads(sg) |
178 |
+ |
|
179 |
+## --------------------------------------------------------------------- |
|
180 |
+## 5. Count the number of reads per reduced splicing graph edge |
|
181 |
+## --------------------------------------------------------------------- |
|
182 |
+countReads(sg, by="rsgedge") |
|
163 | 183 |
} |