git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@76127 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -102,6 +102,7 @@ export( |
102 | 102 |
bubbles, |
103 | 103 |
|
104 | 104 |
## countReads-methods.R: |
105 |
+ getReads, |
|
105 | 106 |
countReads |
106 | 107 |
) |
107 | 108 |
|
... | ... |
@@ -121,6 +122,7 @@ exportMethods( |
121 | 122 |
rsgedgesByTranscript, |
122 | 123 |
rsgedgesByGene, |
123 | 124 |
bubbles, |
125 |
+ getReads, |
|
124 | 126 |
countReads |
125 | 127 |
) |
126 | 128 |
|
... | ... |
@@ -1,35 +1,37 @@ |
1 | 1 |
### ========================================================================= |
2 |
-### Summarizing the reads assigned to the edges of a SplicingGraphs object |
|
2 |
+### Summarizing the reads assigned to a SplicingGraphs object |
|
3 | 3 |
### ------------------------------------------------------------------------- |
4 | 4 |
|
5 | 5 |
|
6 |
-.countReads_by_sgedge <- function(x) |
|
6 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
7 |
+### getReads() |
|
8 |
+### |
|
9 |
+ |
|
10 |
+.getReads_by_sgedge <- function(x) |
|
7 | 11 |
{ |
8 | 12 |
edges_by_gene <- sgedgesByGene(x, with.hits.mcols=TRUE) |
9 | 13 |
edge_data <- mcols(unlist(edges_by_gene, use.names=FALSE)) |
10 | 14 |
edge_data_colnames <- colnames(edge_data) |
11 | 15 |
hits_mcol_idx <- grep("\\.hits$", edge_data_colnames) |
12 |
- ans <- endoapply(edge_data[hits_mcol_idx], elementLengths) |
|
13 |
- colnames(ans) <- sub("\\.hits$", "", colnames(ans)) |
|
16 |
+ hits_cols <- edge_data[hits_mcol_idx] |
|
14 | 17 |
left_mcolnames <- c("sgedge_id", "ex_or_in") |
15 | 18 |
left_cols <- edge_data[left_mcolnames] |
16 |
- cbind(left_cols, ans) |
|
19 |
+ cbind(left_cols, hits_cols) |
|
17 | 20 |
} |
18 | 21 |
|
19 |
-.countReads_by_rsgedge <- function(x) |
|
22 |
+.getReads_by_rsgedge <- function(x) |
|
20 | 23 |
{ |
21 | 24 |
edges_by_gene <- rsgedgesByGene(x, with.hits.mcols=TRUE) |
22 | 25 |
edge_data <- mcols(unlist(edges_by_gene, use.names=FALSE)) |
23 | 26 |
edge_data_colnames <- colnames(edge_data) |
24 | 27 |
hits_mcol_idx <- grep("\\.hits$", edge_data_colnames) |
25 |
- ans <- endoapply(edge_data[hits_mcol_idx], elementLengths) |
|
26 |
- colnames(ans) <- sub("\\.hits$", "", colnames(ans)) |
|
28 |
+ hits_cols <- edge_data[hits_mcol_idx] |
|
27 | 29 |
left_mcolnames <- c("rsgedge_id", "ex_or_in") |
28 | 30 |
left_cols <- edge_data[left_mcolnames] |
29 |
- cbind(left_cols, ans) |
|
31 |
+ cbind(left_cols, hits_cols) |
|
30 | 32 |
} |
31 | 33 |
|
32 |
-.countReads_by_tx <- function(x) |
|
34 |
+.getReads_by_tx <- function(x) |
|
33 | 35 |
{ |
34 | 36 |
ex_by_tx <- unlist(x) |
35 | 37 |
tx_id <- mcols(ex_by_tx)[ , "tx_id"] |
... | ... |
@@ -44,20 +46,18 @@ |
44 | 46 |
## a function 'FUN' that modifies the nb of rows. Furthermore, the |
45 | 47 |
## returned object passes validation despite being broken! Fix it |
46 | 48 |
## in IRanges. |
47 |
- ans <- endoapply(edge_data[hits_mcol_idx], |
|
48 |
- function(hits) |
|
49 |
- elementLengths(unique(regroup(hits, |
|
50 |
- edge_data_breakpoints)))) |
|
49 |
+ hits_cols <- endoapply(edge_data[hits_mcol_idx], |
|
50 |
+ function(hits) |
|
51 |
+ unique(regroup(hits, edge_data_breakpoints))) |
|
51 | 52 |
|
52 | 53 |
## Fix the broken DataFrame returned by endoapply(). |
53 |
- ans@nrows <- length(tx_id) |
|
54 |
- ans@rownames <- NULL |
|
54 |
+ hits_cols@nrows <- length(tx_id) |
|
55 |
+ hits_cols@rownames <- NULL |
|
55 | 56 |
|
56 |
- colnames(ans) <- sub("\\.hits$", "", colnames(ans)) |
|
57 |
- cbind(DataFrame(tx_id=tx_id, gene_id=gene_id), ans) |
|
57 |
+ cbind(DataFrame(tx_id=tx_id, gene_id=gene_id), hits_cols) |
|
58 | 58 |
} |
59 | 59 |
|
60 |
-.countReads_by_gene <- function(x) |
|
60 |
+.getReads_by_gene <- function(x) |
|
61 | 61 |
{ |
62 | 62 |
edges_by_gene <- sgedgesByGene(x, with.hits.mcols=TRUE) |
63 | 63 |
edge_data <- mcols(unlist(edges_by_gene, use.names=FALSE)) |
... | ... |
@@ -69,21 +69,43 @@ |
69 | 69 |
## a function 'FUN' that modifies the nb of rows. Furthermore, the |
70 | 70 |
## returned object passes validation despite being broken! Fix it |
71 | 71 |
## in IRanges. |
72 |
- ans <- endoapply(edge_data[hits_mcol_idx], |
|
72 |
+ hits_cols <- endoapply(edge_data[hits_mcol_idx], |
|
73 | 73 |
function(hits) |
74 |
- elementLengths(unique(regroup(hits, |
|
75 |
- edge_data_breakpoints)))) |
|
74 |
+ unique(regroup(hits, edge_data_breakpoints))) |
|
76 | 75 |
|
77 | 76 |
## Fix the broken DataFrame returned by endoapply(). |
78 |
- ans@nrows <- length(edges_by_gene) |
|
79 |
- ans@rownames <- NULL |
|
77 |
+ hits_cols@nrows <- length(edges_by_gene) |
|
78 |
+ hits_cols@rownames <- NULL |
|
80 | 79 |
|
81 |
- colnames(ans) <- sub("\\.hits$", "", colnames(ans)) |
|
82 | 80 |
gene_id <- names(edges_by_gene) |
83 | 81 |
tx_id <- unique(regroup(edge_data[ , "tx_id"], edge_data_breakpoints)) |
84 |
- cbind(DataFrame(gene_id=gene_id, tx_id=tx_id), ans) |
|
82 |
+ cbind(DataFrame(gene_id=gene_id, tx_id=tx_id), hits_cols) |
|
85 | 83 |
} |
86 | 84 |
|
85 |
+setGeneric("getReads", signature="x", |
|
86 |
+ function(x, by=c("sgedge", "rsgedge", "tx", "gene")) |
|
87 |
+ standardGeneric("getReads") |
|
88 |
+) |
|
89 |
+ |
|
90 |
+### Return a DataFrame with 1 row per splicing graph edge (or reduced |
|
91 |
+### splicing graph edge), and 1 column per sample. |
|
92 |
+setMethod("getReads", "SplicingGraphs", |
|
93 |
+ function(x, by=c("sgedge", "rsgedge", "tx", "gene")) |
|
94 |
+ { |
|
95 |
+ by <- match.arg(by) |
|
96 |
+ switch(by, |
|
97 |
+ sgedge=.getReads_by_sgedge(x), |
|
98 |
+ rsgedge=.getReads_by_rsgedge(x), |
|
99 |
+ tx=.getReads_by_tx(x), |
|
100 |
+ gene=.getReads_by_gene(x)) |
|
101 |
+ } |
|
102 |
+) |
|
103 |
+ |
|
104 |
+ |
|
105 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
106 |
+### countReads() |
|
107 |
+### |
|
108 |
+ |
|
87 | 109 |
setGeneric("countReads", signature="x", |
88 | 110 |
function(x, by=c("sgedge", "rsgedge", "tx", "gene")) |
89 | 111 |
standardGeneric("countReads") |
... | ... |
@@ -94,12 +116,13 @@ setGeneric("countReads", signature="x", |
94 | 116 |
setMethod("countReads", "SplicingGraphs", |
95 | 117 |
function(x, by=c("sgedge", "rsgedge", "tx", "gene")) |
96 | 118 |
{ |
97 |
- by <- match.arg(by) |
|
98 |
- switch(by, |
|
99 |
- sgedge=.countReads_by_sgedge(x), |
|
100 |
- rsgedge=.countReads_by_rsgedge(x), |
|
101 |
- tx=.countReads_by_tx(x), |
|
102 |
- gene=.countReads_by_gene(x)) |
|
119 |
+ assigned_reads <- getReads(x, by=by) |
|
120 |
+ hits_col_idx <- grep("\\.hits$", colnames(assigned_reads)) |
|
121 |
+ if (length(hits_col_idx) == 0L) |
|
122 |
+ return(assigned_reads) |
|
123 |
+ read_counts <- endoapply(assigned_reads[hits_col_idx], elementLengths) |
|
124 |
+ colnames(read_counts) <- sub("\\.hits$", "", colnames(read_counts)) |
|
125 |
+ cbind(assigned_reads[-hits_col_idx], read_counts) |
|
103 | 126 |
} |
104 | 127 |
) |
105 | 128 |
|
... | ... |
@@ -2,19 +2,10 @@ o Replace the RNA-seq data used in the vignette by data that actually |
2 | 2 |
contains junction reads. The BAM files in TBX20BamSubset don't contain |
3 | 3 |
any! |
4 | 4 |
|
5 |
-o Move vignette to vignettes/ |
|
6 |
- |
|
7 |
-o Implement rsgedgesByTranscript(). |
|
8 |
- |
|
9 | 5 |
o Complete vignette (get rid of all TODO from it). |
10 | 6 |
|
11 | 7 |
o Complete man pages (get rid of all TODO from them). |
12 | 8 |
|
13 |
-o Fix issue with calls to plotting functions requiring 1 more key-stroke than |
|
14 |
- necessary before they actually start to plot something. |
|
15 |
- |
|
16 |
-o Add by="gene" to countReads() for counting of unique reads per gene. |
|
17 |
- |
|
18 | 9 |
o Add 'drop.ambiguous.hits' arg to countReads() for dropping reads that are |
19 | 10 |
assigned to multiple edges (when by="sgedge"), to multiple reduced edges |
20 | 11 |
(when by="rsgedge"), to multiple transcripts (when by="tx"), or to |
... | ... |
@@ -30,3 +21,11 @@ o Add 'drop.ambiguous.hits' arg to countReads() for dropping reads that are |
30 | 21 |
countReads(sg, by="tx", drop.ambiguous.hits=TRUE) and compare. Should |
31 | 22 |
produce the same result. |
32 | 23 |
|
24 |
+o Fix issue with calls to plotting functions requiring 1 more key-stroke than |
|
25 |
+ necessary before they actually start to plot something. |
|
26 |
+ |
|
27 |
+o Implement rsgedgesByTranscript(). |
|
28 |
+ |
|
29 |
+o Move vignette to vignettes/ |
|
30 |
+ |
|
31 |
+ |
... | ... |
@@ -74,7 +74,7 @@ |
74 | 74 |
SplicingGraphs object. |
75 | 75 |
|
76 | 76 |
\item \code{\link{countReads}} for summarizing the reads assigned to |
77 |
- the edges of a SplicingGraphs object. |
|
77 |
+ a SplicingGraphs object. |
|
78 | 78 |
|
79 | 79 |
\item \code{\link{toy_genes_gff}} for details about the toy data included |
80 | 80 |
in this package. |
... | ... |
@@ -7,15 +7,21 @@ |
7 | 7 |
|
8 | 8 |
|
9 | 9 |
\title{ |
10 |
- Summarize the reads assigned to the edges of a SplicingGraphs object |
|
10 |
+ Summarize the reads assigned to a SplicingGraphs object |
|
11 | 11 |
} |
12 | 12 |
|
13 | 13 |
\description{ |
14 |
- \code{countReads} returns a summarized count of the reads assigned to |
|
15 |
- the edges of a SplicingGraphs object. |
|
14 |
+ \code{getReads} returns the reads assigned to a SplicingGraphs object, |
|
15 |
+ summarized either by splicing graph edge, \emph{reduced} splicing graph |
|
16 |
+ edge, transcript, or gene. |
|
17 |
+ |
|
18 |
+ \code{countReads} counts the reads assigned to a SplicingGraphs object. |
|
19 |
+ The counting can be done by splicing graph edge, \emph{reduced} splicing |
|
20 |
+ graph edge, transcript, or gene. |
|
16 | 21 |
} |
17 | 22 |
|
18 | 23 |
\usage{ |
24 |
+getReads(x, by=c("sgedge", "rsgedge", "tx", "gene")) |
|
19 | 25 |
countReads(x, by=c("sgedge", "rsgedge", "tx", "gene")) |
20 | 26 |
} |
21 | 27 |
|
... | ... |
@@ -24,9 +30,9 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene")) |
24 | 30 |
A \link{SplicingGraphs} object. |
25 | 31 |
} |
26 | 32 |
\item{by}{ |
27 |
- Summarize by splicing graph edge (\code{by="sgedge"}), by \emph{reduced} |
|
28 |
- splicing graph edge (\code{by="rsgedge"}), by transcript (\code{by="tx"}), |
|
29 |
- or by gene (\code{by="gene"}). |
|
33 |
+ Summarize/count by splicing graph edge (\code{by="sgedge"}), by |
|
34 |
+ \emph{reduced} splicing graph edge (\code{by="rsgedge"}), by transcript |
|
35 |
+ (\code{by="tx"}), or by gene (\code{by="gene"}). |
|
30 | 36 |
} |
31 | 37 |
} |
32 | 38 |
|
... | ... |
@@ -43,8 +49,9 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene")) |
43 | 49 |
\item gene if \code{by="gene"}. |
44 | 50 |
} |
45 | 51 |
|
46 |
- And with one column per sample (containing the counts for that sample), |
|
47 |
- plus the two following additional leading columns: |
|
52 |
+ And with one column per sample (containing the reads for that sample for |
|
53 |
+ \code{getReads}, and the counts for that sample for \code{countReads}), |
|
54 |
+ plus the following two additional leading columns: |
|
48 | 55 |
\itemize{ |
49 | 56 |
\item if \code{by="sgedge"}: \code{"sgedge_id"}, containing the |
50 | 57 |
\emph{global splicing graph edge ids}, and \code{"ex_or_in"}, |
... | ... |
@@ -76,16 +83,32 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene")) |
76 | 83 |
example(assignReads) |
77 | 84 |
|
78 | 85 |
## --------------------------------------------------------------------- |
79 |
-## 2. Summarize the reads assigned to 'sg' |
|
86 |
+## 2. Summarize the reads by splicing graph edge |
|
87 |
+## --------------------------------------------------------------------- |
|
88 |
+getReads(sg) |
|
89 |
+countReads(sg) |
|
90 |
+ |
|
91 |
+## --------------------------------------------------------------------- |
|
92 |
+## 3. Summarize the reads by reduced splicing graph edge |
|
93 |
+## --------------------------------------------------------------------- |
|
94 |
+getReads(sg, by="rsgedge") |
|
95 |
+countReads(sg, by="rsgedge") |
|
96 |
+ |
|
97 |
+## --------------------------------------------------------------------- |
|
98 |
+## 4. Summarize the reads by transcript |
|
99 |
+## --------------------------------------------------------------------- |
|
100 |
+getReads(sg, by="tx") |
|
101 |
+countReads(sg, by="tx") |
|
102 |
+ |
|
103 |
+## --------------------------------------------------------------------- |
|
104 |
+## 4. Summarize the reads by gene |
|
80 | 105 |
## --------------------------------------------------------------------- |
81 |
-countReads(sg) # nb of reads per splicing graph edge |
|
82 |
-countReads(sg, by="rsgedge") # ... per reduced splicing graph edge |
|
83 |
-countReads(sg, by="tx") # ... per transcript |
|
84 |
-countReads(sg, by="gene") # ... per gene |
|
106 |
+getReads(sg, by="gene") |
|
107 |
+countReads(sg, by="gene") |
|
85 | 108 |
|
86 | 109 |
## --------------------------------------------------------------------- |
87 |
-## 3. Remove the reads from 'sg'. |
|
110 |
+## 5. Remove the reads from 'sg'. |
|
88 | 111 |
## --------------------------------------------------------------------- |
89 |
-removeReads(sg) |
|
112 |
+sg <- removeReads(sg) |
|
90 | 113 |
countReads(sg) |
91 | 114 |
} |