Browse code

- compare() was renamed pcompare() in S4Vectors --> change code accordingly - use UTF-8 encoding in DESCRIPTION and Rd files so I can have my accents back

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@119306 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 12/07/2016 09:05:23
Showing 1 changed files
... ...
@@ -147,7 +147,7 @@ reportReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
147 147
 }
148 148
 
149 149
 \author{
150
-  H. Pages
150
+  H. Pagès
151 151
 }
152 152
 
153 153
 \seealso{
Browse code

fix some broken links

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@98050 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 06/01/2015 07:13:31
Showing 1 changed files
... ...
@@ -113,7 +113,7 @@ reportReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
113 113
 }
114 114
 
115 115
 \value{
116
-  A \link[IRanges]{DataFrame} object with one row per:
116
+  A \link[S4Vectors]{DataFrame} object with one row per:
117 117
   \itemize{
118 118
     \item unique splicing graph edge, if \code{by="sgedge"};
119 119
     \item unique \emph{reduced} splicing graph edge, if \code{by="rsgedge"};
Browse code

arghh, forgot to escape % when using %in% in \examples section

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@76172 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 01/05/2013 17:55:45
Showing 1 changed files
... ...
@@ -211,7 +211,7 @@ ambiguous_reads
211 211
 
212 212
 ## Reads that are ambiguous at the "rsgedge" level must also be
213 213
 ## ambiguous at the "sgedge" level:
214
-stopifnot(all(ambiguous_reads$rsgedge %in% ambiguous_reads$sgedge))
214
+stopifnot(all(ambiguous_reads$rsgedge \%in\% ambiguous_reads$sgedge))
215 215
 
216 216
 ## However, there is no reason why reads that are ambiguous at the
217 217
 ## "tx" level should also be ambiguous at the "sgedge" or "rsgedge"
Browse code

Put \pkg{} around package names in the man pages.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@76137 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 30/04/2013 01:20:58
Showing 1 changed files
... ...
@@ -151,7 +151,7 @@ reportReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
151 151
 }
152 152
 
153 153
 \seealso{
154
-  This man page is part of the SplicingGraphs package.
154
+  This man page is part of the \pkg{SplicingGraphs} package.
155 155
   Please see \code{?`\link{SplicingGraphs-package}`} for an overview of the
156 156
   package and for an index of its man pages.
157 157
 }
Browse code

\tabular produces an ugly table. Using \preformatted instead.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@76133 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 29/04/2013 22:17:15
Showing 1 changed files
... ...
@@ -73,11 +73,12 @@ reportReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
73 73
     resolution is defined by combining the relationships between consecutive
74 74
     levels. All possible parent-child relationships are summarized in the
75 75
     following table:
76
-    \tabular{rlll}{
77
-                    \tab to: sgedge   \tab to: rsgedge  \tab to: tx      \cr
78
-      from: rsgedge \tab one-to-many  \tab              \tab             \cr
79
-      from: tx      \tab many-to-many \tab many-to-many \tab             \cr
80
-      from: gene    \tab one-to-many  \tab one-to-many  \tab one-to-many \cr
76
+    \preformatted{
77
+                    | to: sgedge   | to: rsgedge  | to: tx
78
+      --------------+--------------+--------------+------------
79
+      from: rsgedge | one-to-many  |              |            
80
+      from: tx      | many-to-many | many-to-many |            
81
+      from: gene    | one-to-many  | one-to-many  | one-to-many
81 82
     }
82 83
   }
83 84
 
Browse code

- Rename getReads() -> reportReads(). - Complete man page for countReads()/reportReads().

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@76132 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 29/04/2013 22:07:38
Showing 1 changed files
... ...
@@ -11,18 +11,18 @@
11 11
 }
12 12
 
13 13
 \description{
14
-  \code{getReads} returns the reads assigned to a SplicingGraphs object,
15
-  summarized either by splicing graph edge, \emph{reduced} splicing graph
16
-  edge, transcript, or gene.
17
-
18 14
   \code{countReads} counts the reads assigned to a SplicingGraphs object.
19 15
   The counting can be done by splicing graph edge, \emph{reduced} splicing
20 16
   graph edge, transcript, or gene.
17
+
18
+  \code{reportReads} is similar to \code{countReads} but returns right before
19
+  the final counting step, that is, the returned DataFrame contains the reads
20
+  instead of their counts.
21 21
 }
22 22
 
23 23
 \usage{
24
-getReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
25 24
 countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
25
+reportReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
26 26
 }
27 27
 
28 28
 \arguments{
... ...
@@ -30,14 +30,85 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
30 30
     A \link{SplicingGraphs} object.
31 31
   }
32 32
   \item{by}{
33
-    Summarize/count by splicing graph edge (\code{by="sgedge"}), by
34
-    \emph{reduced} splicing graph edge (\code{by="rsgedge"}), by transcript
35
-    (\code{by="tx"}), or by gene (\code{by="gene"}).
33
+    Can be \code{"sgedge"}, \code{"rsgedge"}, \code{"tx"}, or \code{"gene"}.
34
+    Specifies the \emph{level of resolution} that summarization should be
35
+    performed at. See Details section below.
36 36
   }
37 37
 }
38 38
 
39 39
 \details{
40
-  TODO
40
+  \subsection{Levels of resolution}{
41
+    \code{countReads} and \code{reportReads} allow summarization of the reads
42
+    at different levels of resolution. The level of resolution is determined
43
+    by the type of feature that one chooses via the \code{by} argument.
44
+    The supported resolutions are (from highest to lowest resolution):
45
+    \enumerate{
46
+      \item \code{by="sgedge"} for summarization at the splicing graph edge
47
+            level (i.e. at the exon/intron level);
48
+      \item \code{by="rsgedge"} for summarization at the \emph{reduced}
49
+            splicing graph edge level;
50
+      \item \code{by="tx"} for summarization at the transcript level;
51
+      \item \code{by="gene"} for summarization at the gene level.
52
+    }
53
+  }
54
+
55
+  \subsection{Relationship between levels of resolution}{
56
+    There is a parent-child relationship between the features
57
+    corresponding to a given level of resolution (the parent features)
58
+    and those corresponding to a higher level of resolution (the child
59
+    features).
60
+
61
+    For example, in the case of the first 2 levels of resolution listed
62
+    above, the parent-child relationship is the following: the parent
63
+    features are the \emph{reduced} splicing graph edges, the child features
64
+    are the splicing graph edges, and each parent feature is obtained by
65
+    merging one or more child features together.
66
+    Similarly, transcripts can be seen as parent features of \emph{reduced}
67
+    splicing graph edges, and genes as parent features of transcripts.
68
+    Note that the rsgedge/sgedge and gene/tx relationships are one-to-many,
69
+    but the tx/rsgedge relationship is many-to-many because a given edge can
70
+    belong to more than one transcript.
71
+
72
+    Finally, the parent-child relationship between 2 arbitrary levels of
73
+    resolution is defined by combining the relationships between consecutive
74
+    levels. All possible parent-child relationships are summarized in the
75
+    following table:
76
+    \tabular{rlll}{
77
+                    \tab to: sgedge   \tab to: rsgedge  \tab to: tx      \cr
78
+      from: rsgedge \tab one-to-many  \tab              \tab             \cr
79
+      from: tx      \tab many-to-many \tab many-to-many \tab             \cr
80
+      from: gene    \tab one-to-many  \tab one-to-many  \tab one-to-many \cr
81
+    }
82
+  }
83
+
84
+  \subsection{Multiple hits and ambiguous reads}{
85
+    An important distinction needs to be made between a read that hits a
86
+    given feature multiple times and a read that hits more than one feature.
87
+
88
+    If the former, the read is counted/reported only once for that feature.
89
+    For example, when summarizing at the transcript level, a read is
90
+    counted/reported only once for a given transcript, even if that read
91
+    hits more than one splicing graph edge (or \emph{reduced} splicing graph
92
+    edge) associated with that transcript.
93
+
94
+    If the latter, the read is said to be \emph{ambiguous}. An ambiguous read
95
+    is currently counted/reported for each feature where it has a hit.
96
+    This is a temporary situation: in the near future the user will be offered
97
+    options to handle ambiguous reads in different ways.
98
+  }
99
+
100
+  \subsection{Ambiguous reads and levels of resolution}{
101
+    A read might be ambiguous at one level of resolution but not at another.
102
+    Also the number of ambiguous reads is typically affected by the level
103
+    of resolution. However, even though higher resolution generally means
104
+    more ambiguous reads, this is only true when the switch from one level
105
+    of resolution to the other implies a parent-child relationship between
106
+    features that is one-to-many.
107
+    So, based on the above table, this is always true, except when
108
+    switching from using \code{by="tx"} to using \code{by="sgedge"} or
109
+    \code{by="rsgedge"}. In those cases, the switch can produce more
110
+    ambiguities but it can also produce fewer.
111
+  }
41 112
 }
42 113
 
43 114
 \value{
... ...
@@ -49,9 +120,9 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
49 120
     \item gene if \code{by="gene"}.
50 121
   }
51 122
 
52
-  And with one column per sample (containing the reads for that sample for
53
-  \code{getReads}, and the counts for that sample for \code{countReads}),
54
-  plus the following two additional leading columns:
123
+  And with one column per sample (containing the counts for that sample for
124
+  \code{countReads}, and the reads for that sample for \code{reportReads}),
125
+  plus the two following left columns:
55 126
   \itemize{
56 127
     \item if \code{by="sgedge"}: \code{"sgedge_id"}, containing the
57 128
           \emph{global splicing graph edge ids}, and \code{"ex_or_in"},
... ...
@@ -63,6 +134,15 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
63 134
     \item if \code{by="tx"}: \code{"tx_id"} and \code{"gene_id"};
64 135
     \item if \code{by="gene"}: \code{"gene_id"} and \code{"tx_id"}.
65 136
   }
137
+
138
+  For \code{countReads}, each column of counts is of type integer and
139
+  is named after the corresponding sample.
140
+  For \code{reportReads}, each column of reads is a CharacterList object
141
+  and its name is the name of the corresponding sample with the
142
+  \code{".hits"} suffix added to it.
143
+  In both cases, the name of the sample is the name that was passed to
144
+  \code{assignReads} when the reads of a given sample were initially
145
+  assigned. See \code{?\link{assignReads}} for more information.
66 146
 }
67 147
 
68 148
 \author{
... ...
@@ -85,29 +165,59 @@ example(assignReads)
85 165
 ## ---------------------------------------------------------------------
86 166
 ## 2. Summarize the reads by splicing graph edge
87 167
 ## ---------------------------------------------------------------------
88
-getReads(sg) 
89 168
 countReads(sg)
169
+reportReads(sg) 
90 170
 
91 171
 ## ---------------------------------------------------------------------
92 172
 ## 3. Summarize the reads by reduced splicing graph edge
93 173
 ## ---------------------------------------------------------------------
94
-getReads(sg, by="rsgedge")
95 174
 countReads(sg, by="rsgedge")
175
+reportReads(sg, by="rsgedge")
96 176
 
97 177
 ## ---------------------------------------------------------------------
98 178
 ## 4. Summarize the reads by transcript
99 179
 ## ---------------------------------------------------------------------
100
-getReads(sg, by="tx")
101 180
 countReads(sg, by="tx")
181
+reportReads(sg, by="tx")
102 182
 
103 183
 ## ---------------------------------------------------------------------
104
-## 4. Summarize the reads by gene
184
+## 5. Summarize the reads by gene
105 185
 ## ---------------------------------------------------------------------
106
-getReads(sg, by="gene")
107 186
 countReads(sg, by="gene")
187
+reportReads(sg, by="gene")
188
+
189
+## ---------------------------------------------------------------------
190
+## 6. A close look at ambiguous reads
191
+## ---------------------------------------------------------------------
192
+resolutions <- c("sgedge", "rsgedge", "tx", "gene")
193
+
194
+reported_reads <- lapply(resolutions,
195
+    function(by) {
196
+        reported_reads <- reportReads(sg, by=by)
197
+        unlist(reported_reads$TOYREADS.hits)
198
+    })
199
+
200
+## The set of reported reads is the same at all levels of resolution:
201
+unique_reported_reads <- lapply(reported_reads, unique)
202
+stopifnot(identical(unique_reported_reads,
203
+                    rep(unique_reported_reads[1], 4)))
204
+
205
+## Extract ambiguous reads for each level of resolution:
206
+ambiguous_reads <- lapply(reported_reads,
207
+                          function(x) unique(x[duplicated(x)]))
208
+names(ambiguous_reads) <- resolutions
209
+ambiguous_reads
210
+
211
+## Reads that are ambiguous at the "rsgedge" level must also be
212
+## ambiguous at the "sgedge" level:
213
+stopifnot(all(ambiguous_reads$rsgedge %in% ambiguous_reads$sgedge))
214
+
215
+## However, there is no reason why reads that are ambiguous at the
216
+## "tx" level should also be ambiguous at the "sgedge" or "rsgedge"
217
+## level!
108 218
 
109 219
 ## ---------------------------------------------------------------------
110
-## 5. Remove the reads from 'sg'.
220
+## 7. Remove the reads from 'sg'.
111 221
 ## ---------------------------------------------------------------------
112 222
 sg <- removeReads(sg)
113 223
 countReads(sg)
Browse code

Add getReads(). Similar to countReads() but returns right before the final counting step, that is, the returned DataFrame contains the reads instead of their counts.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@76127 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 29/04/2013 18:02:30
Showing 1 changed files
... ...
@@ -7,15 +7,21 @@
7 7
 
8 8
 
9 9
 \title{
10
-  Summarize the reads assigned to the edges of a SplicingGraphs object
10
+  Summarize the reads assigned to a SplicingGraphs object
11 11
 }
12 12
 
13 13
 \description{
14
-  \code{countReads} returns a summarized count of the reads assigned to
15
-  the edges of a SplicingGraphs object.
14
+  \code{getReads} returns the reads assigned to a SplicingGraphs object,
15
+  summarized either by splicing graph edge, \emph{reduced} splicing graph
16
+  edge, transcript, or gene.
17
+
18
+  \code{countReads} counts the reads assigned to a SplicingGraphs object.
19
+  The counting can be done by splicing graph edge, \emph{reduced} splicing
20
+  graph edge, transcript, or gene.
16 21
 }
17 22
 
18 23
 \usage{
24
+getReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
19 25
 countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
20 26
 }
21 27
 
... ...
@@ -24,9 +30,9 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
24 30
     A \link{SplicingGraphs} object.
25 31
   }
26 32
   \item{by}{
27
-    Summarize by splicing graph edge (\code{by="sgedge"}), by \emph{reduced}
28
-    splicing graph edge (\code{by="rsgedge"}), by transcript (\code{by="tx"}),
29
-    or by gene (\code{by="gene"}).
33
+    Summarize/count by splicing graph edge (\code{by="sgedge"}), by
34
+    \emph{reduced} splicing graph edge (\code{by="rsgedge"}), by transcript
35
+    (\code{by="tx"}), or by gene (\code{by="gene"}).
30 36
   }
31 37
 }
32 38
 
... ...
@@ -43,8 +49,9 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
43 49
     \item gene if \code{by="gene"}.
44 50
   }
45 51
 
46
-  And with one column per sample (containing the counts for that sample),
47
-  plus the two following additional leading columns:
52
+  And with one column per sample (containing the reads for that sample for
53
+  \code{getReads}, and the counts for that sample for \code{countReads}),
54
+  plus the following two additional leading columns:
48 55
   \itemize{
49 56
     \item if \code{by="sgedge"}: \code{"sgedge_id"}, containing the
50 57
           \emph{global splicing graph edge ids}, and \code{"ex_or_in"},
... ...
@@ -76,16 +83,32 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
76 83
 example(assignReads)
77 84
 
78 85
 ## ---------------------------------------------------------------------
79
-## 2. Summarize the reads assigned to 'sg'
86
+## 2. Summarize the reads by splicing graph edge
87
+## ---------------------------------------------------------------------
88
+getReads(sg) 
89
+countReads(sg)
90
+
91
+## ---------------------------------------------------------------------
92
+## 3. Summarize the reads by reduced splicing graph edge
93
+## ---------------------------------------------------------------------
94
+getReads(sg, by="rsgedge")
95
+countReads(sg, by="rsgedge")
96
+
97
+## ---------------------------------------------------------------------
98
+## 4. Summarize the reads by transcript
99
+## ---------------------------------------------------------------------
100
+getReads(sg, by="tx")
101
+countReads(sg, by="tx")
102
+
103
+## ---------------------------------------------------------------------
104
+## 4. Summarize the reads by gene
80 105
 ## ---------------------------------------------------------------------
81
-countReads(sg)  # nb of reads per splicing graph edge
82
-countReads(sg, by="rsgedge")  # ... per reduced splicing graph edge
83
-countReads(sg, by="tx")  # ... per transcript
84
-countReads(sg, by="gene")  # ... per gene
106
+getReads(sg, by="gene")
107
+countReads(sg, by="gene")
85 108
 
86 109
 ## ---------------------------------------------------------------------
87
-## 3. Remove the reads from 'sg'.
110
+## 5. Remove the reads from 'sg'.
88 111
 ## ---------------------------------------------------------------------
89
-removeReads(sg)
112
+sg <- removeReads(sg)
90 113
 countReads(sg)
91 114
 }
Browse code

countReads() now supports 'by="gene"' for counting hits by gene (in addition to 'by="sgedge"', 'by="rsgedge"', and 'by="tx"').

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@76090 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 26/04/2013 23:16:07
Showing 1 changed files
... ...
@@ -39,11 +39,12 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
39 39
   \itemize{
40 40
     \item unique splicing graph edge, if \code{by="sgedge"};
41 41
     \item unique \emph{reduced} splicing graph edge, if \code{by="rsgedge"};
42
-    \item transcript if \code{by="tx"}.
42
+    \item transcript if \code{by="tx"};
43
+    \item gene if \code{by="gene"}.
43 44
   }
44 45
 
45
-  And with one column per sample (containing the counts for each sample),
46
-  and the two following additional leading columns:
46
+  And with one column per sample (containing the counts for that sample),
47
+  plus the two following additional leading columns:
47 48
   \itemize{
48 49
     \item if \code{by="sgedge"}: \code{"sgedge_id"}, containing the
49 50
           \emph{global splicing graph edge ids}, and \code{"ex_or_in"},
... ...
@@ -52,7 +53,8 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
52 53
           \emph{global reduced splicing graph edge ids}, and
53 54
           \code{"ex_or_in"}, containing the type of edge (exon, intron,
54 55
           or mixed);
55
-    \item if \code{by="tx"}: \code{"tx_id"} and \code{"gene_id"}.
56
+    \item if \code{by="tx"}: \code{"tx_id"} and \code{"gene_id"};
57
+    \item if \code{by="gene"}: \code{"gene_id"} and \code{"tx_id"}.
56 58
   }
57 59
 }
58 60
 
... ...
@@ -74,22 +76,16 @@ countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
74 76
 example(assignReads)
75 77
 
76 78
 ## ---------------------------------------------------------------------
77
-## 2. Count the number of reads per splicing graph edge
79
+## 2. Summarize the reads assigned to 'sg'
78 80
 ## ---------------------------------------------------------------------
79
-countReads(sg)
80
-
81
-## ---------------------------------------------------------------------
82
-## 3. Count the number of reads per reduced splicing graph edge
83
-## ---------------------------------------------------------------------
84
-countReads(sg, by="rsgedge")
81
+countReads(sg)  # nb of reads per splicing graph edge
82
+countReads(sg, by="rsgedge")  # ... per reduced splicing graph edge
83
+countReads(sg, by="tx")  # ... per transcript
84
+countReads(sg, by="gene")  # ... per gene
85 85
 
86 86
 ## ---------------------------------------------------------------------
87
-## 4. Count the number of reads per transcript
88
-## ---------------------------------------------------------------------
89
-countReads(sg, by="tx")
90
-
91
-## ---------------------------------------------------------------------
92
-## 5. Remove the reads from 'sg'.
87
+## 3. Remove the reads from 'sg'.
93 88
 ## ---------------------------------------------------------------------
94 89
 removeReads(sg)
90
+countReads(sg)
95 91
 }
Browse code

Split countReads-methods.R into 2 units: assignReads.R and countReads-methods.R

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@76089 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 26/04/2013 22:47:35
Showing 1 changed files
... ...
@@ -2,54 +2,31 @@
2 2
 
3 3
 \alias{countReads-methods}
4 4
 
5
-\alias{assignReads}
6 5
 \alias{countReads}
7 6
 \alias{countReads,SplicingGraphs-method}
8
-\alias{removeReads}
9 7
 
10 8
 
11 9
 \title{
12
-  Assign reads to the edges of a SplicingGraphs object and summarize them
10
+  Summarize the reads assigned to the edges of a SplicingGraphs object
13 11
 }
14 12
 
15 13
 \description{
16
-  \code{assignReads} assigns reads to the exonic and intronic edges of a
17
-  \link{SplicingGraphs} object.
18
-
19
-  \code{countReads} returns a summarized count of the assigned reads.
20
-
21
-  \code{removeReads} removes all the reads assigned to a
22
-  \link{SplicingGraphs} object.
14
+  \code{countReads} returns a summarized count of the reads assigned to
15
+  the edges of a SplicingGraphs object.
23 16
 }
24 17
 
25 18
 \usage{
26
-assignReads(sg, reads, sample.name=NA)
27
-
28
-countReads(x, by=c("sgedge", "rsgedge", "tx"))
29
-
30
-removeReads(sg)
19
+countReads(x, by=c("sgedge", "rsgedge", "tx", "gene"))
31 20
 }
32 21
 
33 22
 \arguments{
34
-  \item{sg, x}{
23
+  \item{x}{
35 24
     A \link{SplicingGraphs} object.
36 25
   }
37
-  \item{reads}{
38
-    A \link[GenomicRanges]{GAlignments},
39
-    \link[GenomicRanges]{GAlignmentPairs}, or
40
-    \link[GenomicRanges]{GRangesList} object, containing the
41
-    reads to assign to the exons and introns in \code{sg}.
42
-    It must have unique names on it, typically the QNAME ("query name")
43
-    field coming from the BAM file. More on this in the 'About the read
44
-    names' section below.
45
-  }
46
-  \item{sample.name}{
47
-    A single string containing the name of the sample where the reads
48
-    are coming from.
49
-  }
50 26
   \item{by}{
51
-    Summarize by splicing graph edge (\code{"sgedge"}), by \emph{reduced}
52
-    splicing graph edge (\code{"rsgedge"}), or by transcript (\code{"tx"}).
27
+    Summarize by splicing graph edge (\code{by="sgedge"}), by \emph{reduced}
28
+    splicing graph edge (\code{by="rsgedge"}), by transcript (\code{by="tx"}),
29
+    or by gene (\code{by="gene"}).
53 30
   }
54 31
 }
55 32
 
... ...
@@ -57,65 +34,16 @@ removeReads(sg)
57 34
   TODO
58 35
 }
59 36
 
60
-\section{About read names}{
61
-  The read names are typically imported from the BAM file by calling
62
-  \code{\link[GenomicRanges]{readGAlignments}} (or
63
-  \code{\link[GenomicRanges]{readGAlignmentPairs}}) with
64
-  \code{use.names=TRUE}. This extracts the "query names" from the
65
-  file (stored in the QNAME field), and makes them the names of the
66
-  returned object.
67
-
68
-  The \code{reads} object must have unique names on it. The presence of
69
-  duplicated names generally indicates one (or both) of the following
70
-  situations:
71
-
72
-  \itemize{
73
-    \item (a) \code{reads} contains paired-end reads that have not been
74
-              paired;
75
-    \item (b) some of the reads are \emph{secondary alignments}.
76
-  }
77
-
78
-  If (a): you can find out whether reads in a BAM file are single- or
79
-  paired-end with the \code{\link[Rsamtools]{quickCountBam}} utility
80
-  from the Rsamtools package. If they're paired-end, load them with
81
-  \code{\link[GenomicRanges]{readGAlignmentPairs}}
82
-  instead of \code{\link[GenomicRanges]{readGAlignments}}, and that
83
-  will pair them.
84
-
85
-  If (b): you can filter out secondary alignments by passing
86
-  \code{'isNotPrimaryRead=FALSE'} to \code{\link[Rsamtools]{scanBamFlag}}
87
-  when preparing the \link[Rsamtools]{ScanBamParam} object used to load
88
-  the reads. For example:
89
-  \preformatted{
90
-    library(Rsamtools)
91
-    flag0 <- scanBamFlag(isNotPrimaryRead=FALSE,
92
-                         isNotPassingQualityControls=FALSE,
93
-                         isDuplicate=FALSE)
94
-    param0 <- ScanBamParam(flag=flag0)
95
-    reads <- readGAlignments("path/to/BAM/file", use.names=TRUE,
96
-                                  param=param0)
97
-  }
98
-  This will filter out records that have flag 0x100 (secondary alignment)
99
-  set to 1. See \code{?\link[Rsamtools]{scanBamFlag}} in the Rsamtools
100
-  package for more information.
101
-  See the SAM Specs on the SAMtools project page at
102
-  \url{http://samtools.sourceforge.net/} for a description of the
103
-  SAM/BAM flags.
104
-}
105
-
106 37
 \value{
107
-  For \code{assignReads}: the supplied \link{SplicingGraphs} object with
108
-  the reads assigned to it.
109
-
110
-  For \code{countReads}: a \link[IRanges]{DataFrame} object.
111
-  It has one row per:
38
+  A \link[IRanges]{DataFrame} object with one row per:
112 39
   \itemize{
113 40
     \item unique splicing graph edge, if \code{by="sgedge"};
114 41
     \item unique \emph{reduced} splicing graph edge, if \code{by="rsgedge"};
115 42
     \item transcript if \code{by="tx"}.
116 43
   }
117
-  The returned \link[IRanges]{DataFrame} object has one column of counts per
118
-  sample, and the two following additional leading columns:
44
+
45
+  And with one column per sample (containing the counts for each sample),
46
+  and the two following additional leading columns:
119 47
   \itemize{
120 48
     \item if \code{by="sgedge"}: \code{"sgedge_id"}, containing the
121 49
           \emph{global splicing graph edge ids}, and \code{"ex_or_in"},
... ...
@@ -136,83 +64,32 @@ removeReads(sg)
136 64
   This man page is part of the SplicingGraphs package.
137 65
   Please see \code{?`\link{SplicingGraphs-package}`} for an overview of the
138 66
   package and for an index of its man pages.
139
-
140
-  Other topics related to this man page and documented in other packages:
141
-  \itemize{
142
-    \item The \link[GenomicRanges]{GRangesList},
143
-          \link[GenomicRanges]{GAlignments}, and
144
-          \link[GenomicRanges]{GAlignmentPairs} classes
145
-          in the GenomicRanges package.
146
-
147
-    \item The \code{\link[Rsamtools]{quickCountBam}} and
148
-          \code{\link[Rsamtools]{ScanBamParam}} functions in the
149
-          Rsamtools package.
150
-  }
151 67
 }
152 68
 
153 69
 \examples{
154 70
 ## ---------------------------------------------------------------------
155
-## 1. Make SplicingGraphs object 'sg' from toy gene model (see
156
-##    '?SplicingGraphs')
157
-## ---------------------------------------------------------------------
158
-example(SplicingGraphs) 
159
-sg
160
-
161
-## 'sg' has 1 element per gene and 'names(sg)' gives the gene ids.
162
-names(sg)
163
-
71
+## 1. Make SplicingGraphs object 'sg' from toy gene model and assign toy
72
+##    reads to it (see '?assignReads')
164 73
 ## ---------------------------------------------------------------------
165
-## 2. Load toy reads
166
-## ---------------------------------------------------------------------
167
-## Load toy reads (single-end) from a BAM file. We filter out secondary
168
-## alignments, reads not passing quality controls, and PCR or optical
169
-## duplicates (see ?scanBamFlag in the Rsamtools package for more
170
-## information):
171
-flag0 <- scanBamFlag(isNotPrimaryRead=FALSE,
172
-                     isNotPassingQualityControls=FALSE,
173
-                     isDuplicate=FALSE)
174
-param0 <- ScanBamParam(flag=flag0)
175
-gal <- readGAlignments(toy_reads_bam(), use.names=TRUE, param=param0)
176
-gal
177
-
178
-## ---------------------------------------------------------------------
179
-## 3. Assign the reads to the exons and introns in 'sg'
180
-## ---------------------------------------------------------------------
181
-## The same read can be assigned to more than 1 exon or intron (e.g. a
182
-## junction read with 1 gap can be assigned to 2 exons and 1 intron).
183
-sg <- assignReads(sg, gal, sample.name="TOYREADS")
184
-
185
-## See the assignments to the splicing graph edges.
186
-edge_by_tx <- sgedgesByTranscript(sg, with.hits.mcols=TRUE)
187
-edge_data <- mcols(unlist(edge_by_tx))
188
-colnames(edge_data)
189
-head(edge_data)
190
-edge_data[ , c("sgedge_id", "TOYREADS.hits")]
191
-
192
-edge_by_gene <- sgedgesByGene(sg, with.hits.mcols=TRUE)
193
-mcols(unlist(edge_by_gene))
194
-
195
-## See the assignments to the reduced splicing graph edges.
196
-redge_by_gene <- rsgedgesByGene(sg, with.hits.mcols=TRUE)
197
-mcols(unlist(redge_by_gene))
74
+example(assignReads)
198 75
 
199 76
 ## ---------------------------------------------------------------------
200
-## 4. Count the number of reads per splicing graph edge
77
+## 2. Count the number of reads per splicing graph edge
201 78
 ## ---------------------------------------------------------------------
202 79
 countReads(sg)
203 80
 
204 81
 ## ---------------------------------------------------------------------
205
-## 5. Count the number of reads per reduced splicing graph edge
82
+## 3. Count the number of reads per reduced splicing graph edge
206 83
 ## ---------------------------------------------------------------------
207 84
 countReads(sg, by="rsgedge")
208 85
 
209 86
 ## ---------------------------------------------------------------------
210
-## 6. Count the number of reads per transcript
87
+## 4. Count the number of reads per transcript
211 88
 ## ---------------------------------------------------------------------
212 89
 countReads(sg, by="tx")
213 90
 
214 91
 ## ---------------------------------------------------------------------
215
-## 7. Remove the reads from 'sg'.
92
+## 5. Remove the reads from 'sg'.
216 93
 ## ---------------------------------------------------------------------
217 94
 removeReads(sg)
218 95
 }
Browse code

Starting unit tests + other minor tweaks.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/SplicingGraphs@76087 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 26/04/2013 22:09:30
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,218 @@
1
+\name{countReads-methods}
2
+
3
+\alias{countReads-methods}
4
+
5
+\alias{assignReads}
6
+\alias{countReads}
7
+\alias{countReads,SplicingGraphs-method}
8
+\alias{removeReads}
9
+
10
+
11
+\title{
12
+  Assign reads to the edges of a SplicingGraphs object and summarize them
13
+}
14
+
15
+\description{
16
+  \code{assignReads} assigns reads to the exonic and intronic edges of a
17
+  \link{SplicingGraphs} object.
18
+
19
+  \code{countReads} returns a summarized count of the assigned reads.
20
+
21
+  \code{removeReads} removes all the reads assigned to a
22
+  \link{SplicingGraphs} object.
23
+}
24
+
25
+\usage{
26
+assignReads(sg, reads, sample.name=NA)
27
+
28
+countReads(x, by=c("sgedge", "rsgedge", "tx"))
29
+
30
+removeReads(sg)
31
+}
32
+
33
+\arguments{
34
+  \item{sg, x}{
35
+    A \link{SplicingGraphs} object.
36
+  }
37
+  \item{reads}{
38
+    A \link[GenomicRanges]{GAlignments},
39
+    \link[GenomicRanges]{GAlignmentPairs}, or
40
+    \link[GenomicRanges]{GRangesList} object, containing the
41
+    reads to assign to the exons and introns in \code{sg}.
42
+    It must have unique names on it, typically the QNAME ("query name")
43
+    field coming from the BAM file. More on this in the 'About read
44
+    names' section below.
45
+  }
46
+  \item{sample.name}{
47
+    A single string containing the name of the sample where the reads
48
+    are coming from.
49
+  }
50
+  \item{by}{
51
+    Summarize by splicing graph edge (\code{"sgedge"}), by \emph{reduced}
52
+    splicing graph edge (\code{"rsgedge"}), or by transcript (\code{"tx"}).
53
+  }
54
+}
55
+
56
+\details{
57
+  TODO
58
+}
59
+
60
+\section{About read names}{
61
+  The read names are typically imported from the BAM file by calling
62
+  \code{\link[GenomicRanges]{readGAlignments}} (or
63
+  \code{\link[GenomicRanges]{readGAlignmentPairs}}) with
64
+  \code{use.names=TRUE}. This extracts the "query names" from the
65
+  file (stored in the QNAME field), and makes them the names of the
66
+  returned object.
67
+
68
+  The \code{reads} object must have unique names on it. The presence of
69
+  duplicated names generally indicates one (or both) of the following
70
+  situations:
71
+
72
+  \itemize{
73
+    \item (a) \code{reads} contains paired-end reads that have not been
74
+              paired;
75
+    \item (b) some of the reads are \emph{secondary alignments}.
76
+  }
77
+
78
+  If (a): you can find out whether reads in a BAM file are single- or
79
+  paired-end with the \code{\link[Rsamtools]{quickCountBam}} utility
80
+  from the Rsamtools package. If they're paired-end, load them with
81
+  \code{\link[GenomicRanges]{readGAlignmentPairs}}
82
+  instead of \code{\link[GenomicRanges]{readGAlignments}}, and that
83
+  will pair them.
84
+
85
+  If (b): you can filter out secondary alignments by passing
86
+  \code{'isNotPrimaryRead=FALSE'} to \code{\link[Rsamtools]{scanBamFlag}}
87
+  when preparing the \link[Rsamtools]{ScanBamParam} object used to load
88
+  the reads. For example:
89
+  \preformatted{
90
+    library(Rsamtools)
91
+    flag0 <- scanBamFlag(isNotPrimaryRead=FALSE,
92
+                         isNotPassingQualityControls=FALSE,
93
+                         isDuplicate=FALSE)
94
+    param0 <- ScanBamParam(flag=flag0)
95
+    reads <- readGAlignments("path/to/BAM/file", use.names=TRUE,
96
+                                  param=param0)
97
+  }
98
+  This will filter out records that have flag 0x100 (secondary alignment)
99
+  set to 1. See \code{?\link[Rsamtools]{scanBamFlag}} in the Rsamtools
100
+  package for more information.
101
+  See the SAM Specs on the SAMtools project page at
102
+  \url{http://samtools.sourceforge.net/} for a description of the
103
+  SAM/BAM flags.
104
+}
105
+
106
+\value{
107
+  For \code{assignReads}: the supplied \link{SplicingGraphs} object with
108
+  the reads assigned to it.
109
+
110
+  For \code{countReads}: a \link[IRanges]{DataFrame} object.
111
+  It has one row per:
112
+  \itemize{
113
+    \item unique splicing graph edge, if \code{by="sgedge"};
114
+    \item unique \emph{reduced} splicing graph edge, if \code{by="rsgedge"};
115
+    \item transcript, if \code{by="tx"}.
116
+  }
117
+  The returned \link[IRanges]{DataFrame} object has one column of counts per
118
+  sample, and the two following additional leading columns:
119
+  \itemize{
120
+    \item if \code{by="sgedge"}: \code{"sgedge_id"}, containing the
121
+          \emph{global splicing graph edge ids}, and \code{"ex_or_in"},
122
+          containing the type of edge (exon or intron);
123
+    \item if \code{by="rsgedge"}: \code{"rsgedge_id"}, containing the
124
+          \emph{global reduced splicing graph edge ids}, and
125
+          \code{"ex_or_in"}, containing the type of edge (exon, intron,
126
+          or mixed);
127
+    \item if \code{by="tx"}: \code{"tx_id"} and \code{"gene_id"}.
128
+  }
129
+}
130
+
131
+\author{
132
+  H. Pages
133
+}
134
+
135
+\seealso{
136
+  This man page is part of the SplicingGraphs package.
137
+  Please see \code{?`\link{SplicingGraphs-package}`} for an overview of the
138
+  package and for an index of its man pages.
139
+
140
+  Other topics related to this man page and documented in other packages:
141
+  \itemize{
142
+    \item The \link[GenomicRanges]{GRangesList},
143
+          \link[GenomicRanges]{GAlignments}, and
144
+          \link[GenomicRanges]{GAlignmentPairs} classes
145
+          in the GenomicRanges package.
146
+
147
+    \item The \code{\link[Rsamtools]{quickCountBam}} and
148
+          \code{\link[Rsamtools]{ScanBamParam}} functions in the
149
+          Rsamtools package.
150
+  }
151
+}
152
+
153
+\examples{
154
+## ---------------------------------------------------------------------
155
+## 1. Make SplicingGraphs object 'sg' from toy gene model (see
156
+##    '?SplicingGraphs')
157
+## ---------------------------------------------------------------------
158
+example(SplicingGraphs) 
159
+sg
160
+
161
+## 'sg' has 1 element per gene and 'names(sg)' gives the gene ids.
162
+names(sg)
163
+
164
+## ---------------------------------------------------------------------
165
+## 2. Load toy reads
166
+## ---------------------------------------------------------------------
167
+## Load toy reads (single-end) from a BAM file. We filter out secondary
168
+## alignments, reads not passing quality controls, and PCR or optical
169
+## duplicates (see ?scanBamFlag in the Rsamtools package for more
170
+## information):
171
+flag0 <- scanBamFlag(isNotPrimaryRead=FALSE,
172
+                     isNotPassingQualityControls=FALSE,
173
+                     isDuplicate=FALSE)
174
+param0 <- ScanBamParam(flag=flag0)
175
+gal <- readGAlignments(toy_reads_bam(), use.names=TRUE, param=param0)
176
+gal
177
+
178
+## ---------------------------------------------------------------------
179
+## 3. Assign the reads to the exons and introns in 'sg'
180
+## ---------------------------------------------------------------------
181
+## The same read can be assigned to more than 1 exon or intron (e.g. a
182
+## junction read with 1 gap can be assigned to 2 exons and 1 intron).
183
+sg <- assignReads(sg, gal, sample.name="TOYREADS")
184
+
185
+## See the assignments to the splicing graph edges.
186
+edge_by_tx <- sgedgesByTranscript(sg, with.hits.mcols=TRUE)
187
+edge_data <- mcols(unlist(edge_by_tx))
188
+colnames(edge_data)
189
+head(edge_data)
190
+edge_data[ , c("sgedge_id", "TOYREADS.hits")]
191
+
192
+edge_by_gene <- sgedgesByGene(sg, with.hits.mcols=TRUE)
193
+mcols(unlist(edge_by_gene))
194
+
195
+## See the assignments to the reduced splicing graph edges.
196
+redge_by_gene <- rsgedgesByGene(sg, with.hits.mcols=TRUE)
197
+mcols(unlist(redge_by_gene))
198
+
199
+## ---------------------------------------------------------------------
200
+## 4. Count the number of reads per splicing graph edge
201
+## ---------------------------------------------------------------------
202
+countReads(sg)
203
+
204
+## ---------------------------------------------------------------------
205
+## 5. Count the number of reads per reduced splicing graph edge
206
+## ---------------------------------------------------------------------
207
+countReads(sg, by="rsgedge")
208
+
209
+## ---------------------------------------------------------------------
210
+## 6. Count the number of reads per transcript
211
+## ---------------------------------------------------------------------
212
+countReads(sg, by="tx")
213
+
214
+## ---------------------------------------------------------------------
215
+## 7. Remove the reads from 'sg'.
216
+## ---------------------------------------------------------------------
217
+removeReads(sg)
218
+}