Browse code

Renamed calculateGCContentByInterval to preprocessIntervals.

Markus Riester authored on 27/12/2017 02:30:38
Showing 16 changed files

... ...
@@ -26,6 +26,7 @@ export(plotAbs)
26 26
 export(plotBestNormal)
27 27
 export(poolCoverage)
28 28
 export(predictSomatic)
29
+export(preprocessIntervals)
29 30
 export(readCoverageFile)
30 31
 export(readCurationFile)
31 32
 export(runAbsoluteCN)
... ...
@@ -16,6 +16,7 @@ API CHANGES
16 16
       purity/ploidy solution)
17 17
     o changed default of runAbsoluteCN max.logr.sdev to 0.6 (from 0.75)
18 18
     o createTargetWeights does not require tumor coverages anymore
19
+    o calculateBamCoverageByInterval was renamed to preprocessIntervals
19 20
 
20 21
 
21 22
 
... ...
@@ -18,7 +18,7 @@
18 18
 #' @param keep.duplicates Keep or remove duplicated reads.
19 19
 #' @return Returns total and average coverage by intervals.
20 20
 #' @author Markus Riester
21
-#' @seealso \code{\link{calculateGCContentByInterval}
21
+#' @seealso \code{\link{preprocessIntervals}
22 22
 #' \link{correctCoverageBias} \link{runAbsoluteCN}}
23 23
 #' @examples
24 24
 #'
... ...
@@ -90,9 +90,8 @@ log.ratio.cutoffs = c(-0.9, 0.9), failed = NULL, all.genes = FALSE) {
90 90
 #' @param gc.gene.file A mapping file that assigns GC content and gene symbols
91 91
 #' to each exon in the coverage files. Used for generating gene-level calls.
92 92
 #' First column in format CHR:START-END. Second column GC content (0 to 1).
93
-#' Third column gene symbol. This file can be generated with the \sQuote{GATK
94
-#' GCContentByInterval} tool or with the
95
-#' \code{\link{calculateGCContentByInterval}} function.
93
+#' Third column gene symbol. This file is generated with the 
94
+#' \code{\link{preprocessIntervals}} function.
96 95
 #' @param fun.focal Function for identifying focal amplifications. Defaults to
97 96
 #' \code{\link{findFocal}}.
98 97
 #' @param args.focal Arguments for focal amplification function.
... ...
@@ -13,9 +13,8 @@ globalVariables(names=c("..level.."))
13 13
 #' @param gc.gene.file File providing GC content for each exon in the coverage
14 14
 #' files. First column in format CHR:START-END. Second column GC content (0 to
15 15
 #' 1).  Third column provides gene symbols, which are optional, but used in
16
-#' \code{\link{runAbsoluteCN}} to generate gene level calls. This file can be
17
-#' generated with GATK GCContentByInterval tool or with the
18
-#' \code{\link{calculateGCContentByInterval}} function.
16
+#' \code{\link{runAbsoluteCN}} to generate gene level calls. This file is 
17
+#' generated with the \code{\link{preprocessIntervals}} function.
19 18
 #' @param output.file Optionally, write file with GC corrected coverage. Can be
20 19
 #' read with the \code{\link{readCoverageFile}} function.
21 20
 #' @param plot.gc.bias Optionally, plot GC profiles of the pre-normalized and
... ...
@@ -27,7 +26,7 @@ globalVariables(names=c("..level.."))
27 26
 #' estimation is applied. If the \code{plot.gc.bias} parameter is set as
28 27
 #' \code{FALSE}, this will be ignored.
29 28
 #' @author Angad Singh, Markus Riester
30
-#' @seealso \code{\link{calculateGCContentByInterval}}
29
+#' @seealso \code{\link{preprocessIntervals}}
31 30
 #' @examples
32 31
 #' 
33 32
 #' normal.coverage.file <- system.file("extdata", "example_normal.txt", 
34 33
similarity index 88%
35 34
rename from R/calculateGCContentByInterval.R
36 35
rename to R/preprocessIntervals.R
... ...
@@ -1,9 +1,9 @@
1
-#' Calculates GC content by interval
1
+#' Preprocess intervals
2 2
 #' 
3
-#' Uses \code{scanFa} from the Rsamtools package to retrieve GC content of
4
-#' intervals in a reference FASTA file. Can optimize intervals for copy
5
-#' number calling by tiling long intervals and by including off-target regions.
6
-#' This optimization largely follows CNVkit. 
3
+#' Optimize intervals for copy number calling by tiling long intervals and by 
4
+#' including off-target regions. Uses \code{scanFa} from the Rsamtools package 
5
+#' to retrieve GC content of intervals in a reference FASTA file. If provided,
6
+#' will annotate intervals with mappability and replication timing scores.
7 7
 #' 
8 8
 #' @param interval.file File specifying the intervals. Interval is expected in
9 9
 #' first column in format CHR:START-END.  Instead of a file, a \code{GRanges}
... ...
@@ -44,25 +44,30 @@
44 44
 #'     package="PureCN", mustWork = TRUE)
45 45
 #' bed.file <- system.file("extdata", "ex2_intervals.bed", 
46 46
 #'     package="PureCN", mustWork = TRUE)
47
-#' calculateGCContentByInterval(interval.file, reference.file, 
47
+#' preprocessIntervals(interval.file, reference.file, 
48 48
 #'     output.file="gc_file.txt")
49 49
 #' 
50 50
 #' intervals <- import(bed.file)
51
-#' calculateGCContentByInterval(intervals, reference.file, 
51
+#' preprocessIntervals(intervals, reference.file, 
52 52
 #'     output.file="gc_file.txt")
53 53
 #' 
54
-#' @export calculateGCContentByInterval
54
+#' @export preprocessIntervals
55 55
 #' @importFrom rtracklayer import
56 56
 #' @importFrom Biostrings letterFrequency
57 57
 #' @importFrom BiocGenerics unstrand
58 58
 #' @importFrom stats aggregate
59 59
 #' @importFrom S4Vectors mcols
60 60
 #' @importFrom GenomeInfoDb seqlevelsInUse seqlengths seqlevels<-
61
-calculateGCContentByInterval <- function(interval.file, reference.file,
62
-output.file = NULL, off.target=FALSE, average.target.width=400, 
63
-min.off.target.width=20000, average.off.target.width=200000,  
64
-off.target.padding=-500, mappability=NULL, min.mappability=c(0.5,0.1,0.7),
65
-reptiming=NULL, off.target.seqlevels=c("targeted", "all")) {
61
+preprocessIntervals <- function(interval.file, reference.file,
62
+                                output.file = NULL, off.target = FALSE,
63
+                                average.target.width = 400,
64
+                                min.off.target.width = 20000,
65
+                                average.off.target.width = 200000,
66
+                                off.target.padding = -500, mappability = NULL,
67
+                                min.mappability = c(0.5, 0.1, 0.7), 
68
+                                reptiming = NULL,
69
+                                off.target.seqlevels=c("targeted", "all")) {
70
+
66 71
     if (class(interval.file)=="GRanges") {
67 72
         interval.gr <- .checkIntervals(interval.file)
68 73
     } else {
... ...
@@ -156,6 +161,18 @@ reptiming=NULL, off.target.seqlevels=c("targeted", "all")) {
156 161
     invisible(interval.gr)
157 162
 }
158 163
 
164
+#' Calculates GC content by interval
165
+#'
166
+#' This function was renamed to \code{\link{preprocessIntervals}}.
167
+#'
168
+#' @param ... Arguments passed to \code{\link{preprocessIntervals}}.
169
+#' 
170
+#' @export calculateGCContentByInterval
171
+calculateGCContentByInterval <- function(...) {
172
+    .Deprecated("preprocessIntervals")
173
+    preprocessIntervals(...)
174
+}    
175
+
159 176
 # this function removes short chromosomes that have no probes (mainly a
160 177
 # general way to remove chrM)
161 178
 .dropShortUntargetedSeqLevels <- function(offRegions, interval.gr, minSize) {
... ...
@@ -161,9 +161,8 @@
161 161
 #' @param gc.gene.file A mapping file that assigns GC content and gene symbols
162 162
 #' to each exon in the coverage files. Used for generating gene-level calls.
163 163
 #' First column in format CHR:START-END. Second column GC content (0 to 1).
164
-#' Third column gene symbol. This file can be generated with the \sQuote{GATK
165
-#' GCContentByInterval} tool or with the
166
-#' \code{\link{calculateGCContentByInterval}} function.
164
+#' Third column gene symbol. This file is generated with the
165
+#' \code{\link{preprocessIntervals}} function.
167 166
 #' @param max.dropout Measures GC bias as ratio of coverage in AT-rich (GC <
168 167
 #' 0.5) versus GC-rich on-target regions (GC >= 0.5). High drop-out might 
169 168
 #' indicate that  data was not GC-normalized or that the sample quality might 
... ...
@@ -10,16 +10,16 @@ option_list <- list(
10 10
         default = NULL,
11 11
         help = "Infile specifying target (baits) intervals. Needs to be parsable by rtracklayer."),
12 12
     make_option(c("--offtarget"), action = "store_true",
13
-        default = formals(PureCN::calculateGCContentByInterval)$off.target, 
13
+        default = formals(PureCN::preprocessIntervals)$off.target, 
14 14
         help = "Include off-target regions [default %default]"),
15 15
     make_option(c("--targetwidth"), action = "store", type = "integer",
16
-        default=formals(PureCN::calculateGCContentByInterval)$average.target.width, 
16
+        default=formals(PureCN::preprocessIntervals)$average.target.width, 
17 17
         help = "Split large targets to approximately that size [default %default]"),
18 18
     make_option(c("--offtargetwidth"), action = "store", type = "integer",
19
-        default = formals(PureCN::calculateGCContentByInterval)$average.off.target.width, 
19
+        default = formals(PureCN::preprocessIntervals)$average.off.target.width, 
20 20
         help = "Bin off-target regions to approximately that size [default %default]"),
21 21
     make_option(c("--offtargetseqlevels"), action = "store", type = "character",
22
-        default=formals(PureCN::calculateGCContentByInterval)$off.target.seqlevels[[2]], 
22
+        default=formals(PureCN::preprocessIntervals)$off.target.seqlevels[[2]], 
23 23
         help = "Controls how to deal with chromosomes/contigs not found in infile. One of targeted, all [default %default]"),
24 24
     make_option(c("--mappability"), action = "store", type = "character", 
25 25
         help = "File parsable by rtracklayer specifying mappability scores of genomic regions."),
... ...
@@ -110,7 +110,7 @@ if (!opt$offtarget) {
110 110
      " Amplicon data. Add --offtarget to include them.")
111 111
 }
112 112
 
113
-outGC <- calculateGCContentByInterval(intervals, reference.file, 
113
+outGC <- preprocessIntervals(intervals, reference.file, 
114 114
     output.file = outfile, off.target = opt$offtarget, 
115 115
     mappability = mappability, average.off.target.width = opt$offtargetwidth,
116 116
     reptiming = reptiming, off.target.seqlevels = opt$offtargetseqlevels,
... ...
@@ -11,6 +11,6 @@
11 11
   The following functions are deprecated and will be made defunct; use
12 12
   the replacement indicated below:
13 13
   \itemize{
14
-    \item{none}
14
+    \item{calculateGCContentByInterval: \code{\link{preprocessIntervals}}}
15 15
   }
16 16
 }
... ...
@@ -47,7 +47,7 @@ coverage <- calculateBamCoverageByInterval(bam.file = bam.file,
47 47
 
48 48
 }
49 49
 \seealso{
50
-\code{\link{calculateGCContentByInterval}
50
+\code{\link{preprocessIntervals}
51 51
 \link{correctCoverageBias} \link{runAbsoluteCN}}
52 52
 }
53 53
 \author{
... ...
@@ -1,83 +1,14 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/calculateGCContentByInterval.R
2
+% Please edit documentation in R/preprocessIntervals.R
3 3
 \name{calculateGCContentByInterval}
4 4
 \alias{calculateGCContentByInterval}
5 5
 \title{Calculates GC content by interval}
6 6
 \usage{
7
-calculateGCContentByInterval(interval.file, reference.file,
8
-  output.file = NULL, off.target = FALSE, average.target.width = 400,
9
-  min.off.target.width = 20000, average.off.target.width = 2e+05,
10
-  off.target.padding = -500, mappability = NULL, min.mappability = c(0.5,
11
-  0.1, 0.7), reptiming = NULL, off.target.seqlevels = c("targeted", "all"))
7
+calculateGCContentByInterval(...)
12 8
 }
13 9
 \arguments{
14
-\item{interval.file}{File specifying the intervals. Interval is expected in
15
-first column in format CHR:START-END.  Instead of a file, a \code{GRanges}
16
-object can be provided. This allows the use of BED files for example. Note
17
-that GATK interval files are 1-based (first position of the genome is 1).
18
-Other formats like BED files are often 0-based. The \code{import} function
19
-will automatically convert to 1-based \code{GRanges}.}
20
-
21
-\item{reference.file}{Reference FASTA file.}
22
-
23
-\item{output.file}{Optionally, write GC content file.}
24
-
25
-\item{off.target}{Include off-target regions.}
26
-
27
-\item{average.target.width}{Split large targets to approximately this size.}
28
-
29
-\item{min.off.target.width}{Only include off-target regions of that
30
-size}
31
-
32
-\item{average.off.target.width}{Split off-target regions to that}
33
-
34
-\item{off.target.padding}{Pad off-target regions.}
35
-
36
-\item{mappability}{Annotate intervals with mappability score. Assumed on a scale
37
-from 0 to 1, with score being 1/(number alignments). Expected as \code{GRanges}
38
-object with first meta column being the score. Regions outside these ranges are
39
-ignored, assuming that \code{mappability} covers the whole accessible genome.}
40
-
41
-\item{min.mappability}{\code{double(3)} specifying the minimum mappability score
42
-for on-target, off-target, and chrY regions in that order. The chrY regions
43
-are only used for sex determination in \sQuote{PureCN} and are therefore 
44
-treated differently. Requires \code{mappability}.}
45
-
46
-\item{reptiming}{Annotate intervals with replication timing score. Expected as 
47
-\code{GRanges} object with first meta column being the score.}
48
-
49
-\item{off.target.seqlevels}{Controls how to deal with chromosomes/contigs
50
-found in the \code{reference.file} but not in the \code{interval.file}.}
51
-}
52
-\value{
53
-Returns GC content by interval as \code{GRanges} object.
10
+\item{...}{Arguments passed to \code{\link{preprocessIntervals}}.}
54 11
 }
55 12
 \description{
56
-Uses \code{scanFa} from the Rsamtools package to retrieve GC content of
57
-intervals in a reference FASTA file. Can optimize intervals for copy
58
-number calling by tiling long intervals and by including off-target regions.
59
-This optimization largely follows CNVkit.
60
-}
61
-\examples{
62
-
63
-reference.file <- system.file("extdata", "ex2_reference.fa", 
64
-    package="PureCN", mustWork = TRUE)
65
-interval.file <- system.file("extdata", "ex2_intervals.txt", 
66
-    package="PureCN", mustWork = TRUE)
67
-bed.file <- system.file("extdata", "ex2_intervals.bed", 
68
-    package="PureCN", mustWork = TRUE)
69
-calculateGCContentByInterval(interval.file, reference.file, 
70
-    output.file="gc_file.txt")
71
-
72
-intervals <- import(bed.file)
73
-calculateGCContentByInterval(intervals, reference.file, 
74
-    output.file="gc_file.txt")
75
-
76
-}
77
-\references{
78
-Talevich et al. (2016). CNVkit: Genome-Wide Copy Number 
79
-Detection and Visualization from Targeted DNA Sequencing. PLoS Comput Biol.
80
-}
81
-\author{
82
-Markus Riester
13
+This function was renamed to \code{\link{preprocessIntervals}}.
83 14
 }
... ...
@@ -26,9 +26,8 @@ callAlterationsFromSegmentation(sampleid, chr, start, end, num.mark = NA,
26 26
 \item{gc.gene.file}{A mapping file that assigns GC content and gene symbols
27 27
 to each exon in the coverage files. Used for generating gene-level calls.
28 28
 First column in format CHR:START-END. Second column GC content (0 to 1).
29
-Third column gene symbol. This file can be generated with the \sQuote{GATK
30
-GCContentByInterval} tool or with the
31
-\code{\link{calculateGCContentByInterval}} function.}
29
+Third column gene symbol. This file is generated with the 
30
+\code{\link{preprocessIntervals}} function.}
32 31
 
33 32
 \item{fun.focal}{Function for identifying focal amplifications. Defaults to
34 33
 \code{\link{findFocal}}.}
... ...
@@ -14,9 +14,8 @@ correctCoverageBias(coverage.file, gc.gene.file, output.file = NULL,
14 14
 \item{gc.gene.file}{File providing GC content for each exon in the coverage
15 15
 files. First column in format CHR:START-END. Second column GC content (0 to
16 16
 1).  Third column provides gene symbols, which are optional, but used in
17
-\code{\link{runAbsoluteCN}} to generate gene level calls. This file can be
18
-generated with GATK GCContentByInterval tool or with the
19
-\code{\link{calculateGCContentByInterval}} function.}
17
+\code{\link{runAbsoluteCN}} to generate gene level calls. This file is 
18
+generated with the \code{\link{preprocessIntervals}} function.}
20 19
 
21 20
 \item{output.file}{Optionally, write file with GC corrected coverage. Can be
22 21
 read with the \code{\link{readCoverageFile}} function.}
... ...
@@ -46,7 +45,7 @@ coverage <- correctCoverageBias(normal.coverage.file, gc.gene.file)
46 45
 
47 46
 }
48 47
 \seealso{
49
-\code{\link{calculateGCContentByInterval}}
48
+\code{\link{preprocessIntervals}}
50 49
 }
51 50
 \author{
52 51
 Angad Singh, Markus Riester
... ...
@@ -227,9 +227,8 @@ are sequencing errors).}
227 227
 \item{gc.gene.file}{A mapping file that assigns GC content and gene symbols
228 228
 to each exon in the coverage files. Used for generating gene-level calls.
229 229
 First column in format CHR:START-END. Second column GC content (0 to 1).
230
-Third column gene symbol. This file can be generated with the \sQuote{GATK
231
-GCContentByInterval} tool or with the
232
-\code{\link{calculateGCContentByInterval}} function.}
230
+Third column gene symbol. This file is generated with the
231
+\code{\link{preprocessIntervals}} function.}
233 232
 
234 233
 \item{max.dropout}{Measures GC bias as ratio of coverage in AT-rich (GC <
235 234
 0.5) versus GC-rich on-target regions (GC >= 0.5). High drop-out might 
236 235
similarity index 81%
237 236
rename from tests/testthat/test_calculateGCContentByInterval.R
238 237
rename to tests/testthat/test_preprocessIntervals.R
... ...
@@ -1,4 +1,4 @@
1
-context("calculateGCContentByInterval")
1
+context("preprocessIntervals")
2 2
 
3 3
 reference.file <- system.file("extdata", "ex2_reference.fa", 
4 4
     package = "PureCN", mustWork = TRUE)
... ...
@@ -10,7 +10,7 @@ bed.file <- system.file("extdata", "ex2_intervals.bed", package = "PureCN",
10 10
 output.file <- tempfile(fileext = ".txt")
11 11
 
12 12
 test_that("GC-bias of example reference and intervals (GATK format) matches", {
13
-    gc <- calculateGCContentByInterval(interval.file, reference.file, 
13
+    gc <- preprocessIntervals(interval.file, reference.file, 
14 14
         output.file = output.file)
15 15
     x <- read.delim(output.file, as.is = TRUE)
16 16
     expect_equal(x$gc_bias, c(0.4533333, 0.5057143, 0.5733333, 
... ...
@@ -23,7 +23,7 @@ test_that("GC-bias of example reference and intervals (BED format) matches", {
23 23
     x <- read.delim(output.file, as.is = TRUE)
24 24
     intervals <- import(bed.file)
25 25
     output.file2 <- tempfile(fileext = ".txt")
26
-    y <- calculateGCContentByInterval(intervals, reference.file, 
26
+    y <- preprocessIntervals(intervals, reference.file, 
27 27
         output.file = output.file2)
28 28
     expect_equal(y$gc_bias, x$gc_bias)
29 29
     expect_equal(as.character(y), x$Target)
... ...
@@ -38,7 +38,7 @@ test_that("Exceptions happen with wrong input", {
38 38
     idata[3, 1] <- "seq2:0-149"
39 39
     write.table(idata, file = interval.file2, row.names = FALSE, 
40 40
         quote = FALSE)
41
-    expect_error(calculateGCContentByInterval(interval.file2, 
41
+    expect_error(preprocessIntervals(interval.file2, 
42 42
         reference.file),
43 43
         "Interval coordinates should start at 1, not at 0")
44 44
     file.remove(interval.file2)
... ...
@@ -50,17 +50,17 @@ test_that("reptiming annotated correctly", {
50 50
 
51 51
     reptiming <- import(reptiming.file)
52 52
     intervals <- import(bed.file)
53
-    gr <- calculateGCContentByInterval(intervals, reference.file, 
53
+    gr <- preprocessIntervals(intervals, reference.file, 
54 54
         reptiming = reptiming)
55 55
     expect_equal(c(17.5, 11.0, 50, 10.0, 10.0), gr$reptiming)
56 56
 })
57 57
 
58 58
 test_that("Offtarget settings work as expected", {
59
-    gc <- calculateGCContentByInterval(interval.file, reference.file, 
59
+    gc <- preprocessIntervals(interval.file, reference.file, 
60 60
         off.target = TRUE, min.off.target.width = 2, off.target.padding = -2)
61 61
     expect_equal(length(gc), 11)
62 62
     intervals <- import(bed.file)
63
-    gc2 <- calculateGCContentByInterval(gc, reference.file)
63
+    gc2 <- preprocessIntervals(gc, reference.file)
64 64
     expect_equal(start(gc2), start(gc))
65 65
     expect_equal(end(gc2), end(gc))
66 66
     expect_equal(gc2$mappability, gc$mappability)
... ...
@@ -69,14 +69,14 @@ test_that("Offtarget settings work as expected", {
69 69
         mappability.file <- system.file("extdata", "ex2_mappability.bigWig", 
70 70
             package = "PureCN", mustWork = TRUE)
71 71
         mappability <- import(mappability.file)
72
-        gcMap <- calculateGCContentByInterval(intervals, reference.file, 
72
+        gcMap <- preprocessIntervals(intervals, reference.file, 
73 73
             mappability = mappability)
74 74
         expect_equal(gcMap$mappability, c(1, 1, 0.7, 1, 1), tolerance = 0.001)
75 75
     }
76 76
     mappability.file <- system.file("extdata", "ex2_mappability.bed", 
77 77
         package = "PureCN", mustWork = TRUE)
78 78
     mappability <- import(mappability.file)
79
-    gcMap <- calculateGCContentByInterval(intervals, reference.file, 
79
+    gcMap <- preprocessIntervals(intervals, reference.file, 
80 80
         mappability = mappability)
81 81
     expect_equal(gcMap$mappability, c(1, 1, 0.7, 1, 1), tolerance = 0.001)
82 82
     reference.file <- system.file("extdata", "ex3_reference.fa", 
... ...
@@ -84,20 +84,20 @@ test_that("Offtarget settings work as expected", {
84 84
     bed.file3 <- system.file("extdata", "ex3_intervals.bed", 
85 85
         package = "PureCN", mustWork = TRUE)
86 86
     intervals3 <- import(bed.file3)
87
-    x <- calculateGCContentByInterval(intervals3, reference.file)
87
+    x <- preprocessIntervals(intervals3, reference.file)
88 88
     expect_equal(x$gc_bias, c(0.4533333, 0.5057143, 0.5733333,
89 89
         0.48, 0.36), tolerance = 0.001)
90 90
     seqlevelsStyle(intervals3) <- "NCBI"
91
-    x <- calculateGCContentByInterval(intervals3, reference.file)
91
+    x <- preprocessIntervals(intervals3, reference.file)
92 92
     expect_equal(x$gc_bias, c(0.4533333, 0.5057143, 0.5733333, 
93 93
         0.48, 0.36), tolerance = 0.001)
94
-    expect_error(calculateGCContentByInterval(intervals, reference.file),
94
+    expect_error(preprocessIntervals(intervals, reference.file),
95 95
         "Chromosome naming style of interval file")
96 96
     mappability.file3 <- system.file("extdata", "ex3_mappability.bed", 
97 97
         package = "PureCN", mustWork = TRUE)
98 98
     mappability3 <- import(mappability.file3)
99 99
     seqlevelsStyle(mappability3) <- "NCBI"
100
-    x <- calculateGCContentByInterval(intervals3, reference.file, 
100
+    x <- preprocessIntervals(intervals3, reference.file, 
101 101
         mappability = mappability3)
102 102
     expect_equal(x$gc_bias, c(0.4533333, 0.5057143, 0.5733333, 
103 103
         0.48, 0.36), tolerance = 0.001)
... ...
@@ -132,7 +132,7 @@ reads can be used by the segmentation function.
132 132
 It further annotates targets by GC-content (how coverage is normalized is
133 133
 described later in Section~\ref{secgcbias}). 
134 134
 
135
-\Biocpkg{PureCN} provides the \Rfunction{calculateGCContentByInterval} 
135
+\Biocpkg{PureCN} provides the \Rfunction{preprocessIntervals} 
136 136
 function:
137 137
 
138 138
 <<examplegc>>=
... ...
@@ -146,7 +146,7 @@ mappability.file <- system.file("extdata", "ex2_mappability.bigWig",
146 146
 intervals <- import(bed.file)
147 147
 mappability <- import(mappability.file)
148 148
 
149
-calculateGCContentByInterval(intervals, reference.file, 
149
+preprocessIntervals(intervals, reference.file, 
150 150
     mappability=mappability, output.file = "ex2_gc_file.txt")
151 151
 @
152 152
 
... ...
@@ -159,7 +159,7 @@ symbols using the \Rfunction{annotateTargets} function.
159 159
 
160 160
 The \Rfunction{calculateBamCoverageByInterval} function can be used to generate
161 161
 the required coverage data from BAM files. All we need to do is providing the
162
-desired intervals (as generated by \Rfunction{calculateGCContentByInterval}):
162
+desired intervals (as generated by \Rfunction{preprocessIntervals}):
163 163
 
164 164
 <<examplecoverage>>=
165 165
 bam.file <- system.file("extdata", "ex1.bam", package="PureCN",