... | ... |
@@ -16,6 +16,7 @@ API CHANGES |
16 | 16 |
purity/ploidy solution) |
17 | 17 |
o changed default of runAbsoluteCN max.logr.sdev to 0.6 (from 0.75) |
18 | 18 |
o createTargetWeights does not require tumor coverages anymore |
19 |
+ o calculateBamCoverageByInterval was renamed to preprocessIntervals |
|
19 | 20 |
|
20 | 21 |
|
21 | 22 |
|
... | ... |
@@ -18,7 +18,7 @@ |
18 | 18 |
#' @param keep.duplicates Keep or remove duplicated reads. |
19 | 19 |
#' @return Returns total and average coverage by intervals. |
20 | 20 |
#' @author Markus Riester |
21 |
-#' @seealso \code{\link{calculateGCContentByInterval} |
|
21 |
+#' @seealso \code{\link{preprocessIntervals} |
|
22 | 22 |
#' \link{correctCoverageBias} \link{runAbsoluteCN}} |
23 | 23 |
#' @examples |
24 | 24 |
#' |
... | ... |
@@ -90,9 +90,8 @@ log.ratio.cutoffs = c(-0.9, 0.9), failed = NULL, all.genes = FALSE) { |
90 | 90 |
#' @param gc.gene.file A mapping file that assigns GC content and gene symbols |
91 | 91 |
#' to each exon in the coverage files. Used for generating gene-level calls. |
92 | 92 |
#' First column in format CHR:START-END. Second column GC content (0 to 1). |
93 |
-#' Third column gene symbol. This file can be generated with the \sQuote{GATK |
|
94 |
-#' GCContentByInterval} tool or with the |
|
95 |
-#' \code{\link{calculateGCContentByInterval}} function. |
|
93 |
+#' Third column gene symbol. This file is generated with the |
|
94 |
+#' \code{\link{preprocessIntervals}} function. |
|
96 | 95 |
#' @param fun.focal Function for identifying focal amplifications. Defaults to |
97 | 96 |
#' \code{\link{findFocal}}. |
98 | 97 |
#' @param args.focal Arguments for focal amplification function. |
... | ... |
@@ -13,9 +13,8 @@ globalVariables(names=c("..level..")) |
13 | 13 |
#' @param gc.gene.file File providing GC content for each exon in the coverage |
14 | 14 |
#' files. First column in format CHR:START-END. Second column GC content (0 to |
15 | 15 |
#' 1). Third column provides gene symbols, which are optional, but used in |
16 |
-#' \code{\link{runAbsoluteCN}} to generate gene level calls. This file can be |
|
17 |
-#' generated with GATK GCContentByInterval tool or with the |
|
18 |
-#' \code{\link{calculateGCContentByInterval}} function. |
|
16 |
+#' \code{\link{runAbsoluteCN}} to generate gene level calls. This file is |
|
17 |
+#' generated with the \code{\link{preprocessIntervals}} function. |
|
19 | 18 |
#' @param output.file Optionally, write file with GC corrected coverage. Can be |
20 | 19 |
#' read with the \code{\link{readCoverageFile}} function. |
21 | 20 |
#' @param plot.gc.bias Optionally, plot GC profiles of the pre-normalized and |
... | ... |
@@ -27,7 +26,7 @@ globalVariables(names=c("..level..")) |
27 | 26 |
#' estimation is applied. If the \code{plot.gc.bias} parameter is set as |
28 | 27 |
#' \code{FALSE}, this will be ignored. |
29 | 28 |
#' @author Angad Singh, Markus Riester |
30 |
-#' @seealso \code{\link{calculateGCContentByInterval}} |
|
29 |
+#' @seealso \code{\link{preprocessIntervals}} |
|
31 | 30 |
#' @examples |
32 | 31 |
#' |
33 | 32 |
#' normal.coverage.file <- system.file("extdata", "example_normal.txt", |
34 | 33 |
similarity index 88% |
35 | 34 |
rename from R/calculateGCContentByInterval.R |
36 | 35 |
rename to R/preprocessIntervals.R |
... | ... |
@@ -1,9 +1,9 @@ |
1 |
-#' Calculates GC content by interval |
|
1 |
+#' Preprocess intervals |
|
2 | 2 |
#' |
3 |
-#' Uses \code{scanFa} from the Rsamtools package to retrieve GC content of |
|
4 |
-#' intervals in a reference FASTA file. Can optimize intervals for copy |
|
5 |
-#' number calling by tiling long intervals and by including off-target regions. |
|
6 |
-#' This optimization largely follows CNVkit. |
|
3 |
+#' Optimize intervals for copy number calling by tiling long intervals and by |
|
4 |
+#' including off-target regions. Uses \code{scanFa} from the Rsamtools package |
|
5 |
+#' to retrieve GC content of intervals in a reference FASTA file. If provided, |
|
6 |
+#' will annotate intervals with mappability and replication timing scores. |
|
7 | 7 |
#' |
8 | 8 |
#' @param interval.file File specifying the intervals. Interval is expected in |
9 | 9 |
#' first column in format CHR:START-END. Instead of a file, a \code{GRanges} |
... | ... |
@@ -44,25 +44,30 @@ |
44 | 44 |
#' package="PureCN", mustWork = TRUE) |
45 | 45 |
#' bed.file <- system.file("extdata", "ex2_intervals.bed", |
46 | 46 |
#' package="PureCN", mustWork = TRUE) |
47 |
-#' calculateGCContentByInterval(interval.file, reference.file, |
|
47 |
+#' preprocessIntervals(interval.file, reference.file, |
|
48 | 48 |
#' output.file="gc_file.txt") |
49 | 49 |
#' |
50 | 50 |
#' intervals <- import(bed.file) |
51 |
-#' calculateGCContentByInterval(intervals, reference.file, |
|
51 |
+#' preprocessIntervals(intervals, reference.file, |
|
52 | 52 |
#' output.file="gc_file.txt") |
53 | 53 |
#' |
54 |
-#' @export calculateGCContentByInterval |
|
54 |
+#' @export preprocessIntervals |
|
55 | 55 |
#' @importFrom rtracklayer import |
56 | 56 |
#' @importFrom Biostrings letterFrequency |
57 | 57 |
#' @importFrom BiocGenerics unstrand |
58 | 58 |
#' @importFrom stats aggregate |
59 | 59 |
#' @importFrom S4Vectors mcols |
60 | 60 |
#' @importFrom GenomeInfoDb seqlevelsInUse seqlengths seqlevels<- |
61 |
-calculateGCContentByInterval <- function(interval.file, reference.file, |
|
62 |
-output.file = NULL, off.target=FALSE, average.target.width=400, |
|
63 |
-min.off.target.width=20000, average.off.target.width=200000, |
|
64 |
-off.target.padding=-500, mappability=NULL, min.mappability=c(0.5,0.1,0.7), |
|
65 |
-reptiming=NULL, off.target.seqlevels=c("targeted", "all")) { |
|
61 |
+preprocessIntervals <- function(interval.file, reference.file, |
|
62 |
+ output.file = NULL, off.target = FALSE, |
|
63 |
+ average.target.width = 400, |
|
64 |
+ min.off.target.width = 20000, |
|
65 |
+ average.off.target.width = 200000, |
|
66 |
+ off.target.padding = -500, mappability = NULL, |
|
67 |
+ min.mappability = c(0.5, 0.1, 0.7), |
|
68 |
+ reptiming = NULL, |
|
69 |
+ off.target.seqlevels=c("targeted", "all")) { |
|
70 |
+ |
|
66 | 71 |
if (class(interval.file)=="GRanges") { |
67 | 72 |
interval.gr <- .checkIntervals(interval.file) |
68 | 73 |
} else { |
... | ... |
@@ -156,6 +161,18 @@ reptiming=NULL, off.target.seqlevels=c("targeted", "all")) { |
156 | 161 |
invisible(interval.gr) |
157 | 162 |
} |
158 | 163 |
|
164 |
+#' Calculates GC content by interval |
|
165 |
+#' |
|
166 |
+#' This function was renamed to \code{\link{preprocessIntervals}}. |
|
167 |
+#' |
|
168 |
+#' @param ... Arguments passed to \code{\link{preprocessIntervals}}. |
|
169 |
+#' |
|
170 |
+#' @export calculateGCContentByInterval |
|
171 |
+calculateGCContentByInterval <- function(...) { |
|
172 |
+ .Deprecated("preprocessIntervals") |
|
173 |
+ preprocessIntervals(...) |
|
174 |
+} |
|
175 |
+ |
|
159 | 176 |
# this function removes short chromosomes that have no probes (mainly a |
160 | 177 |
# general way to remove chrM) |
161 | 178 |
.dropShortUntargetedSeqLevels <- function(offRegions, interval.gr, minSize) { |
... | ... |
@@ -161,9 +161,8 @@ |
161 | 161 |
#' @param gc.gene.file A mapping file that assigns GC content and gene symbols |
162 | 162 |
#' to each exon in the coverage files. Used for generating gene-level calls. |
163 | 163 |
#' First column in format CHR:START-END. Second column GC content (0 to 1). |
164 |
-#' Third column gene symbol. This file can be generated with the \sQuote{GATK |
|
165 |
-#' GCContentByInterval} tool or with the |
|
166 |
-#' \code{\link{calculateGCContentByInterval}} function. |
|
164 |
+#' Third column gene symbol. This file is generated with the |
|
165 |
+#' \code{\link{preprocessIntervals}} function. |
|
167 | 166 |
#' @param max.dropout Measures GC bias as ratio of coverage in AT-rich (GC < |
168 | 167 |
#' 0.5) versus GC-rich on-target regions (GC >= 0.5). High drop-out might |
169 | 168 |
#' indicate that data was not GC-normalized or that the sample quality might |
... | ... |
@@ -10,16 +10,16 @@ option_list <- list( |
10 | 10 |
default = NULL, |
11 | 11 |
help = "Infile specifying target (baits) intervals. Needs to be parsable by rtracklayer."), |
12 | 12 |
make_option(c("--offtarget"), action = "store_true", |
13 |
- default = formals(PureCN::calculateGCContentByInterval)$off.target, |
|
13 |
+ default = formals(PureCN::preprocessIntervals)$off.target, |
|
14 | 14 |
help = "Include off-target regions [default %default]"), |
15 | 15 |
make_option(c("--targetwidth"), action = "store", type = "integer", |
16 |
- default=formals(PureCN::calculateGCContentByInterval)$average.target.width, |
|
16 |
+ default=formals(PureCN::preprocessIntervals)$average.target.width, |
|
17 | 17 |
help = "Split large targets to approximately that size [default %default]"), |
18 | 18 |
make_option(c("--offtargetwidth"), action = "store", type = "integer", |
19 |
- default = formals(PureCN::calculateGCContentByInterval)$average.off.target.width, |
|
19 |
+ default = formals(PureCN::preprocessIntervals)$average.off.target.width, |
|
20 | 20 |
help = "Bin off-target regions to approximately that size [default %default]"), |
21 | 21 |
make_option(c("--offtargetseqlevels"), action = "store", type = "character", |
22 |
- default=formals(PureCN::calculateGCContentByInterval)$off.target.seqlevels[[2]], |
|
22 |
+ default=formals(PureCN::preprocessIntervals)$off.target.seqlevels[[2]], |
|
23 | 23 |
help = "Controls how to deal with chromosomes/contigs not found in infile. One of targeted, all [default %default]"), |
24 | 24 |
make_option(c("--mappability"), action = "store", type = "character", |
25 | 25 |
help = "File parsable by rtracklayer specifying mappability scores of genomic regions."), |
... | ... |
@@ -110,7 +110,7 @@ if (!opt$offtarget) { |
110 | 110 |
" Amplicon data. Add --offtarget to include them.") |
111 | 111 |
} |
112 | 112 |
|
113 |
-outGC <- calculateGCContentByInterval(intervals, reference.file, |
|
113 |
+outGC <- preprocessIntervals(intervals, reference.file, |
|
114 | 114 |
output.file = outfile, off.target = opt$offtarget, |
115 | 115 |
mappability = mappability, average.off.target.width = opt$offtargetwidth, |
116 | 116 |
reptiming = reptiming, off.target.seqlevels = opt$offtargetseqlevels, |
... | ... |
@@ -1,83 +1,14 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/calculateGCContentByInterval.R |
|
2 |
+% Please edit documentation in R/preprocessIntervals.R |
|
3 | 3 |
\name{calculateGCContentByInterval} |
4 | 4 |
\alias{calculateGCContentByInterval} |
5 | 5 |
\title{Calculates GC content by interval} |
6 | 6 |
\usage{ |
7 |
-calculateGCContentByInterval(interval.file, reference.file, |
|
8 |
- output.file = NULL, off.target = FALSE, average.target.width = 400, |
|
9 |
- min.off.target.width = 20000, average.off.target.width = 2e+05, |
|
10 |
- off.target.padding = -500, mappability = NULL, min.mappability = c(0.5, |
|
11 |
- 0.1, 0.7), reptiming = NULL, off.target.seqlevels = c("targeted", "all")) |
|
7 |
+calculateGCContentByInterval(...) |
|
12 | 8 |
} |
13 | 9 |
\arguments{ |
14 |
-\item{interval.file}{File specifying the intervals. Interval is expected in |
|
15 |
-first column in format CHR:START-END. Instead of a file, a \code{GRanges} |
|
16 |
-object can be provided. This allows the use of BED files for example. Note |
|
17 |
-that GATK interval files are 1-based (first position of the genome is 1). |
|
18 |
-Other formats like BED files are often 0-based. The \code{import} function |
|
19 |
-will automatically convert to 1-based \code{GRanges}.} |
|
20 |
- |
|
21 |
-\item{reference.file}{Reference FASTA file.} |
|
22 |
- |
|
23 |
-\item{output.file}{Optionally, write GC content file.} |
|
24 |
- |
|
25 |
-\item{off.target}{Include off-target regions.} |
|
26 |
- |
|
27 |
-\item{average.target.width}{Split large targets to approximately this size.} |
|
28 |
- |
|
29 |
-\item{min.off.target.width}{Only include off-target regions of that |
|
30 |
-size} |
|
31 |
- |
|
32 |
-\item{average.off.target.width}{Split off-target regions to that} |
|
33 |
- |
|
34 |
-\item{off.target.padding}{Pad off-target regions.} |
|
35 |
- |
|
36 |
-\item{mappability}{Annotate intervals with mappability score. Assumed on a scale |
|
37 |
-from 0 to 1, with score being 1/(number alignments). Expected as \code{GRanges} |
|
38 |
-object with first meta column being the score. Regions outside these ranges are |
|
39 |
-ignored, assuming that \code{mappability} covers the whole accessible genome.} |
|
40 |
- |
|
41 |
-\item{min.mappability}{\code{double(3)} specifying the minimum mappability score |
|
42 |
-for on-target, off-target, and chrY regions in that order. The chrY regions |
|
43 |
-are only used for sex determination in \sQuote{PureCN} and are therefore |
|
44 |
-treated differently. Requires \code{mappability}.} |
|
45 |
- |
|
46 |
-\item{reptiming}{Annotate intervals with replication timing score. Expected as |
|
47 |
-\code{GRanges} object with first meta column being the score.} |
|
48 |
- |
|
49 |
-\item{off.target.seqlevels}{Controls how to deal with chromosomes/contigs |
|
50 |
-found in the \code{reference.file} but not in the \code{interval.file}.} |
|
51 |
-} |
|
52 |
-\value{ |
|
53 |
-Returns GC content by interval as \code{GRanges} object. |
|
10 |
+\item{...}{Arguments passed to \code{\link{preprocessIntervals}}.} |
|
54 | 11 |
} |
55 | 12 |
\description{ |
56 |
-Uses \code{scanFa} from the Rsamtools package to retrieve GC content of |
|
57 |
-intervals in a reference FASTA file. Can optimize intervals for copy |
|
58 |
-number calling by tiling long intervals and by including off-target regions. |
|
59 |
-This optimization largely follows CNVkit. |
|
60 |
-} |
|
61 |
-\examples{ |
|
62 |
- |
|
63 |
-reference.file <- system.file("extdata", "ex2_reference.fa", |
|
64 |
- package="PureCN", mustWork = TRUE) |
|
65 |
-interval.file <- system.file("extdata", "ex2_intervals.txt", |
|
66 |
- package="PureCN", mustWork = TRUE) |
|
67 |
-bed.file <- system.file("extdata", "ex2_intervals.bed", |
|
68 |
- package="PureCN", mustWork = TRUE) |
|
69 |
-calculateGCContentByInterval(interval.file, reference.file, |
|
70 |
- output.file="gc_file.txt") |
|
71 |
- |
|
72 |
-intervals <- import(bed.file) |
|
73 |
-calculateGCContentByInterval(intervals, reference.file, |
|
74 |
- output.file="gc_file.txt") |
|
75 |
- |
|
76 |
-} |
|
77 |
-\references{ |
|
78 |
-Talevich et al. (2016). CNVkit: Genome-Wide Copy Number |
|
79 |
-Detection and Visualization from Targeted DNA Sequencing. PLoS Comput Biol. |
|
80 |
-} |
|
81 |
-\author{ |
|
82 |
-Markus Riester |
|
13 |
+This function was renamed to \code{\link{preprocessIntervals}}. |
|
83 | 14 |
} |
... | ... |
@@ -26,9 +26,8 @@ callAlterationsFromSegmentation(sampleid, chr, start, end, num.mark = NA, |
26 | 26 |
\item{gc.gene.file}{A mapping file that assigns GC content and gene symbols |
27 | 27 |
to each exon in the coverage files. Used for generating gene-level calls. |
28 | 28 |
First column in format CHR:START-END. Second column GC content (0 to 1). |
29 |
-Third column gene symbol. This file can be generated with the \sQuote{GATK |
|
30 |
-GCContentByInterval} tool or with the |
|
31 |
-\code{\link{calculateGCContentByInterval}} function.} |
|
29 |
+Third column gene symbol. This file is generated with the |
|
30 |
+\code{\link{preprocessIntervals}} function.} |
|
32 | 31 |
|
33 | 32 |
\item{fun.focal}{Function for identifying focal amplifications. Defaults to |
34 | 33 |
\code{\link{findFocal}}.} |
... | ... |
@@ -14,9 +14,8 @@ correctCoverageBias(coverage.file, gc.gene.file, output.file = NULL, |
14 | 14 |
\item{gc.gene.file}{File providing GC content for each exon in the coverage |
15 | 15 |
files. First column in format CHR:START-END. Second column GC content (0 to |
16 | 16 |
1). Third column provides gene symbols, which are optional, but used in |
17 |
-\code{\link{runAbsoluteCN}} to generate gene level calls. This file can be |
|
18 |
-generated with GATK GCContentByInterval tool or with the |
|
19 |
-\code{\link{calculateGCContentByInterval}} function.} |
|
17 |
+\code{\link{runAbsoluteCN}} to generate gene level calls. This file is |
|
18 |
+generated with the \code{\link{preprocessIntervals}} function.} |
|
20 | 19 |
|
21 | 20 |
\item{output.file}{Optionally, write file with GC corrected coverage. Can be |
22 | 21 |
read with the \code{\link{readCoverageFile}} function.} |
... | ... |
@@ -46,7 +45,7 @@ coverage <- correctCoverageBias(normal.coverage.file, gc.gene.file) |
46 | 45 |
|
47 | 46 |
} |
48 | 47 |
\seealso{ |
49 |
-\code{\link{calculateGCContentByInterval}} |
|
48 |
+\code{\link{preprocessIntervals}} |
|
50 | 49 |
} |
51 | 50 |
\author{ |
52 | 51 |
Angad Singh, Markus Riester |
... | ... |
@@ -227,9 +227,8 @@ are sequencing errors).} |
227 | 227 |
\item{gc.gene.file}{A mapping file that assigns GC content and gene symbols |
228 | 228 |
to each exon in the coverage files. Used for generating gene-level calls. |
229 | 229 |
First column in format CHR:START-END. Second column GC content (0 to 1). |
230 |
-Third column gene symbol. This file can be generated with the \sQuote{GATK |
|
231 |
-GCContentByInterval} tool or with the |
|
232 |
-\code{\link{calculateGCContentByInterval}} function.} |
|
230 |
+Third column gene symbol. This file is generated with the |
|
231 |
+\code{\link{preprocessIntervals}} function.} |
|
233 | 232 |
|
234 | 233 |
\item{max.dropout}{Measures GC bias as ratio of coverage in AT-rich (GC < |
235 | 234 |
0.5) versus GC-rich on-target regions (GC >= 0.5). High drop-out might |
236 | 235 |
similarity index 81% |
237 | 236 |
rename from tests/testthat/test_calculateGCContentByInterval.R |
238 | 237 |
rename to tests/testthat/test_preprocessIntervals.R |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-context("calculateGCContentByInterval") |
|
1 |
+context("preprocessIntervals") |
|
2 | 2 |
|
3 | 3 |
reference.file <- system.file("extdata", "ex2_reference.fa", |
4 | 4 |
package = "PureCN", mustWork = TRUE) |
... | ... |
@@ -10,7 +10,7 @@ bed.file <- system.file("extdata", "ex2_intervals.bed", package = "PureCN", |
10 | 10 |
output.file <- tempfile(fileext = ".txt") |
11 | 11 |
|
12 | 12 |
test_that("GC-bias of example reference and intervals (GATK format) matches", { |
13 |
- gc <- calculateGCContentByInterval(interval.file, reference.file, |
|
13 |
+ gc <- preprocessIntervals(interval.file, reference.file, |
|
14 | 14 |
output.file = output.file) |
15 | 15 |
x <- read.delim(output.file, as.is = TRUE) |
16 | 16 |
expect_equal(x$gc_bias, c(0.4533333, 0.5057143, 0.5733333, |
... | ... |
@@ -23,7 +23,7 @@ test_that("GC-bias of example reference and intervals (BED format) matches", { |
23 | 23 |
x <- read.delim(output.file, as.is = TRUE) |
24 | 24 |
intervals <- import(bed.file) |
25 | 25 |
output.file2 <- tempfile(fileext = ".txt") |
26 |
- y <- calculateGCContentByInterval(intervals, reference.file, |
|
26 |
+ y <- preprocessIntervals(intervals, reference.file, |
|
27 | 27 |
output.file = output.file2) |
28 | 28 |
expect_equal(y$gc_bias, x$gc_bias) |
29 | 29 |
expect_equal(as.character(y), x$Target) |
... | ... |
@@ -38,7 +38,7 @@ test_that("Exceptions happen with wrong input", { |
38 | 38 |
idata[3, 1] <- "seq2:0-149" |
39 | 39 |
write.table(idata, file = interval.file2, row.names = FALSE, |
40 | 40 |
quote = FALSE) |
41 |
- expect_error(calculateGCContentByInterval(interval.file2, |
|
41 |
+ expect_error(preprocessIntervals(interval.file2, |
|
42 | 42 |
reference.file), |
43 | 43 |
"Interval coordinates should start at 1, not at 0") |
44 | 44 |
file.remove(interval.file2) |
... | ... |
@@ -50,17 +50,17 @@ test_that("reptiming annotated correctly", { |
50 | 50 |
|
51 | 51 |
reptiming <- import(reptiming.file) |
52 | 52 |
intervals <- import(bed.file) |
53 |
- gr <- calculateGCContentByInterval(intervals, reference.file, |
|
53 |
+ gr <- preprocessIntervals(intervals, reference.file, |
|
54 | 54 |
reptiming = reptiming) |
55 | 55 |
expect_equal(c(17.5, 11.0, 50, 10.0, 10.0), gr$reptiming) |
56 | 56 |
}) |
57 | 57 |
|
58 | 58 |
test_that("Offtarget settings work as expected", { |
59 |
- gc <- calculateGCContentByInterval(interval.file, reference.file, |
|
59 |
+ gc <- preprocessIntervals(interval.file, reference.file, |
|
60 | 60 |
off.target = TRUE, min.off.target.width = 2, off.target.padding = -2) |
61 | 61 |
expect_equal(length(gc), 11) |
62 | 62 |
intervals <- import(bed.file) |
63 |
- gc2 <- calculateGCContentByInterval(gc, reference.file) |
|
63 |
+ gc2 <- preprocessIntervals(gc, reference.file) |
|
64 | 64 |
expect_equal(start(gc2), start(gc)) |
65 | 65 |
expect_equal(end(gc2), end(gc)) |
66 | 66 |
expect_equal(gc2$mappability, gc$mappability) |
... | ... |
@@ -69,14 +69,14 @@ test_that("Offtarget settings work as expected", { |
69 | 69 |
mappability.file <- system.file("extdata", "ex2_mappability.bigWig", |
70 | 70 |
package = "PureCN", mustWork = TRUE) |
71 | 71 |
mappability <- import(mappability.file) |
72 |
- gcMap <- calculateGCContentByInterval(intervals, reference.file, |
|
72 |
+ gcMap <- preprocessIntervals(intervals, reference.file, |
|
73 | 73 |
mappability = mappability) |
74 | 74 |
expect_equal(gcMap$mappability, c(1, 1, 0.7, 1, 1), tolerance = 0.001) |
75 | 75 |
} |
76 | 76 |
mappability.file <- system.file("extdata", "ex2_mappability.bed", |
77 | 77 |
package = "PureCN", mustWork = TRUE) |
78 | 78 |
mappability <- import(mappability.file) |
79 |
- gcMap <- calculateGCContentByInterval(intervals, reference.file, |
|
79 |
+ gcMap <- preprocessIntervals(intervals, reference.file, |
|
80 | 80 |
mappability = mappability) |
81 | 81 |
expect_equal(gcMap$mappability, c(1, 1, 0.7, 1, 1), tolerance = 0.001) |
82 | 82 |
reference.file <- system.file("extdata", "ex3_reference.fa", |
... | ... |
@@ -84,20 +84,20 @@ test_that("Offtarget settings work as expected", { |
84 | 84 |
bed.file3 <- system.file("extdata", "ex3_intervals.bed", |
85 | 85 |
package = "PureCN", mustWork = TRUE) |
86 | 86 |
intervals3 <- import(bed.file3) |
87 |
- x <- calculateGCContentByInterval(intervals3, reference.file) |
|
87 |
+ x <- preprocessIntervals(intervals3, reference.file) |
|
88 | 88 |
expect_equal(x$gc_bias, c(0.4533333, 0.5057143, 0.5733333, |
89 | 89 |
0.48, 0.36), tolerance = 0.001) |
90 | 90 |
seqlevelsStyle(intervals3) <- "NCBI" |
91 |
- x <- calculateGCContentByInterval(intervals3, reference.file) |
|
91 |
+ x <- preprocessIntervals(intervals3, reference.file) |
|
92 | 92 |
expect_equal(x$gc_bias, c(0.4533333, 0.5057143, 0.5733333, |
93 | 93 |
0.48, 0.36), tolerance = 0.001) |
94 |
- expect_error(calculateGCContentByInterval(intervals, reference.file), |
|
94 |
+ expect_error(preprocessIntervals(intervals, reference.file), |
|
95 | 95 |
"Chromosome naming style of interval file") |
96 | 96 |
mappability.file3 <- system.file("extdata", "ex3_mappability.bed", |
97 | 97 |
package = "PureCN", mustWork = TRUE) |
98 | 98 |
mappability3 <- import(mappability.file3) |
99 | 99 |
seqlevelsStyle(mappability3) <- "NCBI" |
100 |
- x <- calculateGCContentByInterval(intervals3, reference.file, |
|
100 |
+ x <- preprocessIntervals(intervals3, reference.file, |
|
101 | 101 |
mappability = mappability3) |
102 | 102 |
expect_equal(x$gc_bias, c(0.4533333, 0.5057143, 0.5733333, |
103 | 103 |
0.48, 0.36), tolerance = 0.001) |
... | ... |
@@ -132,7 +132,7 @@ reads can be used by the segmentation function. |
132 | 132 |
It further annotates targets by GC-content (how coverage is normalized is |
133 | 133 |
described later in Section~\ref{secgcbias}). |
134 | 134 |
|
135 |
-\Biocpkg{PureCN} provides the \Rfunction{calculateGCContentByInterval} |
|
135 |
+\Biocpkg{PureCN} provides the \Rfunction{preprocessIntervals} |
|
136 | 136 |
function: |
137 | 137 |
|
138 | 138 |
<<examplegc>>= |
... | ... |
@@ -146,7 +146,7 @@ mappability.file <- system.file("extdata", "ex2_mappability.bigWig", |
146 | 146 |
intervals <- import(bed.file) |
147 | 147 |
mappability <- import(mappability.file) |
148 | 148 |
|
149 |
-calculateGCContentByInterval(intervals, reference.file, |
|
149 |
+preprocessIntervals(intervals, reference.file, |
|
150 | 150 |
mappability=mappability, output.file = "ex2_gc_file.txt") |
151 | 151 |
@ |
152 | 152 |
|
... | ... |
@@ -159,7 +159,7 @@ symbols using the \Rfunction{annotateTargets} function. |
159 | 159 |
|
160 | 160 |
The \Rfunction{calculateBamCoverageByInterval} function can be used to generate |
161 | 161 |
the required coverage data from BAM files. All we need to do is providing the |
162 |
-desired intervals (as generated by \Rfunction{calculateGCContentByInterval}): |
|
162 |
+desired intervals (as generated by \Rfunction{preprocessIntervals}): |
|
163 | 163 |
|
164 | 164 |
<<examplecoverage>>= |
165 | 165 |
bam.file <- system.file("extdata", "ex1.bam", package="PureCN", |