@@ -2,8 +2,8 @@ Package: PureCN
 Type: Package
 Title: Copy number calling and SNV classification using
     targeted short read sequencing
-Version: 1.15.22
-Date: 2019-10-05
+Version: 1.15.23
+Date: 2019-10-06
 Authors@R: c(person("Markus", "Riester",
     role = c("aut", "cre"),
     email = "[email protected]",
@@ -86,6 +86,7 @@ importFrom(VGAM,dbetabinom.ab)
 importFrom(VGAM,vglm)
 importFrom(data.table,data.table)
 importFrom(data.table,fread)
+importFrom(data.table,fwrite)
 importFrom(futile.logger,appender.tee)
 importFrom(futile.logger,flog.appender)
 importFrom(futile.logger,flog.debug)
@@ -166,4 +167,3 @@ importFrom(utils,read.csv)
 importFrom(utils,read.delim)
 importFrom(utils,tail)
 importFrom(utils,write.csv)
-importFrom(utils,write.table)
@@ -33,6 +33,9 @@ SIGNIFICANT USER-VISIBLE CHANGES
       weights when available
     o Changed default of min.target.width in preprocessIntervals from 10 to 100
       (#73)
+    o replaced write.table with data.table::fwrite to automatically support
+      producing gzipped output (requires data.table 1.12.4, #106)
+    o Coverage.R now gzips BAM file coverage (requires data.table 1.12.4, #106)

 BUGFIXES

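(Not part of the diff; reviewer sketch.) The NEWS entry above is the key to the rest of this changeset: since data.table 1.12.4, fwrite() picks gzip compression from the output file name alone, so swapping write.table() for fwrite() and appending ".gz" to the file names is all that is needed. Note that fwrite() defaults to sep = "," while write.table() defaults to a space, which is why some of the converted calls below gain an explicit sep argument. A minimal sketch with placeholder file names:

```
library(data.table)

tmp <- data.table(chr = "chr1", start = 1L, end = 2000L, coverage = 42.1)

# Identical arguments; the ".gz" extension alone triggers gzipped output
# (requires data.table >= 1.12.4).
fwrite(tmp, file = "example_coverage.txt",    quote = FALSE, sep = " ")
fwrite(tmp, file = "example_coverage.txt.gz", quote = FALSE, sep = " ")
```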
@@ -83,7 +83,7 @@ calculateBamCoverageByInterval <- function(bam.file, interval.file,
         on_target = intervalGr$on.target,
         duplication_rate = intervalGr$duplication.rate
     )
-    write.table(tmp, file = output.file, row.names = FALSE, quote = FALSE)
+    fwrite(tmp, file = output.file, row.names = FALSE, quote = FALSE)
 }

 .filterDuplicates <- function(x) {
@@ -88,7 +88,7 @@ old_method = FALSE) {
         Target = as.character(ret$weights),
         weights = ret$weights$weights)

-    write.table(ret_output, file = interval.weight.file, row.names = FALSE,
+    fwrite(ret_output, file = interval.weight.file, row.names = FALSE,
         quote = FALSE, sep = "\t")
     }
     if (plot) .plotIntervalWeights(lrs.sd, width(tumor.coverage[[1]]),
@@ -42,7 +42,7 @@ globalVariables(names=c("..level.."))
 #' coord_trans
 #' @importFrom gridExtra grid.arrange
 #' @importFrom stats loess lm predict
-#' @importFrom utils write.table
+#' @importFrom data.table fwrite
 correctCoverageBias <- function(coverage.file, interval.file,
     output.file = NULL, plot.bias = FALSE, plot.max.density = 50000,
     output.qc.file = NULL) {
@@ -105,7 +105,8 @@ output.qc.file = NULL) {
         colnames(qc)[5:10] <- paste0("mom.", c("raw", "raw", "post.gc", "post.gc",
                                     "post.reptiming", "post.reptiming"),
                                     ".", rep(c("ontarget", "offtarget"),3))
-        write.table(qc, file = output.qc.file, row.names = FALSE, quote = FALSE)
+        fwrite(qc, file = output.qc.file, row.names = FALSE, quote = FALSE,
+            sep = " ")
     }

 .createCoverageGgplot <- function(raw, normalized, plot.max.density, x, log = FALSE) {
@@ -320,7 +320,7 @@ calculateGCContentByInterval <- function() {
         Gene=interval.gr$Gene,
         on_target=interval.gr$on.target
     )
-    write.table(tmp, file=output.file, row.names=FALSE, quote=FALSE, sep="\t")
+    fwrite(tmp, file = output.file, row.names = FALSE, quote = FALSE, sep = "\t")
 }

 .checkSeqlengths <- function(ref, x) {
@@ -129,10 +129,10 @@ processMultipleSamples <- function(tumor.coverage.files, sampleids, normalDB,
     rownames(lrsm) <- NULL
     lrsm[idx.enough.markers,]
     #transform to DNAcopy format
-    m <- data.table::melt(lrsm, id.vars=1:5)
+    m <- data.table::melt(data.table(lrsm), id.vars=1:5)
     m <- m[, c(6,1,3,4,5,7)]
     colnames(m) <- c("ID", "chrom", "loc.start", "loc.end", "num.mark", "seg.mean")
-    m
+    data.frame(m)
 }

 .add_weights_to_normaldb <- function(interval.weight.file, normalDB = NULL) {
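(Not part of the diff; reviewer sketch.) The extra data.table() wrapper matters because data.table::melt() only dispatches its own fast method when the input is a data.table; handed a plain matrix or data.frame it historically redirected to reshape2 with a warning. The trailing data.frame(m) converts the molten table back for the downstream DNAcopy-style code. A toy example with made-up column names:

```
library(data.table)

lrsm <- data.frame(chrom = "chr1", loc.start = 1, loc.end = 100,
                   num.mark = 10, flag = 0,
                   sampleA = 0.12, sampleB = -0.35)

m <- data.table::melt(data.table(lrsm), id.vars = 1:5) # melt.data.table dispatch
data.frame(m)  # one row per sample and segment, with 'variable'/'value' columns
```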
@@ -72,6 +72,14 @@ interval.file <- normalizePath(interval.file, mustWork = TRUE)
     files
 }

+checkDataTableVersion <- function() {
+    if (compareVersion(package.version("data.table"), "1.12.4") < 0) {
+        flog.fatal("data.table package is outdated. >= 1.12.4 required")
+        q(status = 0)
+    }
+    return(TRUE)
+}
+
 getCoverageBams <- function(bamFiles, indexFiles, outdir, interval.file,
     force = FALSE, keep.duplicates = FALSE, removemapq0 = FALSE) {

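(Not part of the diff; reviewer sketch.) The new helper gates every coverage step on data.table >= 1.12.4, the first release whose fwrite() can write gzipped files. For reference only, the same guard expressed with utils::packageVersion(); checkDataTableVersionAlt is a hypothetical name, not part of PureCN:

```
# Hypothetical alternative formulation of the same version guard.
checkDataTableVersionAlt <- function() {
    if (utils::packageVersion("data.table") < "1.12.4") {
        futile.logger::flog.fatal("data.table package is outdated. >= 1.12.4 required")
        quit(status = 1)
    }
    invisible(TRUE)
}
```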
@@ -83,7 +91,8 @@ getCoverageBams <- function(bamFiles, indexFiles, outdir, interval.file,

 .getCoverageBam <- function(bam.file, index.file = NULL, outdir,
     interval.file, force) {
-    output.file <- file.path(outdir, gsub(".bam$", "_coverage.txt",
+    checkDataTableVersion()
+    output.file <- file.path(outdir, gsub(".bam$", "_coverage.txt.gz",
         basename(bam.file)))
     futile.logger::flog.info("Processing %s...", output.file)
     if (!is.null(index.file)) {
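(Not part of the diff.) With the new suffix, a hypothetical input BAM maps to a gzipped coverage file:

```
gsub(".bam$", "_coverage.txt.gz", basename("/data/Sample1.bam"))
#> [1] "Sample1_coverage.txt.gz"
```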
@@ -160,10 +169,11 @@ if (!is.null(bam.file)) {

 ### GC-normalize coverage -----------------------------------------------------
 .gcNormalize <- function(gatk.coverage, interval.file, outdir, force) {
-    output.file <- file.path(outdir, gsub(".txt$|_interval_summary",
-        "_loess.txt", basename(gatk.coverage)))
-    outpng.file <- sub("txt$", "png", output.file)
-    output.qc.file <- sub(".txt$", "_qc.txt", output.file)
+    checkDataTableVersion()
+    output.file <- file.path(outdir, gsub(".txt$|.txt.gz$|_interval_summary",
+        "_loess.txt.gz", basename(gatk.coverage)))
+    outpng.file <- sub("txt.gz$", "png", output.file)
+    output.qc.file <- sub(".txt.gz$", "_qc.txt", output.file)

     if (file.exists(output.file) && !force) {
         flog.info("%s exists. Skipping... (--force will overwrite)", output.file)
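(Not part of the diff.) The widened pattern lets .gcNormalize accept both plain and gzipped coverage files while always emitting a gzipped loess file; with hypothetical names:

```
pat <- ".txt$|.txt.gz$|_interval_summary"
gsub(pat, "_loess.txt.gz", "Sample1_coverage.txt")
#> [1] "Sample1_coverage_loess.txt.gz"
gsub(pat, "_loess.txt.gz", "Sample1_coverage.txt.gz")
#> [1] "Sample1_coverage_loess.txt.gz"
```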
@@ -227,7 +227,7 @@ To build a normal database for coverage normalization, copy the paths to all
 GC-normalized normal coverage files in a single text file, line-by-line:

 ```
-ls -a normal*loess.txt | cat > example_normal.list
+ls -a normal*loess.txt.gz | cat > example_normal.list

 # From already GC-normalized files
 $ Rscript $PURECN/NormalDB.R --outdir $OUT_REF \
@@ -277,7 +277,7 @@ mkdir $OUT/$SAMPLEID

 # Without a matched normal (minimal test run)
 $ Rscript $PURECN/PureCN.R --out $OUT/$SAMPLEID \
-    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt \
+    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt.gz \
     --sampleid $SAMPLEID \
     --vcf ${SAMPLEID}_mutect.vcf \
     --normaldb $OUT_REF/normalDB_hg19.rds \
@@ -286,7 +286,7 @@ $ Rscript $PURECN/PureCN.R --out $OUT/$SAMPLEID \

 # Production pipeline run
 $ Rscript $PURECN/PureCN.R --out $OUT/$SAMPLEID \
-    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt \
+    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt.gz \
     --sampleid $SAMPLEID \
     --vcf ${SAMPLEID}_mutect.vcf \
     --statsfile ${SAMPLEID}_mutect_stats.txt \
@@ -300,8 +300,8 @@ $ Rscript $PURECN/PureCN.R --out $OUT/$SAMPLEID \
 # With a matched normal (test run; for production pipelines we recommend the
 # unmatched workflow described above)
 $ Rscript $PURECN/PureCN.R --out $OUT/$SAMPLEID \
-    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt \
-    --normal $OUT/$SAMPLEID/${SAMPLEID_NORMAL}_coverage_loess.txt \
+    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt.gz \
+    --normal $OUT/$SAMPLEID/${SAMPLEID_NORMAL}_coverage_loess.txt.gz \
     --sampleid $SAMPLEID \
     --vcf ${SAMPLEID}_mutect.vcf \
     --normaldb $OUT_REF/normalDB_hg19.rds \