Bioconductor Code: PureCN

Browse code

Added support for gzipped output files (closes #106).

Markus Riester authored on 06/10/2019 21:44:29
Showing 10 changed files

DESCRIPTION index c306ec4..f3e0d55 100644
NAMESPACE index 70f8a42..1130a39 100755
NEWS index 42aaf47..4d5801d 100755
R/calculateBamCoverageByInterval.R index f118b8b..142a4c5 100644
R/calculateIntervalWeights.R index 836f111..44a648a 100644
R/correctCoverageBias.R index 184ec08..e420725 100644
R/preprocessIntervals.R index 0c07cfd..baa676c 100644
R/processMultipleSamples.R index 3780f06..3d7cd3b 100644
inst/extdata/Coverage.R index e047c12..c5958c1 100644
vignettes/Quick.Rmd index b1e1078..69a1493 100755

History View file @ 67a695e

@@ -2,8 +2,8 @@ Package: PureCN
                      Type: Package
                      Title: Copy number calling and SNV classification using
                          targeted short read sequencing
                     -Version: 1.15.22
                     -Date: 2019-10-05
                     +Version: 1.15.23
                     +Date: 2019-10-06
                      Authors@R: c(person("Markus", "Riester",
                                          role = c("aut", "cre"),
                                          email = "[email protected]",

NAMESPACE

History View file @ 67a695e

@@ -86,6 +86,7 @@ importFrom(VGAM,dbetabinom.ab)
                      importFrom(VGAM,vglm)
                      importFrom(data.table,data.table)
                      importFrom(data.table,fread)
                     +importFrom(data.table,fwrite)
                      importFrom(futile.logger,appender.tee)
                      importFrom(futile.logger,flog.appender)
                      importFrom(futile.logger,flog.debug)
@@ -166,4 +167,3 @@ importFrom(utils,read.csv)
                      importFrom(utils,read.delim)
                      importFrom(utils,tail)
                      importFrom(utils,write.csv)
                     -importFrom(utils,write.table)

NEWS

History View file @ 67a695e

@@ -33,6 +33,9 @@ SIGNIFICANT USER-VISIBLE CHANGES
                            weights when available
                          o Changed default of min.target.width in preprocessIntervals from 10 to 100
                            (#73)
                     +    o replaced write.table with data.table::fwrite to automatically support
                     +      producing gzipped output (requires data.table 1.12.4, #106)
                     +    o Coverage.R now gzips BAM file coverage (requires data.table 1.12.4, #106)
                      BUGFIXES

R/calculateBamCoverageByInterval.R

History View file @ 67a695e

@@ -83,7 +83,7 @@ calculateBamCoverageByInterval <- function(bam.file, interval.file,
         on_target = intervalGr$on.target,
         duplication_rate = intervalGr$duplication.rate
     )
-    write.table(tmp, file = output.file, row.names = FALSE, quote = FALSE)
+    fwrite(tmp, file = output.file, row.names = FALSE, quote = FALSE)
 }
 
 .filterDuplicates <- function(x) {

R/calculateIntervalWeights.R

History View file @ 67a695e

@@ -88,7 +88,7 @@ old_method = FALSE) {
                                  Target = as.character(ret$weights),
                                  weights = ret$weights$weights)
                     -        write.table(ret_output, file = interval.weight.file, row.names = FALSE,
                     +        fwrite(ret_output, file = interval.weight.file, row.names = FALSE,
                                          quote = FALSE, sep = "\t")
                          }
                          if (plot) .plotIntervalWeights(lrs.sd, width(tumor.coverage[[1]]),

R/correctCoverageBias.R

History View file @ 67a695e

@@ -42,7 +42,7 @@ globalVariables(names=c("..level.."))
                      #'             coord_trans
                      #' @importFrom gridExtra grid.arrange
                      #' @importFrom stats loess lm predict
                     -#' @importFrom utils write.table
                     +#' @importFrom data.table fwrite
                      correctCoverageBias <- function(coverage.file, interval.file,
                      output.file = NULL, plot.bias = FALSE, plot.max.density = 50000,
                      output.qc.file = NULL) {
@@ -105,7 +105,8 @@ output.qc.file = NULL) {
                          colnames(qc)[5:10] <- paste0("mom.", c("raw", "raw", "post.gc", "post.gc",
                                                   "post.reptiming", "post.reptiming"),
                                                   ".", rep(c("ontarget", "offtarget"),3))
                     -    write.table(qc, file = output.qc.file, row.names = FALSE, quote = FALSE)
                     +    fwrite(qc, file = output.qc.file, row.names = FALSE, quote = FALSE,
                     +        sep = " ")
                      }
                      .createCoverageGgplot <- function(raw, normalized, plot.max.density, x, log = FALSE) {

R/preprocessIntervals.R

History View file @ 67a695e

@@ -320,7 +320,7 @@ calculateGCContentByInterval <- function() {
         Gene=interval.gr$Gene,
         on_target=interval.gr$on.target
     )    
-    write.table(tmp, file=output.file, row.names=FALSE, quote=FALSE, sep="\t")
+    fwrite(tmp, file = output.file, row.names = FALSE, quote = FALSE, sep = "\t")
 }
 
 .checkSeqlengths <- function(ref, x) {

R/processMultipleSamples.R

History View file @ 67a695e

@@ -129,10 +129,10 @@ processMultipleSamples <- function(tumor.coverage.files, sampleids, normalDB,
                          rownames(lrsm) <- NULL
                          lrsm[idx.enough.markers,]
                          #transform to DNAcopy format
                     -    m <- data.table::melt(lrsm, id.vars=1:5)
                     +    m <- data.table::melt(data.table(lrsm), id.vars=1:5)
                          m <- m[, c(6,1,3,4,5,7)]
                          colnames(m) <- c("ID", "chrom", "loc.start", "loc.end", "num.mark", "seg.mean")
                     -    m
                     +    data.frame(m)
                      }
                      .add_weights_to_normaldb <- function(interval.weight.file, normalDB = NULL) {

inst/extdata/Coverage.R

History View file @ 67a695e

@@ -72,6 +72,14 @@ interval.file <- normalizePath(interval.file, mustWork = TRUE)
                          files
                      }
                     +# Verify that the installed data.table is at least 1.12.4, the version
                     +# this change requires for producing gzipped output.
                     +# Returns TRUE when the version is sufficient; otherwise logs a fatal
                     +# message and terminates the R session.
                     +checkDataTableVersion <- function() {
                     +    if (compareVersion(package.version("data.table"), "1.12.4") < 0) {
                     +        flog.fatal("data.table package is outdated. >= 1.12.4 required")
                     +        # Exit non-zero so calling shells and pipelines see the failure;
                     +        # status 0 would report success despite the fatal error.
                     +        q(status = 1)
                     +    }
                     +    return(TRUE)
                     +}
                     +
                      getCoverageBams <- function(bamFiles, indexFiles, outdir, interval.file,
                          force = FALSE, keep.duplicates = FALSE, removemapq0 = FALSE) {
@@ -83,7 +91,8 @@ getCoverageBams <- function(bamFiles, indexFiles, outdir, interval.file,
                          .getCoverageBam <- function(bam.file, index.file = NULL, outdir,
                              interval.file, force) {
                     -        output.file <- file.path(outdir,  gsub(".bam$", "_coverage.txt",
                     +        checkDataTableVersion()
                     +        output.file <- file.path(outdir,  gsub(".bam$", "_coverage.txt.gz",
                                  basename(bam.file)))
                              futile.logger::flog.info("Processing %s...", output.file)
                              if (!is.null(index.file)) {
@@ -160,10 +169,11 @@ if (!is.null(bam.file)) {
                      ### GC-normalize coverage -----------------------------------------------------
                      .gcNormalize <- function(gatk.coverage, interval.file, outdir, force) {
                     -    output.file <- file.path(outdir,  gsub(".txt$|_interval_summary",
                     -        "_loess.txt", basename(gatk.coverage)))
                     -    outpng.file <- sub("txt$", "png", output.file)
                     -    output.qc.file <- sub(".txt$", "_qc.txt", output.file)
                     +    checkDataTableVersion()
                     +    output.file <- file.path(outdir,  gsub(".txt$|.txt.gz$|_interval_summary",
                     +        "_loess.txt.gz", basename(gatk.coverage)))
                     +    outpng.file <- sub("txt.gz$", "png", output.file)
                     +    output.qc.file <- sub(".txt.gz$", "_qc.txt", output.file)
                          if (file.exists(output.file) && !force) {
                              flog.info("%s exists. Skipping... (--force will overwrite)", output.file)

vignettes/Quick.Rmd

History View file @ 67a695e

@@ -227,7 +227,7 @@ To build a normal database for coverage normalization, copy the paths to all
                      GC-normalized normal coverage files in a single text file, line-by-line:
                      ```
                     -ls -a normal*loess.txt | cat > example_normal.list
                     +ls -a normal*loess.txt.gz | cat > example_normal.list
                      # From already GC-normalized files
                      $ Rscript $PURECN/NormalDB.R --outdir $OUT_REF \
@@ -277,7 +277,7 @@ mkdir $OUT/$SAMPLEID
                      # Without a matched normal (minimal test run)
                      $ Rscript $PURECN/PureCN.R --out $OUT/$SAMPLEID \
                     -    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt \
                     +    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt.gz \
                          --sampleid $SAMPLEID \
                          --vcf ${SAMPLEID}_mutect.vcf \
                          --normaldb $OUT_REF/normalDB_hg19.rds \
@@ -286,7 +286,7 @@ $ Rscript $PURECN/PureCN.R --out $OUT/$SAMPLEID \
                      # Production pipeline run
                      $ Rscript $PURECN/PureCN.R --out $OUT/$SAMPLEID \
                     -    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt \
                     +    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt.gz \
                          --sampleid $SAMPLEID \
                          --vcf ${SAMPLEID}_mutect.vcf \
                          --statsfile ${SAMPLEID}_mutect_stats.txt \
@@ -300,8 +300,8 @@ $ Rscript $PURECN/PureCN.R --out $OUT/$SAMPLEID \
                      # With a matched normal (test run; for production pipelines we recommend the
                      # unmatched workflow described above)
                      $ Rscript $PURECN/PureCN.R --out $OUT/$SAMPLEID \
                     -    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt \
                     -    --normal $OUT/$SAMPLEID/${SAMPLEID_NORMAL}_coverage_loess.txt \
                     +    --tumor $OUT/$SAMPLEID/${SAMPLEID}_coverage_loess.txt.gz \
                     +    --normal $OUT/$SAMPLEID/${SAMPLEID_NORMAL}_coverage_loess.txt.gz \
                          --sampleid $SAMPLEID \
                          --vcf ${SAMPLEID}_mutect.vcf \
                          --normaldb $OUT_REF/normalDB_hg19.rds \

...	...	@@ -320,7 +320,7 @@ calculateGCContentByInterval <- function() {
320	320	Gene=interval.gr$Gene,
321	321	on_target=interval.gr$on.target
322	322	)
323		- write.table(tmp, file=output.file, row.names=FALSE, quote=FALSE, sep="\t")
	323	+ fwrite(tmp, file = output.file, row.names = FALSE, quote = FALSE, sep = "\t")
324	324	}
325	325
326	326	.checkSeqlengths <- function(ref, x) {

...	...	@@ -83,7 +83,7 @@ calculateBamCoverageByInterval <- function(bam.file, interval.file,
83	83	on_target = intervalGr$on.target,
84	84	duplication_rate = intervalGr$duplication.rate
85	85	)
86		- write.table(tmp, file = output.file, row.names = FALSE, quote = FALSE)
	86	+ fwrite(tmp, file = output.file, row.names = FALSE, quote = FALSE)
87	87	}
88	88
89	89	.filterDuplicates <- function(x) {