Bioconductor Code: PureCN

Browse code

Initial support for CNVkit coverage data; started converting --outdir in command line tools to GATK-like --out for more file naming flexibility; Minor changes to quick vignette.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/PureCN@129450 bc3139a8-67e5-0310-9ffc-ced21a209358

Markus Riester authored on 08/05/2017 03:30:36
Showing 14 changed files

DESCRIPTION index 0730e1b..c53c844 100755
NAMESPACE index b641e30..ecdd66e 100755
NEWS index 4a49587..8020aa6 100755
R/callMutationBurden.R index c2e2603..97a9aaf 100644
R/readCoverageFile.R index c8cd8e5..8b525f0 100755
inst/extdata/Dx.R index a8fbca7..cb00155 100755
inst/extdata/PureCN.R index a9192d6..0547aef 100755
inst/extdata/example_normal3.cnn index 0000000..32653f8
inst/extdata/example_normal4.cnr index 0000000..bc7ae93
inst/unitTests/test_readCoverageFile.R index 60d8e04..e3a382a 100755
man/callMutationBurden.Rd index 671ae30..36bd85c 100644
man/readCoverageFile.Rd index abe9805..907cbf4 100644
vignettes/PureCN.Rnw index d3c3a28..46776e8 100755
vignettes/Quick.Rnw index b622041..6e6efc5 100755

DESCRIPTION

History View file @ 9ef0198

@@ -2,7 +2,7 @@ Package: PureCN
                      Type: Package
                      Title: Copy number calling and SNV classification using
                          targeted short read sequencing
                     -Version: 1.7.6
                     +Version: 1.7.7
                      Date: 2017-05-05
                      Authors@R: c(person("Markus", "Riester", role=c("aut", "cre"),
                              email="[email protected]"),
@@ -36,6 +36,7 @@ Imports:
                          futile.logger,
                          VGAM,
                          edgeR,
                     +    tools,
                          limma
                      Suggests:
                          PSCBS,

NAMESPACE

History View file @ 9ef0198

@@ -118,6 +118,7 @@ importFrom(stats,predict)
                      importFrom(stats,runif)
                      importFrom(stats,t.test)
                      importFrom(stats,weighted.mean)
                     +importFrom(tools,file_ext)
                      importFrom(utils,data)
                      importFrom(utils,head)
                      importFrom(utils,object.size)

NEWS

History View file @ 9ef0198

@@ -3,15 +3,16 @@ Changes in version 1.8.0
                      - Added mutation burden calculation
                      - Code cleanups (switch to GRanges where possible)
                     +- Added support for CNVkit coverage files (*.cnn, *.cnr)
                      API CHANGES
                     +- New function: callMutationBurden
                      - Defunct functions: createSNPBlacklist, getDiploid, autoCurateResults
                      - Minor changes:
                     -    - Remove gc.data from filterTargets since gc_bias is now added to tumor
                     +    - Removed gc.data from filterTargets since gc_bias is now added to tumor
                            coverage
+                    -
                      PLANNED FEATURES FOR 1.8
                      - Better sample summary statistics, like mutation burden, chromosomal

R/callMutationBurden.R

History View file @ 9ef0198

@@ -16,7 +16,8 @@
                      #' @param fun.countMutation Function that can be used to filter the
                      #' input VCF further for filtering, for example to only keep missense
                      #' mutations. Expects a \code{logical} vector indicating whether variant
                     -#' should be counted (\code{TRUE}) or not (\code{FALSE}).
                     +#' should be counted (\code{TRUE}) or not (\code{FALSE}). Default
                     +#' is to keep only single nucleotide variants.
                      #' @param callable \code{GRanges} object with callable genomic regions,
                      #' for example obtained by \sQuote{GATK CallableLoci} BED file, imported
                      #' with \code{rtracklayer}.
@@ -49,7 +50,8 @@
                      #'
                      #' @export callMutationBurden
                      callMutationBurden <- function(res, id = 1, remove.flagged = TRUE,
                     -    min.prior.somatic=0.1, min.cellfraction=0, fun.countMutation=NULL,
                     +    min.prior.somatic=0.1, min.cellfraction=0,
                     +    fun.countMutation=function(vcf) width(vcf)==1,
                          callable=NULL, exclude=NULL) {
                          if (is.null(res$input$vcf)) {

R/readCoverageFile.R

History View file @ 9ef0198

@@ -7,6 +7,8 @@
                      #' @param file Target coverage file.
                      #' @param format File format. If missing, derived from the file
                      #' extension. Currently only GATK DepthofCoverage format supported.
                     +#' @param zero Whether start positions are 0-based. If \code{NULL} (default),
                     +#' \code{TRUE} for BED-based CNVkit files; ignored with a warning for GATK.
                      #' @return A \code{data.frame} with the parsed coverage information.
                      #' @author Markus Riester
                      #' @seealso \code{\link{calculateBamCoverageByInterval}}
@@ -16,14 +18,24 @@
                      #'     package="PureCN")
                      #' coverage <- readCoverageFile(tumor.coverage.file)
                      #'
                     +#' @importFrom tools file_ext
                      #' @export readCoverageFile
                     -readCoverageFile <- function(file, format) {
                     -    if (missing(format)) format <- "GATK"
                     -    targetCoverage <- .readCoverageGatk(file)
                     +readCoverageFile <- function(file, format, zero=NULL) {
                     +    if (missing(format)) format <- .getFormat(file)
                     +    if (format %in% c("cnn", "cnr")) {
                     +        targetCoverage <- .readCoverageCnn(file, zero)
                     +    } else {
                     +        targetCoverage <- .readCoverageGatk(file, zero)
                     +    }
                          .checkLowCoverage(targetCoverage)
                          .checkIntervals(targetCoverage)
+                     }
                     +.getFormat <- function(file) {
                     +    ext <- file_ext(file)
                     +    if (ext %in% c("cnn", "cnr")) return("cnn")
                     +    "GATK"
                     +}
                      #' Read GATK coverage files
                      #'
@@ -44,18 +56,37 @@ readCoverageGatk <- function(file) {
                          readCoverageFile(file, format="GATK")
+                     }
                     -.readCoverageGatk <- function(file) {
                     +.readCoverageGatk <- function(file, zero) {
                     +    if (!is.null(zero)) flog.warn("zero ignored for GATK coverage files.")
                          inputCoverage <- utils::read.table(file, header = TRUE)
                          if (is.null(inputCoverage$total_coverage)) inputCoverage$total_coverage <- NA
                          if (is.null(inputCoverage$average_coverage)) inputCoverage$average_coverage <- NA
                     +    if (is.null(inputCoverage$ontarget)) inputCoverage$ontarget <- TRUE
                          targetCoverage <- GRanges(inputCoverage$Target,
                              coverage=inputCoverage$total_coverage,
                     -        average.coverage=inputCoverage$average_coverage)
                     +        average.coverage=inputCoverage$average_coverage,
                     +        ontarget=inputCoverage$ontarget)
                          targetCoverage
+                     }
                     +.readCoverageCnn <- function(file, zero) {
                     +    if (is.null(zero)) zero <- TRUE
                     +    inputCoverage <- utils::read.table(file, header = TRUE)
                     +    if (zero) inputCoverage$start <- inputCoverage$start + 1
                     +    targetCoverage <- GRanges(inputCoverage)
                     +    targetCoverage$coverage <- targetCoverage$depth * width(targetCoverage)
                     +    targetCoverage$average.coverage <- targetCoverage$depth
                     +    targetCoverage$ontarget <- TRUE
                     +    targetCoverage$depth <- NULL
                     +    targetCoverage$Gene <- targetCoverage$gene
                     +    targetCoverage$ontarget[which(targetCoverage$Gene=="Background")] <- FALSE
                     +    targetCoverage$gene <- NULL
                     +    targetCoverage$log2 <- NULL
                     +    targetCoverage
                     +}
+                    +
                      .checkIntervals <- function(coverageGr) {
                          if (min(width(coverageGr))<2) {
                              flog.warn("Coverage data contains single nucleotide intervals.")

inst/extdata/Dx.R

History View file @ 9ef0198

@@ -10,7 +10,7 @@ spec <- matrix(c(
                      'rds',          'r', 1, "character",
                      'callable',     'a', 1, "character",
                      'exclude',      'b', 1, "character",
                     -'outdir',       'o', 1, "character"
                     +'out',          'o', 1, "character"
                      ), byrow=TRUE, ncol=4)
                      opt <- getopt(spec)
@@ -31,9 +31,6 @@ if (is.null(infileRds)) stop("Need --rds")
                      infileRds <- normalizePath(infileRds, mustWork=TRUE)
                      # Parse outdir
                     -outdir <- opt$outdir
                     -if (is.null(outdir)) outdir <- dirname(infileRds)
                     -outdir <- normalizePath(outdir, mustWork=TRUE)
                      # Parse both BED files restricting covered region
                      callableFile <- opt$callable
@@ -42,7 +39,7 @@ if (!is.null(callableFile)) {
                          suppressPackageStartupMessages(library(rtracklayer))
                          callableFile <- normalizePath(callableFile, mustWork=TRUE)
                          flog.info("Reading %s...", callableFile)
                     -    callable <- import(callableFile)
                     +    callable <- GRanges(import(callableFile))
+                     }
                      excludeFile <- opt$exclude
@@ -51,7 +48,7 @@ if (!is.null(excludeFile)) {
                          suppressPackageStartupMessages(library(rtracklayer))
                          excludeFile <- normalizePath(excludeFile, mustWork=TRUE)
                          flog.info("Reading %s...", excludeFile)
                     -    exclude <- import(excludeFile)
                     +    exclude <- GRanges(import(excludeFile))
+                     }
@@ -60,12 +57,28 @@ if (!is.null(excludeFile)) {
                      flog.info("Loading PureCN...")
                      suppressPackageStartupMessages(library(PureCN))
                     -flog.info("Reading %s...", infileRds)
                      res <- readCurationFile(infileRds)
                      sampleid <- res$input$sampleid
                     -file.suffix <- ""
                     -outfileMb <- file.path(outdir, paste0(sampleid, file.suffix, '_mutation_burden.csv'))
                     +.getOutPrefix <- function(opt, infile, sampleid) {
                     +    out <- opt[["out"]]
                     +    if (is.null(out)) {
                     +        if (!is.null(infile) && file.exists(infile)) {
                     +            outdir <- dirname(infile)
                     +            prefix <- sampleid
                     +        } else {
                     +            stop("Need --out")
                     +        }
                     +    } else {
                     +        outdir <- dirname(out)
                     +        prefix <- basename(out)
                     +    }
                     +    outdir <- normalizePath(outdir, mustWork=TRUE)
                     +    outPrefix <- file.path(outdir, prefix)
                     +}
                     +outPrefix <- .getOutPrefix(opt, infileRds, sampleid)
+                    +
                     +outfileMb <- paste0(outPrefix, '_mutation_burden.csv')
                      force <- !is.null(opt$force)

inst/extdata/PureCN.R

History View file @ 9ef0198

@@ -30,7 +30,7 @@ spec <- matrix(c(
                      'modelhomozygous','y', 0, "logical",
                      'model',          'q', 1, "character",
                      'funsegmentation','w', 1, "character",
                     -'outdir',         'o', 1, "character",
                     +'outdir',            'o', 1, "character",
                      'outvcf',         'u', 0, "logical",
                      'twopass',        'T', 0, "logical",
                      'sampleid',       'i', 1, "character"
@@ -78,7 +78,7 @@ outvcf <- !is.null(opt$outvcf)
                      pool <- opt$pool
                      model.homozygous <- !is.null(opt$modelhomozygous)
                      file.rds <- opt$rds
                     -file.suffix <- ''
                     +sampleidExtension <- if (is.null(opt$extension)) '' else opt$extension
                      if (!is.null(file.rds) && file.exists(file.rds)) {
                          if (is.null(outdir)) outdir <- dirname(file.rds)
@@ -86,7 +86,7 @@ if (!is.null(file.rds) && file.exists(file.rds)) {
                          if (is.null(sampleid)) stop("Need --sampleid.")
                          if (is.null(genome)) stop("Need --genome")
                          genome <- as.character(genome)
                     -    file.rds <- file.path(outdir, paste0(sampleid, file.suffix, '.rds'))
                     +    file.rds <- file.path(outdir, paste0(sampleid, sampleidExtension, '.rds'))
                          if (is.null(seg.file)) {
                              tumor.coverage.file <- normalizePath(tumor.coverage.file,
                                  mustWork=TRUE)
@@ -159,9 +159,9 @@ if (file.exists(file.rds) && !force) {
+                         }
                          normal.coverage.file <- .getNormalCoverage(normal.coverage.file)
                     -    file.log <- file.path(outdir, paste0(sampleid, file.suffix, '.log'))
                     +    file.log <- file.path(outdir, paste0(sampleid, sampleidExtension, '.log'))
                     -    pdf(paste(outdir,"/", sampleid, file.suffix, '_segmentation.pdf', sep=''),
                     +    pdf(paste(outdir,"/", sampleid, sampleidExtension, '_segmentation.pdf', sep=''),
                              width=10, height=11)
                          af.range = c(0.03, 0.97)
                          if (!is.null(opt$minaf)) {
@@ -232,43 +232,43 @@ if (file.exists(file.rds) && !force) {
                      ### Create output files -------------------------------------------------------
                      createCurationFile(file.rds)
                     -file.pdf <- file.path(outdir, paste0(sampleid, file.suffix, '.pdf'))
                     +file.pdf <- file.path(outdir, paste0(sampleid, sampleidExtension, '.pdf'))
                      pdf(file.pdf, width=10, height=11)
                      plotAbs(ret, type='all')
                      dev.off()
                     -file.png <- file.path(outdir, paste0(sampleid, file.suffix, '_contamination.png'))
                     +file.png <- file.path(outdir, paste0(sampleid, sampleidExtension, '_contamination.png'))
                      png(file.png, width=800)
                      plotAbs(ret,1, type='contamination')
                      dev.off()
                      if (outvcf) {
                     -    file.vcf <- file.path(outdir, paste0(sampleid, file.suffix, '.vcf'))
                     +    file.vcf <- file.path(outdir, paste0(sampleid, sampleidExtension, '.vcf'))
                          vcfanno <- predictSomatic(ret, return.vcf=TRUE,
                              vcf.field.prefix="PureCN.")
                          writeVcf(vcfanno, file=file.vcf)
                      } else {
                     -    file.csv <- file.path(outdir, paste0(sampleid, file.suffix, '_variants.csv'))
                     +    file.csv <- file.path(outdir, paste0(sampleid, sampleidExtension, '_variants.csv'))
                          write.csv(cbind(Sampleid=sampleid, predictSomatic(ret)), file=file.csv,
                              row.names=FALSE, quote=FALSE)
+                     }
                     -file.loh <- file.path(outdir, paste0(sampleid, file.suffix, '_loh.csv'))
                     +file.loh <- file.path(outdir, paste0(sampleid, sampleidExtension, '_loh.csv'))
                      write.csv(cbind(Sampleid=sampleid, callLOH(ret)), file=file.loh,
                          row.names=FALSE, quote=FALSE)
                     -file.genes <- file.path(outdir, paste0(sampleid, file.suffix, '_genes.csv'))
                     +file.genes <- file.path(outdir, paste0(sampleid, sampleidExtension, '_genes.csv'))
                      allAlterations <- callAlterations(ret, all.genes=TRUE)
                      write.csv(cbind(Sampleid=sampleid, gene.symbol=rownames(allAlterations),
                          allAlterations), row.names=FALSE, file=file.genes, quote=FALSE)
                     -file.seg <- file.path(outdir, paste0(sampleid, file.suffix, '_dnacopy.txt'))
                     +file.seg <- file.path(outdir, paste0(sampleid, sampleidExtension, '_dnacopy.txt'))
                      write.table(ret$results[[1]]$seg, file=file.seg, sep="\t", quote=FALSE,
                          row.names=FALSE)
                      if (!is.null(ret$input$vcf)) {
                     -    file.pdf <- file.path(outdir, paste0(sampleid, file.suffix, '_chromosomes.pdf'))
                     +    file.pdf <- file.path(outdir, paste0(sampleid, sampleidExtension, '_chromosomes.pdf'))
                          pdf(file.pdf, width=9, height=10)
                          vcf <- ret$input$vcf[ret$results[[1]]$SNV.posterior$vcf.ids]
                          chromosomes <- seqlevelsInUse(vcf)

inst/extdata/example_normal3.cnn

History View file @ 9ef0198

                     new file mode 100644
@@ -0,0 +1,5 @@
                     +chromosome	start	end	gene	depth	log2
                     +chr1	762097	762270	LINC00115	174.89	7.45031
                     +chr1	861281	861490	SAMD11	28.9043	4.85321
                     +chr1	865591	865791	SAMD11	51.26	5.67976
                     +chr1	866325	866498	SAMD11	14	3.80735

inst/extdata/example_normal4.cnr

History View file @ 9ef0198

                     new file mode 100644
@@ -0,0 +1,6 @@
                     +chromosome	start	end	gene	log2	depth	weight
                     +chr1	10500	68590	Background	0.55584	0.70587	0.466868
                     +chr1	70509	176917	Background	0.235896	1.02411	0.482562
                     +chr1	227917	267219	Background	0.163203	0.387996	0.408305
                     +chr1	318219	367158	Background	0.375418	1.42616	0.424955
                     +chr1	367658	367893	.	0.68569	17.617	0.310347

inst/unitTests/test_readCoverageFile.R

History View file @ 9ef0198

@@ -9,4 +9,20 @@ test_readCoverageFile <- function() {
                          checkEquals(3, length(coverage))
                          checkEqualsNumeric(c(1216042, 1216606, 1216791), start(coverage))
                          checkEqualsNumeric(c(1216050, 1216678, 1217991), end(coverage))
+                    +
                     +    coverageFile <- system.file("extdata", "example_normal3.cnn", package="PureCN")
                     +    coverage <- readCoverageFile(coverageFile)
                     +    checkEquals(4, length(coverage))
                     +    checkEqualsNumeric(c(762097, 861281, 865591, 866325)+1, start(coverage))
                     +    checkEqualsNumeric(c(762270, 861490, 865791, 866498), end(coverage))
                     +    coverage <- readCoverageFile(coverageFile, zero=FALSE)
                     +    checkEquals(4, length(coverage))
                     +    checkEqualsNumeric(c(762097, 861281, 865591, 866325), start(coverage))
                     +    checkEqualsNumeric(c(762270, 861490, 865791, 866498), end(coverage))
                     +    coverageFile <- system.file("extdata", "example_normal4.cnr", package="PureCN")
                     +    coverage <- readCoverageFile(coverageFile)
                     +    checkEquals(5, length(coverage))
                     +    checkEqualsNumeric(c(10500, 70509, 227917, 318219, 367658)+1, start(coverage))
                     +    checkEqualsNumeric(c(68590, 176917, 267219, 367158, 367893), end(coverage))
                     +    checkEquals(c(FALSE, FALSE, FALSE, FALSE, TRUE), coverage$ontarget)
+                     }

man/callMutationBurden.Rd

History View file @ 9ef0198

@@ -5,8 +5,9 @@
                      \title{Call mutation burden}
                      \usage{
                      callMutationBurden(res, id = 1, remove.flagged = TRUE,
                     -  min.prior.somatic = 0.1, min.cellfraction = 0, fun.countMutation = NULL,
                     -  callable = NULL, exclude = NULL)
                     +  min.prior.somatic = 0.1, min.cellfraction = 0,
                     +  fun.countMutation = function(vcf) width(vcf) == 1, callable = NULL,
                     +  exclude = NULL)
+                     }
                      \arguments{
                      \item{res}{Return object of the \code{\link{runAbsoluteCN}} function.}
@@ -27,7 +28,8 @@ with very low allelic fraction.}
                      \item{fun.countMutation}{Function that can be used to filter the
                      input VCF further for filtering, for example to only keep missense
                      mutations. Expects a \code{logical} vector indicating whether variant
                     -should be counted (\code{TRUE}) or not (\code{FALSE}).}
                     +should be counted (\code{TRUE}) or not (\code{FALSE}). Default
                     +is to keep only single nucleotide variants.}
                      \item{callable}{\code{GRanges} object with callable genomic regions,
                      for example obtained by \sQuote{GATK CallableLoci} BED file, imported

man/readCoverageFile.Rd

History View file @ 9ef0198

@@ -4,13 +4,16 @@
                      \alias{readCoverageFile}
                      \title{Read coverage file}
                      \usage{
                     -readCoverageFile(file, format)
                     +readCoverageFile(file, format, zero = NULL)
+                     }
                      \arguments{
                      \item{file}{Target coverage file.}
                      \item{format}{File format. If missing, derived from the file
                      extension. Currently only GATK DepthofCoverage format supported.}
+                    +
                     +\item{zero}{Whether start positions are 0-based. If \code{NULL} (default),
                     +\code{TRUE} for BED-based CNVkit files; ignored with a warning for GATK.}
+                     }
                      \value{
                      A \code{data.frame} with the parsed coverage information.

vignettes/PureCN.Rnw

History View file @ 9ef0198

@@ -1205,14 +1205,14 @@ Dx.R extracts copy number and mutation metrics from PureCN.R output.
                      \begin{verbatim}
                      # Basic output
                     -Rscript Dx.R --outdir $OUT --rds Sample1_purecn.rds
                     +Rscript Dx.R --out ${OUT}/Sample1 --rds Sample1_purecn.rds
                      # Provide a BED file with callable regions, for examples obtained by
                      # GATK CallableLoci. Useful to calculate mutations per megabase and
                      # to exclude low quality regions.
                      grep CALLABLE Sample1_callable_status.bed > \
                          Sample1_callable_status_filtered.bed
                     -Rscript Dx.R --outdir $OUT  --rds Sample1_purecn.rds \
                     +Rscript Dx.R --out ${OUT}/Sample1  --rds Sample1_purecn.rds \
                          --callable Sample1_callable_status_filtered.bed
                      \end{verbatim}
@@ -1228,7 +1228,7 @@ Argument name  & Corresponding PureCN argument & PureCN function \\
                      --rds -r      & file.rds & \Rfunction{readCurationFile}   \\
                      --callable -a & callable & \Rfunction{callMutationBurden} \\
                      --exclude -b  & exclude  & \Rfunction{callMutationBurden} \\
                     +--out -o      & & \\
                      \bottomrule
                      \end{tabular}
                      \end{table*}

vignettes/Quick.Rnw

History View file @ 9ef0198

@@ -25,13 +25,15 @@ vignette.
                      \subsection*{Prepare environment and files}
                     -Get the path to command line scripts in R:
                     +\begin{itemize}
+                    +
                     +\item Get the path to command line scripts in R:
                      <<paths>>=
                      system.file("extdata", package="PureCN")
+                     @
                     -Store this path in an environment variable, for example in BASH:
                     +\item Store this path in an environment variable, for example in BASH:
                      \begin{verbatim}
                      $ export PURECN="/path/to/PureCN/extdata"
@@ -39,11 +41,11 @@ $ Rscript $PURECN/PureCN.R --help
                      Usage: /path/to/PureCN/inst/extdata/PureCN.R [-[-help|h]] ...
                      \end{verbatim}
                     -Generate a basic interval file from a BED file containing target coordinates:
                     +\item Generate a basic interval file from a BED file containing target coordinates:
                      \begin{verbatim}
                     -$ Rscript $PURECN/IntervalFile.R --infile ex_intervals.bed \
                     -    --fasta ex_reference.fa --outfile ex_gcgene.txt
                     +$ Rscript $PURECN/IntervalFile.R --infile baits_hg19.bed \
                     +    --fasta hg19.fa --outfile baits_hg19_gcgene.txt
                      \end{verbatim}
                      Internally, this script uses \Biocpkg{rtracklayer} to parse the
@@ -53,6 +55,8 @@ See the main vignette how to add gene symbols to the interval file. Symbols are
                      necessary to obtain gene-level copy number and LOH calls. For a test run, you
                      will not need this.
                     +\end{itemize}
+                    +
                      \subsection*{Run PureCN with third-party segmentation}
                      If you already have a segmentation from third-party tools (for example CNVkit,
@@ -62,8 +66,8 @@ EXCAVATOR2). For a test run:
                      Rscript $PURECN/PureCN.R --outdir $OUT/$SAMPLEID  \
                          --sampleid $SAMPLEID \
                          --segfile $OUT/$SAMPLEID/${SAMPLEID}.cnvkit.seg \
                     -    --vcf $VCF_FILE \
                     -    --genome hg19 --gcgene ex_gcgene.txt
                     +    --vcf ${SAMPLEID}_mutect.vcf \
                     +    --genome hg19 --gcgene baits_hg19_gcgene.txt
                      \end{verbatim}
                      The main VCF (\Rcode{--vcf}) is ideally created by \software{MuTect} 1.1.7.
@@ -77,12 +81,12 @@ and genome:
                      \begin{verbatim}
                      Rscript $PURECN/PureCN.R --outdir $OUT/$SAMPLEID  \
                          --sampleid $SAMPLEID \
                     -    --segfile $OUT/$SAMPLEID/${SAMPLEID}.cnvkit.seg \
                     +    --segfile $OUT/$SAMPLEID/${SAMPLEID}_cnvkit.seg \
                          --normal_panel $NORMAL_PANEL \
                     -    --vcf $VCF_FILE \
                     -    --statsfile $VCF_STATS_FILE \
                     +    --vcf ${SAMPLEID}_mutect.vcf \
                     +    --statsfile ${SAMPLEID}_mutect_stats.txt \
                          --snpblacklist hg19_simpleRepeats.bed \
                     -    --genome hg19 --gcgene ex_gcgene.txt \
                     +    --genome hg19 --gcgene baits_hg19_gcgene.txt \
                      #    --funsegmentation none \
                          --force --postoptimize
                      \end{verbatim}
@@ -105,7 +109,8 @@ This results in a significant runtime increase for whole-exome data.
                      The following describes \Biocpkg{PureCN} runs with internal copy number
                      normalization and segmentation. Provided are again minimal examples for
                     -test runs.
                     +test runs. See the main vignette for how to get optimal results in production
                     +pipelines.
                      \subsubsection*{Coverage}
@@ -114,13 +119,13 @@ For each sample, tumor and normal:
                      \begin{verbatim}
                      # From a BAM file
                      $ Rscript $PURECN/Coverage.R --outdir $OUT/$SAMPLEID \
                     -    --bam example.bam \
                     -    --gcgene ex_gcgene.txt
                     +    --bam ${SAMPLEID}.bam \
                     +    --gcgene baits_hg19_gcgene.txt
                      # From a GATK DepthOfCoverage file
                      Rscript $PURECN/Coverage.R --outdir $OUT/$SAMPLEID \
                     -    --gatkcoverage example_tumor.txt \
                     -    --gcgene  ex_gcgene.txt
                     +    --gatkcoverage ${SAMPLEID}.coverage.sample_interval_summary \
                     +    --gcgene  baits_hg19_gcgene.txt
                      \end{verbatim}
                      \subsubsection*{NormalDB}
@@ -142,17 +147,23 @@ $ Rscript $PURECN/NormalDB.R --outdir $OUT \
                      \begin{verbatim}
                      cd $OUT/$SAMPLEID
                      # From GC-normalized coverage data
                     -$ Rscript $PURECN/PureCN.R --outdir . --tumor example_tumor_loess.txt \
                     -    --normal example_normal_loess.txt --vcf $VCF_FILE -i $SAMPLEID \
                     -    --genome hg19 --gcgene  ex_gcgene.txt
                     +$ Rscript $PURECN/PureCN.R --outdir . --tumor ${SAMPLEID}_coverage_loess.txt \
                     +    --normal ${SAMPLEID_NORMAL}_coverage_loess.txt \
                     +    --sampleid $SAMPLEID \
                     +    --vcf ${SAMPLEID}_mutect.vcf \
                     +    --genome hg19 \
                     +    --gcgene baits_hg19_gcgene.txt
                      # Without a matched normal
                     -$ Rscript $PURECN/PureCN.R --outdir . --tumor example_tumor_loess.txt \
                     -    --normaldb ../normalDB_hg19.rds --pool 5 --vcf example_vcf.vcf -i $SAMPLEID \
                     -    --genome hg19 --gcgene example_gc.gene.file.txt
                     +$ Rscript $PURECN/PureCN.R --outdir . --tumor ${SAMPLEID}_coverage_loess.txt \
                     +    --normaldb ../normalDB_hg19.rds \
                     +    --sampleid $SAMPLEID \
                     +    --vcf ${SAMPLEID}_mutect.vcf \
                     +    --pool 5 \
                     +    --genome hg19 --gcgene baits_hg19_gcgene.txt
                      # Recreate output after manual curation of Sample_purecn.csv
                     -$ Rscript $PURECN/PureCN.R --rds Sample1_purecn.rds
                     +$ Rscript $PURECN/PureCN.R --rds ${SAMPLEID}_purecn.rds
                      \end{verbatim}
                      \subsection*{Session Info}