Browse code

Switch to futile.logger; Added --version flags to command line tools; worked on contamination rate estimation (still alpha);log-ratio smoothing now in runAbsoluteCN, not in segmentation function.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/PureCN@125770 bc3139a8-67e5-0310-9ffc-ced21a209358

Markus Riester authored on 08/01/2017 23:26:53
Showing 40 changed files

... ...
@@ -2,8 +2,8 @@ Package: PureCN
2 2
 Type: Package
3 3
 Title: Copy number calling and SNV classification using
4 4
     targeted short read sequencing
5
-Version: 1.5.32
6
-Date: 2017-01-03
5
+Version: 1.5.33
6
+Date: 2017-01-06
7 7
 Authors@R: c(person("Markus", "Riester", role=c("aut", "cre"),
8 8
         email="[email protected]"),
9 9
     person("Angad P.", "Singh", role="aut"))
... ...
@@ -13,7 +13,7 @@ Description: This package estimates tumor purity, copy number, loss of
13 13
     with standard somatic variant detection pipelines, and has support for
14 14
     tumor samples without matching normal samples.
15 15
 Depends:
16
-    R (>= 3.2),
16
+    R (>= 3.3),
17 17
     DNAcopy,
18 18
     VariantAnnotation (>= 1.14.1)
19 19
 Imports:
... ...
@@ -32,6 +32,7 @@ Imports:
32 32
     Biostrings,
33 33
     rtracklayer,
34 34
     ggplot2,
35
+    futile.logger,
35 36
     edgeR,
36 37
     limma
37 38
 Suggests:
... ...
@@ -55,13 +55,22 @@ importFrom(S4Vectors,queryHits)
55 55
 importFrom(S4Vectors,subjectHits)
56 56
 importFrom(SummarizedExperiment,rowRanges)
57 57
 importFrom(data.table,data.table)
58
+importFrom(futile.logger,appender.tee)
59
+importFrom(futile.logger,flog.appender)
60
+importFrom(futile.logger,flog.fatal)
61
+importFrom(futile.logger,flog.info)
62
+importFrom(futile.logger,flog.threshold)
63
+importFrom(futile.logger,flog.warn)
58 64
 importFrom(ggplot2,aes)
59 65
 importFrom(ggplot2,aes_string)
60 66
 importFrom(ggplot2,element_text)
61 67
 importFrom(ggplot2,facet_wrap)
68
+importFrom(ggplot2,geom_boxplot)
69
+importFrom(ggplot2,geom_hline)
62 70
 importFrom(ggplot2,geom_line)
63 71
 importFrom(ggplot2,geom_point)
64 72
 importFrom(ggplot2,ggplot)
73
+importFrom(ggplot2,labs)
65 74
 importFrom(ggplot2,scale_alpha_continuous)
66 75
 importFrom(ggplot2,stat_density2d)
67 76
 importFrom(ggplot2,theme)
... ...
@@ -6,6 +6,7 @@ Changes in version 1.6.0
6 6
 - Polished plots, added new GC-normalization and volcano plots
7 7
 - Support for cell lines
8 8
 - Improved somatic vs. germline status calling
9
+- Contamination rate estimation.
9 10
 - Better mapping bias estimation and correction
10 11
 - Better copy number normalization using multiple best normals
11 12
 - New GC-normalization for smaller gene panels
... ...
@@ -16,6 +17,8 @@ Changes in version 1.6.0
16 17
 - Faster post.optimize=TRUE by not optimizing poor fits.
17 18
 - Tweaks to segmentationPSCBS
18 19
 - Lots of improvements to command line scripts.
20
+- Code cleanups (switch from inlinedocs to roxygen, from message/warn to
21
+  futile.logger)
19 22
 
20 23
 
21 24
 API CHANGES
... ...
@@ -40,6 +43,14 @@ API CHANGES
40 43
         normalized so that w[1] is 1
41 44
     - Removed ... from runAbsoluteCN
42 45
     - added centromeres to segmentation function.
46
+    - contamination.cutoff now specifies fraction of SNPs necessary to
47
+      call sample contaminated. Does not remove anymore, since these
48
+      are obviously informative for contamination rate estimation in the
49
+      PureCN model.
50
+    - Removed verbose from most functions, since messages are now controlled
51
+      with futile.logger.  
52
+    - Smoothing of log-ratios now optionally done by runAbsoluteCN, not
53
+      segmentation function.  
43 54
  
44 55
  
45 56
 PLANNED FEATURES 
... ...
@@ -50,3 +61,13 @@ PLANNED FEATURES
50 61
   MYC)
51 62
 - Switch to S4 data structures (maybe)
52 63
 - Whole dataset visualizations (maybe PureCN 1.8)
64
+
65
+
66
+ROADMAP
67
+
68
+- January 15: API freeze (no changes to existing API)
69
+- February 1: Defaults freeze (no tuning of default parameters anymore)
70
+- February 15: Feature freeze (only bugfixes, code cleanups and speedups
71
+  and documentation improvements) 
72
+- March 15: Code freeze (only bugfixes and documentation improvements)
73
+- April: Release
... ...
@@ -64,7 +64,7 @@ c(test.num.copy, round(opt.C))[i], prior.K))
64 64
 }    
65 65
 
66 66
 .calcSNVLLik <- function(vcf, tumor.id.in.vcf, ov, p, test.num.copy, 
67
-    C.posterior, C, opt.C, snv.model, prior.somatic, mapping.bias, snv.lr, 
67
+    C.likelihood, C, opt.C, snv.model, prior.somatic, mapping.bias, snv.lr, 
68 68
     sampleid = NULL, cont.rate = 0.01, prior.K, max.coverage.vcf, non.clonal.M,
69 69
     model.homozygous=FALSE, error=0.001, max.mapping.bias=0.8) {
70 70
 
... ...
@@ -84,9 +84,9 @@ c(test.num.copy, round(opt.C))[i], prior.K))
84 84
         haploid.penalty <- 2
85 85
     }
86 86
     
87
-    subclonal <- apply(C.posterior[queryHits(ov), ], 1, which.max) == ncol(C.posterior)
87
+    subclonal <- apply(C.likelihood[queryHits(ov), ], 1, which.max) == ncol(C.likelihood)
88 88
     
89
-    seg.idx <- which(seq_len(nrow(C.posterior)) %in% queryHits(ov))
89
+    seg.idx <- which(seq_len(nrow(C.likelihood)) %in% queryHits(ov))
90 90
     sd.ar <- sd(unlist(geno(vcf)$FA[, tumor.id.in.vcf]))
91 91
 
92 92
     # Fit variants in all segments
... ...
@@ -104,13 +104,13 @@ c(test.num.copy, round(opt.C))[i], prior.K))
104 104
         shape1 <- ar_all * dp_all + 1
105 105
         shape2 <- (1 - ar_all) * dp_all + 1
106 106
 
107
-        lapply(seq(ncol(C.posterior)), function(k) {
107
+        lapply(seq(ncol(C.likelihood)), function(k) {
108 108
             Ci <- c(test.num.copy, opt.C[i])[k]       
109 109
             priorM <- log(Ci + 1 + haploid.penalty)
110 110
             #priorM <- log(abs(2-Ci) + 1)
111 111
             priorHom <- ifelse(model.homozygous, -log(3), log(0))
112 112
             
113
-            skip <- test.num.copy > Ci | C.posterior[i, k] <=0
113
+            skip <- test.num.copy > Ci | C.likelihood[i, k] <=0
114 114
 
115 115
             p.ar <- lapply(c(0, 1), function(g) {
116 116
                 cns <- test.num.copy
... ...
@@ -166,8 +166,8 @@ c(test.num.copy, round(opt.C))[i], prior.K))
166 166
 
167 167
     likelihoods <- do.call(rbind, 
168 168
         lapply(seq_along(xx), function(i) Reduce("+", 
169
-            lapply(seq(ncol(C.posterior)), function(j) 
170
-                exp(xx[[i]][[j]]) * C.posterior[seg.idx[i], j]))))
169
+            lapply(seq(ncol(C.likelihood)), function(j) 
170
+                exp(xx[[i]][[j]]) * C.likelihood[seg.idx[i], j]))))
171 171
 
172 172
     colnames(likelihoods) <- c(paste("SOMATIC.M", test.num.copy, sep = ""), 
173 173
         paste("GERMLINE.M", test.num.copy, sep = ""), "GERMLINE.CONTHIGH", 
... ...
@@ -418,7 +418,7 @@ c(test.num.copy, round(opt.C))[i], prior.K))
418 418
 .getGoF <- function(result) {
419 419
     if (is.null(result$SNV.posterior$beta.model)) return(0)
420 420
     r <- result$SNV.posterior$beta.model$posteriors
421
-    e <- (r$ML.AR-r$AR)^2
421
+    e <- (r$ML.AR-r$AR.ADJUSTED)^2
422 422
     maxDist <- 0.2^2
423 423
     r2 <- max(1-mean(e,na.rm=TRUE)/maxDist,0)
424 424
     return(r2)
... ...
@@ -583,7 +583,7 @@ c(test.num.copy, round(opt.C))[i], prior.K))
583 583
     
584 584
 
585 585
 .optimizeGrid <- function(test.purity, min.ploidy, max.ploidy, test.num.copy = 0:7, 
586
-    exon.lrs, seg, sd.seg, li, max.exon.ratio, max.non.clonal, verbose = FALSE, debug = FALSE) {
586
+    exon.lrs, seg, sd.seg, li, max.exon.ratio, max.non.clonal) {
587 587
     ploidy.grid <- seq(min.ploidy, max.ploidy, by = 0.2)
588 588
     if (min.ploidy < 1.8 && max.ploidy > 2.2) {
589 589
         ploidy.grid <- c(seq(min.ploidy, 1.8, by = 0.2), 1.9, 2, 2.1, seq(2.2, max.ploidy, 
... ...
@@ -592,16 +592,12 @@ c(test.num.copy, round(opt.C))[i], prior.K))
592 592
     mm <- lapply(test.purity, function(p) {
593 593
         b <- 2 * (1 - p)
594 594
         log.ratio.offset <- 0
595
-        if (debug) 
596
-            message(paste(b, log.ratio.offset))
597 595
         lapply(ploidy.grid, function(D) {
598 596
             dt <- p/D
599 597
             llik.all <- lapply(seq_along(exon.lrs), function(i) .calcLlikSegmentExonLrs(exon.lrs[[i]], 
600 598
                 log.ratio.offset, max.exon.ratio, sd.seg, dt, b, D, test.num.copy))
601 599
             subclonal <- vapply(llik.all, which.max, double(1)) == 1
602 600
             subclonal.f <- length(unlist(exon.lrs[subclonal]))/length(unlist(exon.lrs))
603
-            if (debug) 
604
-                message(paste(sum(subclonal), subclonal.f))
605 601
             if (subclonal.f > max.non.clonal) 
606 602
                 return(-Inf)
607 603
             llik <- sum(vapply(llik.all, max, double(1)))
... ...
@@ -857,6 +853,7 @@ c(test.num.copy, round(opt.C))[i], prior.K))
857 853
     msg <- paste0(msg, "\n\nThis is most likely a user error due to",
858 854
         " invalid input data or parameters (PureCN ", 
859 855
         packageVersion("PureCN"), ").")
856
+    flog.fatal(paste(strwrap(msg),"\n"))
860 857
     stop( paste(strwrap(msg),"\n"), call.= FALSE)
861 858
 }
862 859
 .stopRuntimeError <- function(...) {
... ...
@@ -864,25 +861,41 @@ c(test.num.copy, round(opt.C))[i], prior.K))
864 861
     msg <- paste0(msg, "\n\nThis runtime error might be caused by",
865 862
         " invalid input data or parameters. Please report bug (PureCN ", 
866 863
         packageVersion("PureCN"), ").")
864
+    flog.fatal(paste(strwrap(msg),"\n"))
867 865
     stop( paste(strwrap(msg),"\n"), call.= FALSE)
868 866
 }
869
-
867
+.logHeader <- function(l) {
868
+    flog.info(strrep("-", 60))
869
+    flog.info("PureCN %s", as.character(packageVersion("PureCN")))
870
+    flog.info(strrep("-", 60))
871
+    idxSupported <- sapply(l, function(x) class(eval(x))) %in% 
872
+        c("character", "numeric", "NULL", "list", "logical") &
873
+        sapply(l, function(x) length(unlist(eval(x)))) < 12
874
+
875
+    l <- c(l[idxSupported],lapply(l[!idxSupported], function(x) "<data>"))
876
+    argsStrings <- paste(sapply(seq_along(l), function(i) 
877
+        paste0("-", names(l)[i], " ", paste(eval(l[[i]]),collapse=","))),
878
+        collapse=" ")
879
+    flog.info("Arguments: %s", argsStrings)
880
+}
881
+.logFooter <- function() {
882
+    flog.info("Done.")
883
+    flog.info(strrep("-", 60))
884
+}    
870 885
 .calcGCmetric <- function(gc.data, coverage) { 
871 886
     gcbins <- split(coverage$average.coverage, gc.data$gc_bias < 0.5); 
872 887
     mean(gcbins[[1]], na.rm=TRUE)/mean(gcbins[[2]], na.rm=TRUE) 
873 888
 }
874
-.checkGCBias <- function(normal, tumor, gc.data, max.dropout, verbose) {
889
+.checkGCBias <- function(normal, tumor, gc.data, max.dropout) {
875 890
     gcMetricNormal <- .calcGCmetric(gc.data, normal)
876 891
     gcMetricTumor <- .calcGCmetric(gc.data, tumor)
877
-    if (verbose) { 
878
-        message("AT/GC dropout: ", round(gcMetricTumor, digits=2),
879
-        " (tumor), ", round(gcMetricNormal, digits=2), " (normal).")
880
-    }    
892
+    flog.info("AT/GC dropout: %.2f (tumor), %.2f (normal). ", 
893
+        gcMetricTumor, gcMetricNormal)
881 894
     if (gcMetricNormal < max.dropout[1] || 
882 895
         gcMetricNormal > max.dropout[2] ||
883 896
         gcMetricTumor  < max.dropout[1] ||
884 897
         gcMetricTumor  > max.dropout[2]) {
885
-        warning("High GC-bias in normal or tumor. Is data GC-normalized?")
898
+        flog.warn("High GC-bias in normal or tumor. Is data GC-normalized?")
886 899
         return(TRUE)
887 900
     }
888 901
     return(FALSE)
... ...
@@ -10,7 +10,6 @@
10 10
 #' function.
11 11
 #' @param tumor Tumor coverage read in by the \code{\link{readCoverageGatk}}
12 12
 #' function.
13
-#' @param verbose Verbose output.
14 13
 #' @return \code{numeric(nrow(tumor))}, tumor vs. normal copy number log-ratios
15 14
 #' for all targets.
16 15
 #' @author Markus Riester
... ...
@@ -22,21 +21,17 @@
22 21
 #'     package="PureCN")
23 22
 #' normal <- readCoverageGatk(normal.coverage.file)
24 23
 #' tumor <- readCoverageGatk(tumor.coverage.file)
25
-#' log.ratio <- calculateLogRatio(normal, tumor, verbose=FALSE)
24
+#' log.ratio <- calculateLogRatio(normal, tumor)
26 25
 #' 
27 26
 #' @export calculateLogRatio
28
-calculateLogRatio <- function(normal, tumor, verbose=TRUE) {
27
+calculateLogRatio <- function(normal, tumor) {
29 28
     # make sure that normal and tumor align
30 29
     if (!identical(as.character(normal[, 1]), as.character(tumor[, 1]))) {
31 30
         .stopUserError("Interval files in normal and tumor different.")
32 31
     }
33
-    if (verbose) {
34
-        message("Average coverage: ", 
35
-            round(mean(tumor$average.coverage, na.rm=TRUE), digits=0), 
36
-            "X (tumor), ",
37
-            round(mean(normal$average.coverage, na.rm=TRUE), digits=0),
38
-            "X (normal).")
39
-    }    
32
+    flog.info("Average coverage: %.0fX (tumor) %.0fX (normal).", 
33
+        mean(tumor$average.coverage, na.rm=TRUE), 
34
+        mean(normal$average.coverage, na.rm=TRUE))
40 35
     total.cov.normal <- sum(as.numeric(normal$coverage), na.rm = TRUE)
41 36
     total.cov.tumor <- sum(as.numeric(tumor$coverage), na.rm = TRUE)
42 37
 
... ...
@@ -57,10 +57,10 @@ log.ratio.cutoffs = c(-0.9, 0.9), failed = NULL, all.genes = FALSE) {
57 57
 
58 58
     bm <- res$results[[id]]$SNV.posterior$beta.model
59 59
     if (!is.null(bm)) {
60
-        segids <- bm$posterior$seg.id
60
+        segids <- bm$posteriors$seg.id
61 61
         calls$num.snps.segment <- sapply(calls$seg.id, function(i) 
62 62
             sum(segids==i,na.rm=TRUE))
63
-        calls$loh <- bm$posterior$ML.M.SEGMENT[match(calls$seg.id, segids)] == 0 
63
+        calls$loh <- bm$posteriors$ML.M.SEGMENT[match(calls$seg.id, segids)] == 0 
64 64
     }
65 65
     
66 66
     calls <- calls[, !grepl("^\\.",colnames(calls))]
... ...
@@ -1,5 +1,5 @@
1 1
 # Make CMD check happy
2
-globalVariables(names=c("gcIndex", "gcNum", "..level.."))
2
+globalVariables(names=c("..level.."))
3 3
 
4 4
 #' Correct for GC bias
5 5
 #' 
... ...
@@ -110,7 +110,7 @@ plot.max.density = 50000) {
110 110
         if (density == "Low") {
111 111
             print(ggplot(gcPlot, aes_string(x="gc_bias", y="average.coverage")) + 
112 112
                 geom_point(color='red', alpha=0.2) + 
113
-                geom_line(data = plotMed, aes(x = gcIndex, y = gcNum), color = 'blue') + 
113
+                geom_line(data = plotMed, aes_string(x = 'gcIndex', y = 'gcNum'), color = 'blue') + 
114 114
                 xlab("GC content") + ylab("Coverage") + 
115 115
                 theme(axis.text = element_text(size= 6), axis.title = element_text(size=16)) + 
116 116
                 facet_wrap(~ norm_status, nrow=1))
... ...
@@ -119,7 +119,7 @@ plot.max.density = 50000) {
119 119
             geom_point(color="blue", alpha = 0.1) + 
120 120
             stat_density2d(aes(fill = ..level..), geom="polygon") + 
121 121
             scale_alpha_continuous(limits=c(0.1, 0), breaks=seq(0, 0.1, by = 0.025)) + 
122
-            geom_line(data = plotMed, aes(x = gcIndex,y = gcNum), color = 'red') + 
122
+            geom_line(data = plotMed, aes_string(x = 'gcIndex',y = 'gcNum'), color = 'red') + 
123 123
             xlab("GC content") + ylab("Coverage") + 
124 124
             theme(axis.text = element_text(size = 16), axis.title = element_text(size = 16)) + 
125 125
             facet_wrap(~norm_status, nrow=1))
... ...
@@ -46,7 +46,7 @@ max.mean.coverage = 100, ... ) {
46 46
     normals.m <- scale(normals.m, 1/z, center=FALSE)
47 47
 
48 48
     normals.pca <- prcomp(t(normals.m[idx,]), ...)
49
-    sex.determined <- sapply(normals,getSexFromCoverage, verbose=is.null(sex))
49
+    sex.determined <- sapply(normals,getSexFromCoverage)
50 50
     if (is.null(sex)) {
51 51
         sex <- sex.determined
52 52
     } else {   
... ...
@@ -59,9 +59,8 @@ max.mean.coverage = 100, ... ) {
59 59
         for (i in seq_along(sex.determined)) {
60 60
             if (!is.na(sex.determined[i]) && sex[i] != "diploid" &&
61 61
                 sex.determined[i] != sex[i]) {
62
-                warning("Sex mismatch in ", normal.coverage.files[i], 
63
-                    ". Sex provided is ", sex, ", but could be ", 
64
-                    sex.determined[i])
62
+                flog.warn("Sex mismatch in %s. Sex provided is %s, but could be %s.", 
63
+                    normal.coverage.files[i], sex[i], sex.determined[i])
65 64
             }    
66 65
         }    
67 66
     }
... ...
@@ -109,7 +108,6 @@ createExonWeightFile <- function() {
109 108
 #' (>20) to estimate target log-ratio standard deviations. Should not overlap
110 109
 #' with files in \code{tumor.coverage.files}.
111 110
 #' @param target.weight.file Output filename.
112
-#' @param verbose Verbose output.
113 111
 #' @return A \code{data.frame} with target weights.
114 112
 #' @author Markus Riester
115 113
 #' @examples
... ...
@@ -127,12 +125,12 @@ createExonWeightFile <- function() {
127 125
 #' 
128 126
 #' @export createTargetWeights
129 127
 createTargetWeights <- function(tumor.coverage.files, normal.coverage.files,
130
-target.weight.file, verbose = TRUE) {
131
-    if (verbose) message("Loading coverage data...")
128
+target.weight.file) {
129
+    flog.info("Loading coverage data...")
132 130
     tumor.coverage <- lapply(tumor.coverage.files,  readCoverageGatk)
133 131
     normal.coverage <- lapply(normal.coverage.files,  readCoverageGatk)
134 132
     lrs <- lapply(tumor.coverage, function(tc) sapply(normal.coverage, 
135
-            function(nc) calculateLogRatio(nc, tc, verbose=verbose)))
133
+            function(nc) calculateLogRatio(nc, tc)))
136 134
 
137 135
     lrs <- do.call(cbind, lrs)
138 136
 
... ...
@@ -23,7 +23,6 @@
23 23
 #' \code{\link{createNormalDatabase}}.
24 24
 #' @param normalDB.min.coverage Exclude targets with coverage lower than 20
25 25
 #' percent of the chromosome median in the pool of normals.
26
-#' @param verbose Verbose output.
27 26
 #' @return \code{logical(length(log.ratio))} specifying which targets should be
28 27
 #' used in segmentation.
29 28
 #' @author Markus Riester
... ...
@@ -55,7 +54,7 @@
55 54
 #' @export filterTargets
56 55
 filterTargets <- function(log.ratio, tumor, gc.data, seg.file, 
57 56
     filter.lowhigh.gc = 0.001, min.targeted.base = 5, normalDB = NULL,
58
-    normalDB.min.coverage = 0.2, verbose) {
57
+    normalDB.min.coverage = 0.2) {
59 58
     # NA's in log.ratio confuse the CBS function
60 59
     targetsUsed <- !is.na(log.ratio) & !is.infinite(log.ratio) 
61 60
     # With segmentation file, ignore all filters
... ...
@@ -64,12 +63,12 @@ filterTargets <- function(log.ratio, tumor, gc.data, seg.file,
64 63
     if (!is.null(gc.data)) {
65 64
         .checkFraction(filter.lowhigh.gc, "filter.lowhigh.gc")
66 65
         targetsUsed <- .filterTargetsLowHighGC(targetsUsed, tumor,
67
-            gc.data, filter.lowhigh.gc, verbose)
66
+            gc.data, filter.lowhigh.gc)
68 67
     }
69 68
     targetsUsed <- .filterTargetsNormalDB(targetsUsed, tumor, normalDB,
70
-        normalDB.min.coverage, verbose)
69
+        normalDB.min.coverage)
71 70
     targetsUsed <- .filterTargetsTargetedBase(targetsUsed, tumor,
72
-        min.targeted.base, verbose)
71
+        min.targeted.base)
73 72
 }
74 73
 
75 74
 .checkNormalDB <- function(tumor, normalDB) {
... ...
@@ -90,7 +89,7 @@ filterTargets <- function(log.ratio, tumor, gc.data, seg.file,
90 89
 }
91 90
     
92 91
 .filterTargetsNormalDB <- function(targetsUsed, tumor, normalDB,
93
-normalDB.min.coverage, verbose) {
92
+normalDB.min.coverage) {
94 93
     if (is.null(normalDB)) return(targetsUsed)
95 94
     # make sure that normalDB matches tumor
96 95
     if (!.checkNormalDB(tumor, normalDB)) {
... ...
@@ -113,40 +112,40 @@ normalDB.min.coverage, verbose) {
113 112
 
114 113
     nAfter <- sum(targetsUsed)
115 114
 
116
-    if (verbose && nAfter < nBefore) { 
117
-        message("Removing ", nBefore-nAfter, " targets with low coverage ",
118
-            "in normalDB.")
115
+    if (nAfter < nBefore) { 
116
+        flog.info("Removing %i targets with low coverage in normalDB.", 
117
+            nBefore-nAfter)
119 118
     }
120 119
     targetsUsed
121 120
 }
122 121
 
123
-.filterTargetsChrHash <- function(targetsUsed, tumor, chr.hash, verbose) {
122
+.filterTargetsChrHash <- function(targetsUsed, tumor, chr.hash) {
124 123
     if (is.null(chr.hash)) return(targetsUsed)
125 124
     nBefore <- sum(targetsUsed)
126 125
     targetsUsed <- targetsUsed & tumor$chr %in% chr.hash$chr
127 126
     nAfter <- sum(targetsUsed)
128 127
 
129
-    if (verbose && nAfter < nBefore) { 
130
-        message("Removing ", nBefore-nAfter, " targets on chromosomes ",
131
-            "outside chr.hash.")
128
+    if ( nAfter < nBefore) { 
129
+        flog.info("Removing %i targets on chromosomes outside chr.hash.", 
130
+            nBefore-nAfter)
132 131
     }
133 132
     targetsUsed
134 133
 }
135 134
 
136
-.filterTargetsTargetedBase <- function(targetsUsed, tumor, min.targeted.base,
137
-    verbose) {
135
+.filterTargetsTargetedBase <- function(targetsUsed, tumor, min.targeted.base) {
138 136
     if (is.null(min.targeted.base)) return(targetsUsed)
139 137
     nBefore <- sum(targetsUsed)
140 138
     targetsUsed <- targetsUsed & !is.na(tumor$targeted.base) & 
141 139
         tumor$targeted.base >= min.targeted.base
142 140
     nAfter <- sum(targetsUsed)
143
-    if (verbose && nAfter < nBefore) message("Removing ", nBefore-nAfter, 
144
-        " small targets")
141
+    if (nAfter < nBefore) {
142
+        flog.info("Removing %i small targets.", nBefore-nAfter)
143
+    }    
145 144
     targetsUsed
146 145
 }
147 146
 
148 147
 .filterTargetsLowHighGC <- function(targetsUsed, tumor, gc.data,
149
-    filter.lowhigh.gc, verbose) {
148
+    filter.lowhigh.gc) {
150 149
     gc.data <- gc.data[match(as.character(tumor[,1]), gc.data[,1]),]
151 150
     qq <- quantile(gc.data$gc_bias, p=c(filter.lowhigh.gc, 
152 151
         1-filter.lowhigh.gc), na.rm=TRUE)
... ...
@@ -156,8 +155,8 @@ normalDB.min.coverage, verbose) {
156 155
         !(gc.data$gc_bias < qq[1] | gc.data$gc_bias > qq[2])
157 156
     nAfter <- sum(targetsUsed)
158 157
 
159
-    if (verbose && nAfter < nBefore) message("Removing ", 
160
-        nBefore-nAfter, " low/high GC targets")
161
-
158
+    if (nAfter < nBefore) {
159
+        flog.info("Removing %i low/high GC targets.", nBefore-nAfter)
160
+    }
162 161
     targetsUsed
163 162
 }
... ...
@@ -20,8 +20,9 @@
20 20
 #' contamination. If a matched normal is available, this value is ignored,
21 21
 #' because homozygosity can be confirmed in the normal.
22 22
 #' @param contamination.cutoff Count SNPs in dbSNP with allelic fraction
23
-#' smaller than the first value, if found on most chromosomes, remove all with
24
-#' AF smaller than the second value.
23
+#' smaller than the first value or greater than 1-first value, if found on most chromosomes, mark sample
24
+#' as contaminated if the fraction of putative contamination SNPs exceeds
25
+#' the second value.
25 26
 #' @param min.coverage Minimum coverage in tumor. Variants with lower coverage
26 27
 #' are ignored.
27 28
 #' @param min.base.quality Minimim base quality in tumor. Requires a \code{BQ}
... ...
@@ -40,7 +41,6 @@
40 41
 #' SNPs. Ignored in case a matched normal is provided in the VCF.
41 42
 #' @param interval.padding Include variants in the interval flanking regions of
42 43
 #' the specified size in bp. Requires \code{target.granges}.
43
-#' @param verbose Verbose output.
44 44
 #' @return A list with elements \item{vcf}{The filtered \code{CollapsedVCF}
45 45
 #' object.} \item{flag}{A flag (\code{logical(1)}) if problems were
46 46
 #' identified.} \item{flag_comment}{A comment describing the flagging.}
... ...
@@ -61,10 +61,10 @@
61 61
 #' @importFrom stats pbeta
62 62
 filterVcfBasic <- function(vcf, tumor.id.in.vcf = NULL, 
63 63
 use.somatic.status = TRUE, snp.blacklist = NULL, af.range = c(0.03, 0.97),
64
-contamination.cutoff = c(0.05,0.075), min.coverage = 15, min.base.quality = 25,
64
+contamination.cutoff = c(0.075,0.02), min.coverage = 15, min.base.quality = 25,
65 65
 min.supporting.reads = NULL, error = 0.001, target.granges = NULL,
66
-remove.off.target.snvs = TRUE, model.homozygous = FALSE, interval.padding = 50,
67
-verbose=TRUE) {
66
+remove.off.target.snvs = TRUE, model.homozygous = FALSE, 
67
+interval.padding = 50) {
68 68
     flag <- NA
69 69
     flag_comment <- NA
70 70
 
... ...
@@ -74,25 +74,73 @@ verbose=TRUE) {
74 74
     if (use.somatic.status) {
75 75
         n <- nrow(vcf)
76 76
         vcf <- .testGermline(vcf, tumor.id.in.vcf)
77
-        if (verbose) message(paste("Removing", n-nrow(vcf), 
78
-            "non heterozygous (in matched normal) germline SNPs."))
77
+        flog.info("Removing %i non heterozygous (in matched normal) germline SNPs.", 
78
+            n-nrow(vcf))
79 79
     } else {
80 80
         info(vcf)$SOMATIC <- NULL
81 81
     }    
82 82
 
83
+    # find supporting read cutoffs based on coverage and sequencing error
84
+    #--------------------------------------------------------------------------
83 85
     if (is.null(min.supporting.reads)) {
84
-        min.supporting.reads <- calculatePowerDetectSomatic(
85
-            mean(geno(vcf)$DP[,tumor.id.in.vcf], na.rm=TRUE),
86
-            purity=1,ploidy=2, error=error, verbose=FALSE)$k
87
-    }    
86
+        depths <- geno(vcf)$DP[,tumor.id.in.vcf]
87
+        minDepth <- round(log2(mean(depths)))
88
+
89
+        depths <- sort(unique(round(log2(depths))))
90
+        depths <- depths[depths>=minDepth]
91
+        depths <- 2^depths
88 92
 
93
+        cutoffs <- sapply(depths, function(d) 
94
+            calculatePowerDetectSomatic(d, ploidy=2, purity=1, error=error, 
95
+                verbose=FALSE)$k)
96
+        depths <- c(0,depths)
97
+    } else { 
98
+        depths <- 0
99
+        cutoffs <- min.supporting.reads
100
+    }
89 101
     n <- nrow(vcf)
90 102
     
91
-    # remove variants with insufficient reads. This includes reference alleles
92
-    # to remove homozygous germline variants.
93
-    vcf <- vcf[do.call(rbind, 
94
-        geno(vcf)$AD[,tumor.id.in.vcf])[,2] >= min.supporting.reads]
95
-    
103
+    .sufficientReads <- function(vcf, ref, depths, cutoffs) {
104
+        idx <- rep(TRUE, nrow(vcf))
105
+
106
+        .filterVcfByAD <- function(vcf, min.supporting.reads, depth) {
107
+            # remove variants with insufficient reads. This includes reference alleles
108
+            # to remove homozygous germline variants.
109
+            do.call(rbind, geno(vcf)$AD[,tumor.id.in.vcf])[,ifelse(ref,1,2)] >= 
110
+                min.supporting.reads | 
111
+                geno(vcf)$DP[,tumor.id.in.vcf] < depth
112
+           }
113
+
114
+        for (i in seq_along(cutoffs)) {
115
+            idx <- idx & .filterVcfByAD(vcf, cutoffs[i], depths[i])
116
+        }
117
+        idx
118
+    }    
119
+    idxNotReference <- .sufficientReads(vcf,ref=FALSE, depths, cutoffs)
120
+    vcf <- vcf[idxNotReference]
121
+    idxNotHomozygous <- .sufficientReads(vcf,ref=TRUE, depths, cutoffs)
122
+    #--------------------------------------------------------------------------
123
+
124
+    idx <- info(vcf)$DB & idxNotHomozygous &
125
+        ( unlist(geno(vcf)$FA[,tumor.id.in.vcf]) <  contamination.cutoff[1] | 
126
+        unlist(geno(vcf)$FA[,tumor.id.in.vcf]) > (1 - contamination.cutoff[1]))
127
+
128
+    fractionContaminated <- sum(idx)/sum(info(vcf)$DB)
129
+    if (fractionContaminated > 0) {
130
+    #    cntContPerChr <- sapply(seqlevelsInUse(vcf), function(seq) nrow(keepSeqlevels(vcf[idx],seq)))
131
+    #    cntAllPerChr <- sapply(seqlevelsInUse(vcf), function(seq) nrow(keepSeqlevels(vcf,seq)))
132
+    #    flog.info("Contamination p-value across chromosomes: %.2f.",
133
+    #        fisher.test(cntContPerChr, cntAllPerChr)$p.value)
134
+    }
135
+
136
+    # do we have many low allelic fraction calls that are in dbSNP on basically
137
+    # all chromosomes? then we found some contamination
138
+    if (sum(runLength(seqnames(rowRanges(vcf[idx])))>3) >= 20 &&
139
+        fractionContaminated >= contamination.cutoff[2]) {
140
+        flag <- TRUE
141
+        flag_comment <- "POTENTIAL SAMPLE CONTAMINATION"    
142
+    }
143
+
96 144
     # If we have a matched normal, we can distinguish LOH from homozygous
97 145
     # in 100% pure samples. If not we need to see a sufficient number
98 146
     # of alt alleles to believe a heterozygous normal genotype.    
... ...
@@ -100,20 +148,15 @@ verbose=TRUE) {
100 148
         af.range[2] <- 1
101 149
         if (model.homozygous) af.range[2] <- Inf
102 150
     } else {
103
-        vcf <- vcf[do.call(rbind, 
104
-            geno(vcf)$AD[,tumor.id.in.vcf])[,1] >= min.supporting.reads]
151
+        vcf <- vcf[idxNotHomozygous]
105 152
     }
106 153
 
107 154
     vcf <- vcf[unlist(geno(vcf)$DP[,tumor.id.in.vcf]) >= min.coverage]
108 155
     vcf <- vcf[unlist(geno(vcf)$FA[,tumor.id.in.vcf]) >= af.range[1]]
109 156
     # remove homozygous germline
110 157
     vcf <- vcf[!info(vcf)$DB | geno(vcf)$FA[,tumor.id.in.vcf] < af.range[2]]
111
-    if (verbose) message("Removing ", n-nrow(vcf), 
112
-        " SNPs with AF < ", af.range[1],
113
-        " or AF >= ", af.range[2], 
114
-        " or less than ", min.supporting.reads, 
115
-        " supporting reads or depth < ",
116
-        min.coverage, ".")
158
+    flog.info("Removing %i SNPs with AF < %.3f or AF >= %.3f or less than %i supporting reads or depth < %i.", 
159
+        n-nrow(vcf), af.range[1], af.range[2], cutoffs[1], min.coverage)
117 160
 
118 161
     if (!is.null(snp.blacklist)) {
119 162
         for (i in seq_along(snp.blacklist)) {
... ...
@@ -124,7 +167,7 @@ verbose=TRUE) {
124 167
             }
125 168
             n <- nrow(vcf)
126 169
             if (sum( rownames(vcf) %in% snp.blacklist.data[,1]) > 1  ) {
127
-                warning("Old SNP blacklists are deprecated. ", 
170
+                flog.warn("Old SNP blacklists are deprecated. %s", 
128 171
                     "Use either a BED file or a normal.panel.vcf.file.")
129 172
                 vcf <- vcf[!rownames(vcf) %in% snp.blacklist.data[,1],]
130 173
             } else {
... ...
@@ -132,79 +175,37 @@ verbose=TRUE) {
132 175
                 ov <- suppressWarnings(overlapsAny(vcf, blackBed))
133 176
                 vcf <- vcf[!ov]
134 177
             }    
135
-            if (verbose) message("Removing ", n-nrow(vcf), 
136
-                " blacklisted SNPs.")
137
-        }    
138
-    }
139
-    idx <- info(vcf)$DB & unlist(geno(vcf)$FA[,tumor.id.in.vcf]) < 
140
-        contamination.cutoff[1]
141
-
142
-    # do we have many low allelic fraction calls that are in dbSNP on basically
143
-    # all chromosomes? then we found some contamination
144
-    if (sum(runLength(seqnames(rowRanges(vcf[idx])))>1) >= 20) {
145
-        idx <- info(vcf)$DB & unlist(geno(vcf)$FA[,tumor.id.in.vcf]) < 
146
-            contamination.cutoff[2]
147
-        vcf <- vcf[which(!idx)]
148
-        if (verbose) message("Removing ", sum(idx, na.rm=TRUE), 
149
-            " contamination SNPs.")
150
-        flag <- TRUE
151
-        flag_comment <- "POTENTIAL SAMPLE CONTAMINATION"    
152
-    }
153
-    
154
-    # if we have a matched normal, we can filter homozygous germline SNPs
155
-    # easily, if not, we still should remove most of them by removing everything
156
-    # with AF >= 0.97. But sometimes samples are very noisy, for example due to
157
-    # contamination, and homozygous germline SNPs are between 0.9 and 1.  This
158
-    # code finds samples which have a lot of dbSNPs between 0.95 and 0.97 on
159
-    # almost all chromosomes.  If found, then remove everything that's in dbSNP
160
-    # above 0.9.
161
-    if (!use.somatic.status && !model.homozygous) {    
162
-        idx <- info(vcf)$DB & unlist(geno(vcf)$FA[,tumor.id.in.vcf]) > 
163
-            (1 - contamination.cutoff[1])
164
-
165
-        # do we have many high allelic fraction calls that are in dbSNP on
166
-        # basically all chromosomes?  then we found a very noisy sample
167
-        if (sum(runLength(seqnames(rowRanges(vcf[idx])))>2) >= 20) {
168
-            idx <- info(vcf)$DB & unlist(geno(vcf)$FA[,tumor.id.in.vcf]) > 
169
-                (1 - contamination.cutoff[2])
170
-            vcf <- vcf[which(!idx)]
171
-            if (verbose) message("Removing ", sum(idx, na.rm=TRUE), 
172
-                " noisy homozygous germline SNPs.")
173
-            flag <- TRUE
174
-            flag_comment <- .appendComment(flag_comment, 
175
-                "NOISY HOMOZYGOUS GERMLINE CALLS")    
178
+            flog.info("Removing %i blacklisted SNPs.", n-nrow(vcf))
176 179
         }    
177 180
     }
178 181
 
179 182
     if (!is.null(geno(vcf)$BQ)) {
180
-       n.vcf.before.filter <- nrow(vcf)
181
-       vcf <- vcf[which(as.numeric(geno(vcf)$BQ[,tumor.id.in.vcf])>=min.base.quality)]
182
-       if (verbose) message("Removing ", n.vcf.before.filter - nrow(vcf), 
183
-           " low quality variants with BQ<",min.base.quality,".") 
183
+        n.vcf.before.filter <- nrow(vcf)
184
+        vcf <- vcf[which(as.numeric(geno(vcf)$BQ[,tumor.id.in.vcf])>=min.base.quality)]
185
+        flog.info("Removing %i low quality variants with BQ < %i.", 
186
+            n.vcf.before.filter - nrow(vcf), min.base.quality) 
184 187
     } else if (!is.null(rowRanges(vcf)$QUAL)) {
185
-       n.vcf.before.filter <- nrow(vcf)
186
-       idx <- which(as.numeric(rowRanges(vcf)$QUAL)>=min.base.quality)
187
-       if (length(idx)) vcf <- vcf[idx]
188
-       if (verbose) message("Removing ", n.vcf.before.filter - nrow(vcf), 
189
-           " low quality variants with BQ<",min.base.quality,".") 
188
+        n.vcf.before.filter <- nrow(vcf)
189
+        idx <- which(as.numeric(rowRanges(vcf)$QUAL)>=min.base.quality)
190
+        if (length(idx)) vcf <- vcf[idx]
191
+        flog.info("Removing %i low quality variants with BQ < %i.", 
192
+            n.vcf.before.filter - nrow(vcf), min.base.quality) 
190 193
     }     
191 194
     
192 195
     if (!is.null(target.granges)) {
193
-        vcf <- .annotateVcfTarget(vcf, target.granges, interval.padding, verbose)
196
+        vcf <- .annotateVcfTarget(vcf, target.granges, interval.padding)
194 197
         if (remove.off.target.snvs) {
195 198
             n.vcf.before.filter <- nrow(vcf)
196 199
             # make sure all SNVs are in covered exons
197 200
             vcf <- vcf[info(vcf)$OnTarget>0]
198
-            if (verbose) message("Removing ", n.vcf.before.filter - nrow(vcf), 
199
-                " variants outside intervals.", 
200
-                " Set remove.off.target.snvs=FALSE to include.")
201
+            flog.info("Removing %i variants outside intervals.", 
202
+                n.vcf.before.filter - nrow(vcf))
201 203
         }        
202 204
     }
203
-    ##value<< A list with elements
204 205
     list(
205
-        vcf=vcf, ##<< The filtered \code{CollapsedVCF} object.
206
-        flag=flag, ##<< A flag (\code{logical(1)}) if problems were identified.
207
-        flag_comment=flag_comment ##<< A comment describing the flagging. 
206
+        vcf=vcf, 
207
+        flag=flag, 
208
+        flag_comment=flag_comment 
208 209
     )
209 210
 }
210 211
 
... ...
@@ -222,7 +223,6 @@ verbose=TRUE) {
222 223
 #' @param tumor.id.in.vcf The tumor id in the VCF file, optional.
223 224
 #' @param stats.file MuTect stats file.
224 225
 #' @param ignore MuTect flags that mark variants for exclusion.
225
-#' @param verbose Verbose output.
226 226
 #' @param \dots Additional arguments passed to \code{\link{filterVcfBasic}}.
227 227
 #' @return A list with elements \code{vcf}, \code{flag} and
228 228
 #' \code{flag_comment}.  \code{vcf} contains the filtered \code{CollapsedVCF},
... ...
@@ -244,15 +244,15 @@ filterVcfMuTect <- function(vcf, tumor.id.in.vcf = NULL, stats.file = NULL,
244 244
 ignore=c("clustered_read_position", "fstar_tumor_lod", "nearby_gap_events", 
245 245
 "poor_mapping_region_alternate_allele_mapq", "poor_mapping_region_mapq0", 
246 246
 "possible_contamination", "strand_artifact", "seen_in_panel_of_normals"),
247
-verbose=TRUE, ... ){
247
+... ){
248 248
     if (is.null(stats.file)) return(
249
-        filterVcfBasic(vcf, tumor.id.in.vcf, verbose=verbose, ...))
249
+        filterVcfBasic(vcf, tumor.id.in.vcf, ...))
250 250
     
251 251
     stats <- read.delim(stats.file, as.is=TRUE, skip=1)
252 252
 
253 253
     if (is.null(stats$contig) || is.null(stats$position)) {
254 254
         warning("MuTect stats file lacks contig and position columns.")
255
-        return(filterVcfBasic(vcf, tumor.id.in.vcf, verbose=verbose, ...))
255
+        return(filterVcfBasic(vcf, tumor.id.in.vcf, ...))
256 256
     }    
257 257
 
258 258
     gr.stats <- GRanges(seqnames=stats$contig, 
... ...
@@ -265,13 +265,13 @@ verbose=TRUE, ... ){
265 265
         n <- nrow(vcf)
266 266
         stats <- stats[subjectHits(ov),]
267 267
         vcf <- vcf[queryHits(ov)]
268
-        warning("MuTect stats file and VCF file do not align perfectly. ",
269
-         "Will remove ", n-nrow(vcf), " unmatched variants.")
268
+        flog.warn("MuTect stats file and VCF file do not align perfectly. Will remove %i unmatched variants.",
269
+            n-nrow(vcf))
270 270
     }    
271 271
     if (is.null(stats$failure_reasons)) {
272
-        warning("MuTect stats file lacks failure_reasons column.",
272
+        flog.warn("MuTect stats file lacks failure_reasons column.",
273 273
             " Keeping all variants listed in stats file.")
274
-        return(filterVcfBasic(vcf, tumor.id.in.vcf, verbose=verbose, ...))
274
+        return(filterVcfBasic(vcf, tumor.id.in.vcf, ...))
275 275
     }
276 276
 
277 277
     n <- nrow(vcf)
... ...
@@ -279,9 +279,9 @@ verbose=TRUE, ... ){
279 279
     ids <- sort(unique(unlist(sapply(ignore, grep, stats$failure_reasons))))
280 280
     vcf <- vcf[-ids]
281 281
 
282
-    if (verbose) message("Removing ", n-nrow(vcf), 
283
-        " MuTect calls due to blacklisted failure reasons.")
284
-    filterVcfBasic(vcf, tumor.id.in.vcf, verbose=verbose, ...)
282
+    flog.info("Removing %i MuTect calls due to blacklisted failure reasons.", 
283
+        n-nrow(vcf))
284
+    filterVcfBasic(vcf, tumor.id.in.vcf, ...)
285 285
 }
286 286
 
287 287
 
... ...
@@ -305,7 +305,6 @@ verbose=TRUE, ... ){
305 305
 #' value is for the case that variant is in both dbSNP and COSMIC > 2.
306 306
 #' @param tumor.id.in.vcf Id of tumor in case multiple samples are stored in
307 307
 #' VCF.
308
-#' @param verbose Verbose output.
309 308
 #' @return A \code{numeric(nrow(vcf))} vector with the prior probability of
310 309
 #' somatic status for each variant in the \code{CollapsedVCF}.
311 310
 #' @author Markus Riester
... ...
@@ -320,7 +319,7 @@ verbose=TRUE, ... ){
320 319
 #' @export setPriorVcf
321 320
 setPriorVcf <- function(vcf,
322 321
 prior.somatic=c(0.5, 0.0005, 0.999, 0.0001, 0.995, 0.01), 
323
-tumor.id.in.vcf=NULL, verbose=TRUE) {
322
+tumor.id.in.vcf=NULL) {
324 323
     if (is.null(tumor.id.in.vcf)) {
325 324
         tumor.id.in.vcf <- .getTumorIdInVcf(vcf)
326 325
     }
... ...
@@ -329,24 +328,24 @@ tumor.id.in.vcf=NULL, verbose=TRUE) {
329 328
          prior.somatic <- ifelse(info(vcf)$SOMATIC,
330 329
             prior.somatic[3],prior.somatic[4])
331 330
 
332
-         if (verbose) message("Found SOMATIC annotation in VCF. ",
333
-            "Setting somatic prior probabilities for somatic variants to ", 
334
-            tmp[3]," or to ", tmp[4], " otherwise.")
331
+         flog.info("Found SOMATIC annotation in VCF.")
332
+         flog.info("Setting somatic prior probabilities for somatic variants to %f or to %f otherwise.", 
333
+            tmp[3], tmp[4])
335 334
     } else {
336 335
          tmp <- prior.somatic
337 336
          prior.somatic <- ifelse(info(vcf)$DB,
338 337
             prior.somatic[2], prior.somatic[1])
339 338
          if (!is.null(info(vcf)$Cosmic.CNT)) {
340
-             if (verbose) message("Found COSMIC annotation in VCF. ",
341
-                "Setting somatic prior probabilities for hits to\n", tmp[5],
342
-                " or to ", tmp[6], " if in both COSMIC and dbSNP.")
339
+             flog.info("Found COSMIC annotation in VCF.")
340
+             flog.info("Setting somatic prior probabilities for hits to %f or to %f if in both COSMIC and dbSNP.", 
341
+                tmp[5], tmp[6])
343 342
 
344 343
              prior.somatic[which(info(vcf)$Cosmic.CNT>2)] <- tmp[5]
345 344
              prior.somatic[which(info(vcf)$Cosmic.CNT>2 & 
346 345
                 info(vcf)$DB)] <- tmp[6]
347 346
          } else {
348
-             if (verbose) message("Setting somatic prior probabilities ",
349
-                "for dbSNP hits to ", tmp[2]," or to ", tmp[1], " otherwise.")
347
+             flog.info("Setting somatic prior probabilities for dbSNP hits to %f or to %f otherwise.", 
348
+                tmp[2], tmp[1])
350 349
          }      
351 350
     }     
352 351
     prior.somatic
... ...
@@ -452,7 +451,7 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
452 451
     }    
453 452
     return(TRUE)
454 453
 }    
455
-.readAndCheckVcf <- function(vcf.file, genome, verbose) {
454
+.readAndCheckVcf <- function(vcf.file, genome) {
456 455
     if (class(vcf.file) == "character") {    
457 456
         vcf <- readVcf(vcf.file, genome)
458 457
     } else if (class(vcf.file) != "CollapsedVCF") {
... ...
@@ -465,7 +464,7 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
465 464
     if (sum(triAllelic)) {
466 465
         n <- nrow(vcf)
467 466
         vcf <- vcf[which(!triAllelic)]
468
-        if (verbose) message("Removing ",n-nrow(vcf), " triallelic sites.")
467
+        flog.info("Removing %i triallelic sites.",n-nrow(vcf))
469 468
     }    
470 469
     if (is.null(info(vcf)$DB)) {
471 470
         # try to add an DB field based on rownames
... ...
@@ -525,9 +524,9 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
525 524
     vcf
526 525
 }
527 526
     
528
-.addCosmicCNT <- function(vcf, cosmic.vcf.file, verbose=TRUE) {
527
+.addCosmicCNT <- function(vcf, cosmic.vcf.file) {
529 528
     if (!is.null(info(vcf)$Cosmic.CNT)) {
530
-        if (verbose) message("VCF already COSMIC annotated. Skipping.")
529
+        flog.info("VCF already COSMIC annotated. Skipping.")
531 530
         return(vcf)        
532 531
     }
533 532
     cosmicSeqStyle <- seqlevelsStyle(headerTabix(
... ...
@@ -537,7 +536,7 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
537 536
     if (!length(intersect(seqlevelsStyle(vcf), cosmicSeqStyle))) {
538 537
         seqlevelsStyle(vcfRenamedSL) <- cosmicSeqStyle[1]
539 538
     }
540
-    if (verbose) message("Reading COSMIC VCF...")
539
+    flog.info("Reading COSMIC VCF...")
541 540
     # look-up the variants in COSMIC
542 541
     cosmic.vcf <- readVcf(cosmic.vcf.file, genome=genome(vcf)[1],  
543 542
         ScanVcfParam(which = rowRanges(vcfRenamedSL),
... ...
@@ -569,20 +568,18 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
569 568
     vcf
570 569
 }
571 570
 
572
-.annotateVcfTarget <- function(vcf, target.granges, interval.padding, verbose) {
571
+.annotateVcfTarget <- function(vcf, target.granges, interval.padding) {
573 572
     target.granges.padding <- target.granges
574 573
     start(target.granges.padding) <- start(target.granges.padding)-interval.padding
575 574
     end(target.granges.padding) <- end(target.granges.padding)+interval.padding
576 575
     .calcTargetedGenome <- function(granges) {
577 576
         tmp <- reduce(granges)
578
-        round(sum(width(tmp))/(1000^2),digits=2)
579
-    }    
580
-    if (verbose) {
581
-        message("Total size of targeted genomic region: ", 
582
-            .calcTargetedGenome(target.granges), "Mb (",
583
-            .calcTargetedGenome(target.granges.padding),
584
-            "Mb with ", interval.padding, "bp padding)")
577
+        sum(width(tmp))/(1000^2)
585 578
     }    
579
+    flog.info("Total size of targeted genomic region: %.2fMb (%.2fMb with %ibp padding).", 
580
+        .calcTargetedGenome(target.granges), 
581
+        .calcTargetedGenome(target.granges.padding),
582
+        interval.padding)
586 583
 
587 584
     idxTarget <- overlapsAny(vcf, target.granges)
588 585
     idxPadding <- overlapsAny(vcf, target.granges.padding)
... ...
@@ -596,16 +593,17 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
596 593
     info(vcf)$OnTarget <- 0
597 594
     info(vcf)$OnTarget[idxPadding] <- 2
598 595
     info(vcf)$OnTarget[idxTarget] <- 1
599
-    if (verbose) {
600
-        targetsWithSNVs <- overlapsAny(target.granges.padding, vcf)
601
-        percentTargetsWithSNVs <- round(sum(targetsWithSNVs,na.rm=TRUE)/
602
-            length(targetsWithSNVs)*100, digits=1)
603
-        tmp <- ""
604
-        if (percentTargetsWithSNVs > 20) { 
605
-            tmp <- " segmentationPSCBS might produce better results."
606
-        }
607
-        message(percentTargetsWithSNVs,"% of targets contain heterozygous ",
608
-            "SNVs.",tmp)
609
-    }    
596
+    
597
+    # report stats in log file
598
+    targetsWithSNVs <- overlapsAny(target.granges.padding, vcf)
599
+    percentTargetsWithSNVs <- sum(targetsWithSNVs,na.rm=TRUE)/
600
+        length(targetsWithSNVs)*100
601
+    tmp <- ""
602
+    if (percentTargetsWithSNVs > 20) { 
603
+        tmp <- " segmentationPSCBS might produce better results."
604
+    }
605
+    flog.info("%.1f%% of targets contain SNVs. %s", 
606
+        percentTargetsWithSNVs, tmp)
607
+
610 608
     vcf
611 609
 }
... ...
@@ -22,7 +22,6 @@
22 22
 #' @param pool.weights Either find good pooling weights by optimization or
23 23
 #' weight all best normals equally.
24 24
 #' @param plot.pool Allows the pooling function to create plots.
25
-#' @param verbose Verbose output.
26 25
 #' @param \dots Additional arguments passed to \code{\link{poolCoverage}}.
27 26
 #' @return Filename of the best matching normal.
28 27
 #' @author Markus Riester
... ...
@@ -48,7 +47,7 @@
48 47
 findBestNormal <- function(tumor.coverage.file, normalDB, pcs=1:3, 
49 48
     num.normals = 1, ignore.sex = FALSE, sex = NULL, 
50 49
     normal.coverage.files = NULL, pool = FALSE, 
51
-    pool.weights = c("voom", "equal"), plot.pool = FALSE, verbose = TRUE,
50
+    pool.weights = c("voom", "equal"), plot.pool = FALSE, 
52 51
     ...) {
53 52
     if (is.character(tumor.coverage.file)) {
54 53
         tumor  <- readCoverageGatk(tumor.coverage.file)
... ...
@@ -71,15 +70,15 @@ findBestNormal <- function(tumor.coverage.file, normalDB, pcs=1:3,
71 70
     if (!ignore.sex && !is.null(normalDB$sex) && 
72 71
         sum(!is.na(normalDB$sex))>0) {
73 72
         if (is.null(sex)) {
74
-            sex <- getSexFromCoverage(tumor, verbose=FALSE)
73
+            sex <- getSexFromCoverage(tumor)
75 74
         }
76
-        if (verbose) message("Sex of sample: ", sex)
75
+        flog.info("Sample sex: %s", sex)
77 76
         if (!is.na(sex)) {
78 77
             idx.normals <- which(normalDB$sex == sex)
79 78
         }
80 79
         if (length(idx.normals) < 2) { 
81
-            warning("Not enough samples of sex ", sex, 
82
-                " in database. Ignoring sex.")
80
+            flog.warn("Not enough samples of sex %s %s", sex, 
81
+                "in database. Ignoring sex.")
83 82
             idx.normals <- seq_along(normalDB$normal.coverage.files)
84 83
         }
85 84
     }
... ...
@@ -98,10 +97,8 @@ findBestNormal <- function(tumor.coverage.file, normalDB, pcs=1:3,
98 97
     if (pool) {
99 98
       normals <- lapply(normal.coverage.files, readCoverageGatk)
100 99
       pool.weights <- match.arg(pool.weights)
101
-      if (verbose) {
102
-          message("Pooling ", paste(basename(normal.coverage.files), 
103
-            collapse=", "))
104
-      }        
100
+      flog.info("Pooling %s.", paste(basename(normal.coverage.files), 
101
+        collapse=", "))
105 102
       w <- NULL
106 103
       if (pool.weights == "voom" && num.normals > 1) {  
107 104
           logRatio <- .voomLogRatio(tumor, 
... ...
@@ -19,7 +19,6 @@
19 19
 #' be considered when setting cutoffs.
20 20
 #' @param remove.outliers Removes coverage outliers before calculating mean
21 21
 #' chromosome coverages.
22
-#' @param verbose Verbose output.
23 22
 #' @return Returns a \code{character(1)} with \code{M} for male, \code{F} for
24 23
 #' female, or \code{NA} if unknown.
25 24
 #' @author Markus Riester
... ...
@@ -32,7 +31,7 @@
32 31
 #' 
33 32
 #' @export getSexFromCoverage
34 33
 getSexFromCoverage <- function(coverage.file, min.ratio = 25, min.ratio.na = 20,
35
-    remove.outliers = TRUE, verbose = TRUE) {
34
+    remove.outliers = TRUE) {
36 35
     if (is.character(coverage.file)) {
37 36
         x <- readCoverageGatk(coverage.file)
38 37
     } else {
... ...
@@ -51,7 +50,7 @@ getSexFromCoverage <- function(coverage.file, min.ratio = 25, min.ratio.na = 20,
51 50
     }    
52 51
 
53 52
     if (is.na(avg.coverage[sex.chr[1]]) || is.na(avg.coverage[sex.chr[2]]) ) {
54
-        if (verbose) message(
53
+        flog.warn(
55 54
             "Allosome coverage appears to be missing, cannot determine sex.")
56 55
         return(NA)
57 56
     }    
... ...
@@ -60,18 +59,13 @@ getSexFromCoverage <- function(coverage.file, min.ratio = 25, min.ratio.na = 20,
60 59
         names(avg.coverage))],na.rm=TRUE)
61 60
     autosome.ratio <- avg.autosome.coverage/(avg.coverage[sex.chr[1]]+0.0001)
62 61
     if (autosome.ratio > 5) { 
63
-        if (verbose) message(
64
-            "Allosome coverage very low, cannot determine sex.")
62
+        flog.info("Allosome coverage very low, cannot determine sex.")
65 63
         return(NA)
66 64
     }
67 65
     XY.ratio <- avg.coverage[sex.chr[1]]/ (avg.coverage[sex.chr[2]]+ 0.0001)
68
-    if (verbose) {
69
-        message("Mean coverages:",
70
-                " chrX: ",  round(avg.coverage[sex.chr[1]], digits=2), 
71
-                " chrY: ", round(avg.coverage[sex.chr[2]], digits=2),
72
-                " chr1-22: ",round(avg.autosome.coverage, digits=2),"."
73
-        )
74
-    }     
66
+    flog.info("Mean coverages: chrX: %.2f, chrY: %.2f, chr1-22: %.2f.",
67
+            avg.coverage[sex.chr[1]], avg.coverage[sex.chr[2]],
68
+            avg.autosome.coverage)
75 69
     if (XY.ratio > min.ratio) return("F")
76 70
     if (XY.ratio > min.ratio.na) return(NA)
77 71
     return("M") 
... ...
@@ -112,7 +106,6 @@ getSexFromCoverage <- function(coverage.file, min.ratio = 25, min.ratio.na = 20,
112 106
 #' homozygous.
113 107
 #' @param af.cutoff Remove all SNVs with allelic fraction lower than the
114 108
 #' specified value.
115
-#' @param verbose Verbose output.
116 109
 #' @return Returns a \code{character(1)} with \code{M} for male, \code{F} for
117 110
 #' female, or \code{NA} if unknown.
118 111
 #' @author Markus Riester
... ...
@@ -129,7 +122,7 @@ getSexFromCoverage <- function(coverage.file, min.ratio = 25, min.ratio.na = 20,
129 122
 #' @importFrom stats fisher.test
130 123
 getSexFromVcf <- function(vcf, tumor.id.in.vcf=NULL, min.or = 4, 
131 124
     min.or.na = 2.5, max.pv = 0.001, homozygous.cutoff = 0.95,
132
-    af.cutoff = 0.2, verbose=TRUE) {
125
+    af.cutoff = 0.2) {
133 126
     if (is.null(tumor.id.in.vcf)) {
134 127
         tumor.id.in.vcf <- .getTumorIdInVcf(vcf) 
135 128
     }
... ...
@@ -144,41 +137,34 @@ getSexFromVcf <- function(vcf, tumor.id.in.vcf=NULL, min.or = 4,
144 137
 
145 138
     homozygous <- geno(vcf)$FA[,tumor.id.in.vcf] > homozygous.cutoff
146 139
     if ( sum(homozygous)/length(homozygous) < 0.001 ) {
147
-        if (verbose) { 
148
-            message("No homozygous variants in VCF, provide unfiltered VCF.")
149
-        }    
140
+        flog.info("No homozygous variants in VCF, provide unfiltered VCF.")
150 141
         return(NA)
151 142
     }
152 143
 
153 144
     if (!sum(chrX)) {
154
-        if (verbose) { 
155
-            message("No variants on chrX in VCF.")
156
-        }    
145
+        flog.info("No variants on chrX in VCF.")
157 146
         return(NA)
158 147
     }    
159 148
     res <- fisher.test(homozygous, as.vector(chrX))
160
-    if (verbose) message(sum( homozygous & as.vector(chrX)), 
161
-        " homozygous and ", sum( !homozygous & as.vector(chrX)), 
162
-        " heterozygous variants on chromosome X.")
149
+    flog.info("%i homozygous and %i heterozygous variants on chrX.", 
150
+        sum( homozygous & as.vector(chrX)), 
151
+        sum( !homozygous & as.vector(chrX)))
163 152
     sex <- "F"    
164 153
     if (res$estimate >= min.or.na) sex <- NA
165 154
     if (res$estimate >= min.or && res$p.value > max.pv) sex <- NA
166 155
     if (res$p.value <= max.pv && res$estimate >= min.or) sex <- "M"
167
-    if (verbose) { 
168
-        message("Sex from VCF: ", sex, " (Fisher's p-value: ", 
169
-            ifelse(res$p.value < 0.0001, "< 0.0001", 
170
-            round(res$p.value, digits=3)), 
171
-            "  odds-ratio: ", round(res$estimate, digits=2), ")")
172
-    }    
156
+    flog.info("Sex from VCF: %s (Fisher's p-value: %s, odds-ratio: %.2f).", 
157
+        sex, ifelse(res$p.value < 0.0001, "< 0.0001", round(res$p.value, digits=3)), 
158
+        res$estimate)
173 159
     return(sex)    
174 160
 }
175 161
 
176 162
 .getSex <- function(sex, normal, tumor) {
177 163
     if (sex != "?") return(sex)
178
-    sex.tumor <- getSexFromCoverage(tumor, verbose=FALSE)
179
-    sex.normal <- getSexFromCoverage(normal, verbose=FALSE)
164
+    sex.tumor <- getSexFromCoverage(tumor)
165
+    sex.normal <- getSexFromCoverage(normal)
180 166
     if (!identical(sex.tumor, sex.normal)) {
181
-        warning("Sex tumor/normal mismatch: tumor = ", sex.tumor, 
167
+        flog.warn("Sex tumor/normal mismatch: tumor = ", sex.tumor, 
182 168
             " normal = ", sex.normal)
183 169
     }
184 170
     sex <- sex.tumor    
... ...
@@ -57,6 +57,7 @@
57 57
 #' @importFrom graphics abline axis boxplot contour hist image
58 58
 #'             legend lines par plot text mtext polygon points
59 59
 #'             rect strwidth symbols barplot
60
+#' @importFrom ggplot2 geom_boxplot geom_hline labs
60 61
 plotAbs <- function(res, ids = NULL, 
61 62
 type = c("hist", "overview", "overview2", "BAF", "AF", "volcano", "all"),
62 63
 chr = NULL, germline.only = TRUE, show.contour = FALSE, purity = NULL, 
... ...
@@ -152,7 +153,7 @@ max.mapping.bias = 0.8, palette.name = "Paired", ... ) {
152 153
                 "Purity:", round(res$results[[i]]$purity[[1]], digits=2), 
153 154
                 " Tumor ploidy:", round( res$results[[i]]$ploidy, digits=3), 
154 155
                 " SNV log-likelihood:", 
155
-                    round(res$results[[i]]$SNV.posterior$beta$llik, digits=2),
156
+                    round(res$results[[i]]$SNV.posterior$beta.model$llik, digits=2),
156 157
                 " GoF:", r2,    
157 158
                 " Mean coverage:", 
158 159
                     paste(round(apply(geno(res$input$vcf)$DP,2,mean)),
... ...
@@ -315,7 +316,7 @@ max.mapping.bias = 0.8, palette.name = "Paired", ... ) {
315 316
             r <- .getVariantPosteriors(res, i, max.mapping.bias)
316 317
             if (is.null(r)) next
317 318
 
318
-            vcf <- res$input$vcf[res$results[[i]]$SNV.posterior$beta$vcf.ids]
319
+            vcf <- res$input$vcf[res$results[[i]]$SNV.posterior$beta.model$vcf.ids]
319 320
             # brwer.pal requires at least 3 levels
320 321
 
321 322
             r <- .getAFPlotGroups(r, is.null(info(vcf)$SOMATIC))
... ...
@@ -418,7 +419,15 @@ max.mapping.bias = 0.8, palette.name = "Paired", ... ) {
418 419
                 legend("topright", legend=as.character(mycol.palette$group),
419 420
                     col=mycol.palette$color, 
420 421
                     pch=mycol.palette$pch, cex=0.8)
421
-
422
+                
423
+                estimatedContRate <- 
424
+                    res$results[[i]]$SNV.posterior$beta.model$posterior.contamination
425
+                if (!is.null(estimatedContRate) && 
426
+                    estimatedContRate > min(r$AR[r$ML.SOMATIC])) {
427
+                    abline(h=estimatedContRate, col="red")
428
+                    text(x=mylogratio.xlim[1], y=estimatedContRate+0.02, 
429
+                        labels="Contamination", col="red", pos=4)
430
+                }
422 431
                 text(x=peak.ideal.means[
423 432
                     as.character(r$ML.C[r$ML.SOMATIC])][idx.labels], 
424 433
                     y=r$ML.AR[r$ML.SOMATIC][idx.labels], 
... ...
@@ -597,7 +606,7 @@ ss) {
597 606
 }    
598 607
 
599 608
 .getVariantPosteriors <- function(res, i, max.mapping.bias=NULL) {
600
-    r <- res$results[[i]]$SNV.posterior$beta.model$posterior
609
+    r <- res$results[[i]]$SNV.posterior$beta.model$posteriors
601 610
     if (!is.null(r) && !is.null(max.mapping.bias)) {
602 611
         r <- r[r$MAPPING.BIAS >= max.mapping.bias,]
603 612
     }    
... ...
@@ -607,22 +616,56 @@ ss) {
607 616
 .getAFPlotGroups <- function(r, single.mode) {
608 617
     if (single.mode) {
609 618
         groupLevels <- c("dbSNP/germline", "dbSNP/somatic", "novel/somatic",
610
-            "novel/germline", "COSMIC/germline", "COSMIC/somatic") 
619
+            "novel/germline", "COSMIC/germline", "COSMIC/somatic", "contamination") 
611 620
         r$group <- groupLevels[1]
612 621
         r$group[r$prior.somatic < 0.1 & r$ML.SOMATIC] <- groupLevels[2]
613 622
         r$group[r$prior.somatic >= 0.1 & r$ML.SOMATIC] <- groupLevels[3]
614 623
         r$group[r$prior.somatic >= 0.1 & !r$ML.SOMATIC] <- groupLevels[4]
615 624
         r$group[r$prior.somatic >= 0.9 & !r$ML.SOMATIC] <- groupLevels[5]
616 625
         r$group[r$prior.somatic >= 0.9 & r$ML.SOMATIC] <- groupLevels[6]
626
+        r$group[r$GERMLINE.CONTLOW > 0.5 | 
627
+            r$GERMLINE.CONTHIGH > 0.5] <- groupLevels[7]
617 628
         r$group <- factor(r$group, levels=groupLevels)
618 629
         return(r)
619 630
     } 
620 631
     groupLevels <- c("germline", "germline/ML somatic", "somatic", 
621
-        "somatic/ML germline")
632
+        "somatic/ML germline", "contamination")
622 633
     r$group <- groupLevels[1]
623 634
     r$group[r$prior.somatic < 0.1 & r$ML.SOMATIC] <- groupLevels[2]
624 635
     r$group[r$prior.somatic >= 0.1 & r$ML.SOMATIC] <- groupLevels[3]
625 636
     r$group[r$prior.somatic >= 0.1 & !r$ML.SOMATIC] <- groupLevels[4]
637
+    r$group[r$GERMLINE.CONTLOW > 0.5 | 
638
+        r$GERMLINE.CONTHIGH > 0.5] <- groupLevels[5]
626 639
     r$group <- factor(r$group, levels=groupLevels)
627 640
     r
628 641
 }            
642
+
643
+
644
+.plotContamination <- function(pp,  max.mapping.bias=NULL, plot=TRUE) {
645
+    if (is.null(max.mapping.bias)) max.mapping.bias=0
646
+
647
+    idx <- pp$GERMLINE.CONTHIGH+pp$GERMLINE.CONTLOW > 0.5 & 
648
+        pp$MAPPING.BIAS >= max.mapping.bias
649
+    if (!length(which(idx))) return(0)
650
+    df <- data.frame(
651
+        chr=pp$chr[idx], 
652
+        AR=sapply(pp$AR.ADJUSTED[idx], function(x) ifelse(x>0.5, 1-x,x)),
653
+        HIGHLOW=ifelse(pp$GERMLINE.CONTHIGH>pp$GERMLINE.CONTLOW, 
654
+            "HIGH", "LOW")[idx]
655
+    )
656
+    df$chr <- factor(df$chr, levels=unique(df$chr))
657
+    # take the chromosome median and then average. the low count
658
+    # might be biased in case contamination rate is < AR cutoff
659
+    estimatedRate <- weighted.mean( 
660
+        sapply(split(df$AR, df$HIGHLOW), median), 
661
+        sapply(split(df$AR, df$HIGHLOW), length)
662
+    )
663
+    if (plot) {
664
+        gp <- ggplot(df, aes_string(x="chr",y="AR",fill="HIGHLOW"))+geom_boxplot()+
665
+        geom_hline(yintercept=estimatedRate, color="grey", linetype="dashed")+
666
+        labs(fill="")
667
+        print(gp)
668
+   }     
669
+   estimatedRate
670
+}
671
+
... ...
@@ -19,8 +19,8 @@
19 19
 #' mutations.} \item{k}{Minimum number of supporting reads.} \item{f}{Expected
20 20
 #' allelic fraction. }
21 21
 #' @author Markus Riester
22
-#' @references Carter et al., Absolute quantification of somatic DNA
23
-#' alterations in human cancer. Nature Biotechnology 2012.
22
+#' @references Carter et al. (2012), Absolute quantification of somatic DNA
23
+#' alterations in human cancer. Nature Biotechnology.
24 24
 #' @examples
25 25
 #' 
26 26
 #' purity <- c(0.1,0.15,0.2,0.25,0.4,0.6,1)
... ...
@@ -15,7 +15,6 @@
15 15
 #' be used to automatically ignore unlikely solutions.
16 16
 #' @param max.ploidy Maximum ploidy to be considered. If \code{NULL}, all. Can
17 17
 #' be used to automatically ignore unlikely solutions.
18
-#' @param verbose Verbose output.
19 18
 #' @return The return value of the corresponding \code{\link{runAbsoluteCN}}
20 19
 #' call, but with the results array manipulated according the curation CSV file
21 20
 #' and arguments of this function.
... ...
@@ -35,8 +34,8 @@
35 34
 readCurationFile <- function(file.rds,
36 35
 file.curation = gsub(".rds$", ".csv", file.rds),
37 36
 remove.failed = FALSE, report.best.only=FALSE, min.ploidy = NULL,
38
-max.ploidy = NULL, verbose=FALSE) {
39
-    if (verbose) message("Reading ", file.rds, "...")
37
+max.ploidy = NULL) {
38
+    flog.info("Reading %s...", file.rds)
40 39
     res <- readRDS(file.rds)
41 40
     curation <- read.csv(file.curation, as.is=TRUE, nrows=1)
42 41
     .checkLogical <- function(field) {
... ...
@@ -58,7 +58,7 @@
58 58
 #' \code{\link{filterVcfBasic}}.
59 59
 #' @param args.filterVcf Arguments for variant filtering function. Arguments
60 60
 #' \code{vcf}, \code{tumor.id.in.vcf}, \code{min.coverage},
61
-#' \code{model.homozygous}, \code{error} and \code{verbose} are required in the
61
+#' \code{model.homozygous} and \code{error} are required in the
62 62
 #' filter function and are automatically set.
63 63
 #' @param fun.setPriorVcf Function to set prior for somatic status for each
64 64
 #' variant in the VCF. Defaults to \code{\link{setPriorVcf}}.
... ...
@@ -70,15 +70,15 @@
70 70
 #' coverage files. Needs to return a \code{logical} vector whether an interval
71 71
 #' should be used for segmentation. Defaults to \code{\link{filterTargets}}.
72 72
 #' @param args.filterTargets Arguments for target filtering function. Arguments
73
-#' \code{log.ratio}, \code{tumor}, \code{gc.data}, \code{seg.file},
74
-#' \code{normalDB} and \code{verbose} are required and automatically set 
73
+#' \code{log.ratio}, \code{tumor}, \code{gc.data}, \code{seg.file} and
74
+#' \code{normalDB} are required and automatically set 
75 75
 #' @param fun.segmentation Function for segmenting the copy number log-ratios.
76 76
 #' Expected return value is a \code{data.frame} representation of the
77 77
 #' segmentation. Defaults to \code{\link{segmentationCBS}}.
78 78
 #' @param args.segmentation Arguments for segmentation function. Arguments
79
-#' \code{normal}, \code{tumor}, \code{log.ratio}, \code{plot.cnv},
79
+#' \code{normal}, \code{tumor}, \code{log.ratio}, \code{plot.cnv} and
80 80
 #' \code{min.coverage}, \code{sampleid}, \code{vcf}, \code{tumor.id.in.vcf},
81
-#' \code{centromeres} and \code{verbose} are required in the segmentation function 
81
+#' \code{centromeres} are required in the segmentation function 
82 82
 #' and automatically set.
83 83
 #' @param fun.focal Function for identifying focal amplifications. Defaults to
84 84
 #' \code{\link{findFocal}}.
... ...
@@ -133,6 +133,8 @@
133 133
 #' \code{sd(log.ratio)*log.ratio.calibration}.
134 134
 #' @param remove.off.target.snvs Deprecated. Use the corresponding argument in
135 135
 #' \code{args.filterVcf}.
136
+#' @param smooth.log.ratio Smooth \code{log.ratio} using the \code{DNAcopy}
137
+#' package.
136 138
 #' @param model.homozygous Homozygous germline SNPs are uninformative and by
137 139
 #' default removed. In 100 percent pure samples such as cell lines, however,
138 140
 #' heterozygous germline SNPs appear homozygous in case of LOH. Setting this
... ...
@@ -171,11 +173,19 @@
171 173
 #' typically result in a slightly more accurate purity, especially for rather
172 174
 #' silent genomes or very low purities. Otherwise, it will just use the purity
173 175
 #' determined via the SCNA-fit.
176
+#' @param log.file If not \code{NULL}, store verbose output to file.
174 177
 #' @param verbose Verbose output.
175 178
 #' @return A list with elements \item{candidates}{Results of the grid search.}
176 179
 #' \item{results}{All local optima, sorted by final rank.} \item{input}{The
177 180
 #' input data.}
178 181
 #' @author Markus Riester
182
+#' @references Riester et al. (2016). PureCN: Copy number calling and SNV 
183
+#' classification using targeted short read sequencing. Source Code for Biology 
184
+#' and Medicine, 11, pp. 13. 
185
+#'
186
+#' Carter et al. (2012), Absolute quantification of somatic DNA alterations in 
187
+#' human cancer. Nature Biotechnology.
188
+#'
179 189
 #' @seealso \code{\link{correctCoverageBias}} \code{\link{segmentationCBS}}
180 190
 #' \code{\link{calculatePowerDetectSomatic}}
181 191
 #' @examples
... ...
@@ -221,6 +231,8 @@
221 231
 #' @importFrom utils data read.delim tail packageVersion
222 232
 #' @importFrom S4Vectors queryHits subjectHits DataFrame
223 233
 #' @importFrom data.table data.table
234
+#' @importFrom futile.logger flog.info flog.warn flog.fatal
235
+#'             flog.threshold flog.appender appender.tee
224 236
 runAbsoluteCN <- function(normal.coverage.file = NULL, 
225 237
     tumor.coverage.file = NULL, log.ratio = NULL, seg.file = NULL, 
226 238
     seg.file.sdev = 0.4, vcf.file = NULL, normalDB = NULL, genome, 
... ...
@@ -237,29 +249,32 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
237 249
     candidates = NULL, min.coverage = 15, max.coverage.vcf = 300, 
238 250
     max.non.clonal = 0.2, max.homozygous.loss = c(0.05, 1e07) , non.clonal.M = 1/3, 
239 251
     max.mapping.bias = 0.8, iterations = 30, log.ratio.calibration = 0.25, 
240
-    remove.off.target.snvs = NULL, model.homozygous = FALSE, error = 0.001, 
241
-    gc.gene.file = NULL, max.dropout = c(0.95, 1.1), max.logr.sdev = 0.75, 
252
+    smooth.log.ratio = TRUE, remove.off.target.snvs = NULL, 
253
+    model.homozygous = FALSE, error = 0.001, gc.gene.file = NULL, 
254
+    max.dropout = c(0.95, 1.1), max.logr.sdev = 0.75, 
242 255
     max.segments = 300, min.gof = 0.8, plot.cnv = TRUE, cosmic.vcf.file = NULL, 
243
-    post.optimize = FALSE, verbose = TRUE) {
244
-    debug <- FALSE
245
-    
256
+    post.optimize = FALSE, log.file = NULL, verbose = TRUE) {
257
+
258
+    if (!verbose) flog.threshold("WARN")
259
+    if (!is.null(log.file)) flog.appender(appender.tee(log.file))
260
+
261
+     # log function arguments     
262
+    try(.logHeader(as.list(match.call())[-1]), silent=TRUE)
263
+
246 264
     # TODO: remove in PureCN 1.8
247 265
     if (!is.null(remove.off.target.snvs)) {
248 266
         args.filterVcf$remove.off.target.snvs <- remove.off.target.snvs
249
-        message("remove.off.target.snvs is deprecated. ", 
250
-            "Please use it in args.filterVcf instead.")
267
+        flog.warn("remove.off.target.snvs is deprecated. Please use it in args.filterVcf instead.")
251 268
     }
252 269
     # TODO: remove in PureCN 1.8
253 270
     if (length(max.homozygous.loss)==1){
254 271
          max.homozygous.loss <- c(max.homozygous.loss, 1e07)
255
-         message("max.homozygous.loss now a double(2) vector. ", 
256
-            "Please provide both values.")
272
+         flog.warn("max.homozygous.loss now a double(2) vector. Please provide both values.")
257 273
     }     
258 274
     # TODO: remove in PureCN 1.8
259 275
     if (is.null(normalDB) && !is.null(args.filterTargets$normalDB)) {
260 276
         normalDB <- args.filterTargets$normalDB
261
-        message("normalDB now a runAbsoluteCN argument. ", 
262
-            "Please provide it there, not in args.filterTargets.")
277
+        flog.warn("normalDB now a runAbsoluteCN argument. Please provide it there, not in args.filterTargets.")
263 278
     }    
264 279
     centromeres <- .getCentromerePositions(centromeres, genome)
265 280
     
... ...
@@ -274,8 +289,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
274 289
     
275 290
     test.num.copy <- sort(test.num.copy)
276 291
     
277
-    if (verbose) 
278
-        message("Loading GATK coverage files...")
292
+    flog.info("Loading GATK coverage files...")
279 293
     
280 294
     if (!is.null(normal.coverage.file)) {
281 295
         if (is.character(normal.coverage.file)) {
... ...
@@ -298,9 +312,6 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
298 312
         if (is.null(sampleid)) 
299 313
             sampleid <- basename(tumor.coverage.file)
300 314
     } else {
301
-        if (verbose) 
302
-            message("tumor.coverage.file does not appear to be a filename, assuming", 
303
-                " it is valid GATK coverage data.")
304 315
         tumor <- tumor.coverage.file
305 316
         if (is.null(sampleid)) 
306 317
             sampleid <- "Sample.1"
... ...
@@ -326,14 +337,15 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
326 337
             if (is.null(normal.coverage.file)) 
327 338
                 normal <- tumor
328 339
             log.ratio <- .createFakeLogRatios(tumor, seg.file, chr.hash)
340
+            smooth.log.ratio <- FALSE
329 341
             if (is.null(sampleid)) 
330
-                sampleid <- read.delim(seg.file)[1, 1]
342
+                sampleid <- read.delim(seg.file, as.is=TRUE)[1, 1]
331 343
         } else {
332 344
             if (is.null(normal.coverage.file)) {
333 345
                 .stopUserError("Need a normal coverage file if log.ratio and seg.file are not", 
334 346
                   " provided.")
335 347
             }
336
-            log.ratio <- calculateLogRatio(normal, tumor, verbose = verbose)
348
+            log.ratio <- calculateLogRatio(normal, tumor)
337 349
         }
338 350
     } else {
339 351
         # the segmentation algorithm will remove targets with low coverage in both tumor
... ...
@@ -362,14 +374,14 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
362 374
     }
363 375
     
364 376
     args.filterTargets <- c(list(log.ratio = log.ratio, tumor = tumor, 
365
-        gc.data = gc.data, seg.file = seg.file, normalDB = normalDB, 
366
-        verbose = verbose), args.filterTargets)
377
+        gc.data = gc.data, seg.file = seg.file, normalDB = normalDB),
378
+        args.filterTargets)
367 379
     
368 380
     targetsUsed <- do.call(fun.filterTargets, 
369 381
         .checkArgs(args.filterTargets, "filterTargets"))
370 382
     
371 383
     # chr.hash is an internal data structure, so we need to do this separately.
372
-    targetsUsed <- .filterTargetsChrHash(targetsUsed, tumor, chr.hash, verbose)
384
+    targetsUsed <- .filterTargetsChrHash(targetsUsed, tumor, chr.hash)
373 385
     targetsUsed <- which(targetsUsed)
374 386
     if (nrow(tumor) != nrow(normal) || nrow(tumor) != length(log.ratio) || (!is.null(gc.gene.file) && 
375 387
         nrow(tumor) != nrow(gc.data))) {
... ...
@@ -381,28 +393,31 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
381 393
     if (!is.null(gc.gene.file)) {
382 394
         gc.data <- gc.data[targetsUsed, ]
383 395
     }
384
-    if (verbose) 
385
-        message("Using ", nrow(tumor), " targets.")
396
+    flog.info("Using %i targets.", nrow(tumor))
397
+
398
+    if (smooth.log.ratio) {
399
+        CNA.obj <- smooth.CNA(CNA(log.ratio, 
400
+            .strip.chr.name(normal$chr, chr.hash), 
401
+            floor((normal$probe_start + normal$probe_end)/2), 
402
+            data.type="logratio", sampleid="sample"))
403
+        log.ratio <- CNA.obj$sample
404
+    }     
386 405
     
387 406
     dropoutWarning <- FALSE
388 407
     # clean up noisy targets, but not if the segmentation was already provided.
389 408
     if (is.null(seg.file)) {
390 409
         if (!is.null(gc.gene.file)) {
391
-            dropoutWarning <- .checkGCBias(normal, tumor, gc.data, max.dropout, verbose)
392
-        } else if (verbose) {
393
-            message("No gc.gene.file provided. Cannot check if data was ", 
394
-                    "GC-normalized. Was it?")
410
+            dropoutWarning <- .checkGCBias(normal, tumor, gc.data, max.dropout)
411
+        } else {
412
+            flog.info("No gc.gene.file provided. Cannot check if data was GC-normalized. Was it?")
395 413
         }
396 414
     }
397 415
     
398 416
     if (!is.null(gc.gene.file) && is.null(gc.data$Gene)) {
399
-        if (verbose) 
400
-            message("No Gene column in gc.gene.file.", 
401
-                    " You won't get gene-level calls.")
417
+        flog.info("No Gene column in gc.gene.file. You won't get gene-level calls.")
402 418
         gc.gene.file <- NULL
403 419
     }
404 420
     
405
-    
406 421
     exon.gr <- GRanges(seqnames=tumor$chr, IRanges(start=tumor$probe_start, 
407 422
                                                    end=tumor$probe_end))
408 423
     
... ...
@@ -416,9 +431,8 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
416 431
     sex.vcf <- NULL
417 432
     
418 433
     if (!is.null(vcf.file)) {
419
-        if (verbose) 
420
-            message("Loading VCF...")
421
-        vcf <- .readAndCheckVcf(vcf.file, genome=genome, verbose=verbose)
434
+        flog.info("Loading VCF...")
435
+        vcf <- .readAndCheckVcf(vcf.file, genome=genome)
422 436
         
423 437
         if (length(intersect(tumor$chr, seqlevels(vcf))) < 1) {
424 438
             .stopUserError("Different chromosome names in coverage and VCF.")
... ...
@@ -429,7 +443,6 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
429 443
         }
430 444
         
431 445
         if (sum(colSums(geno(vcf)$DP) > 0) == 1 && args.filterVcf$use.somatic.status) {
432
-            message("VCF file seems to have only one sample. ", "Using SNVs in single mode.")
433 446
             args.filterVcf$use.somatic.status <- FALSE
434 447
         }
435 448
         
... ...
@@ -439,23 +452,21 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
439 452
             normal.id.in.vcf <- .getNormalIdInVcf(vcf, tumor.id.in.vcf)
440 453
         }
441 454
         
442
-        if (verbose) 
443
-            message("Assuming ", tumor.id.in.vcf, " is tumor in VCF file.")
455
+        flog.info("%s is tumor in VCF file.", tumor.id.in.vcf)
444 456
         if (sex != "diploid") {
445
-            sex.vcf <- getSexFromVcf(vcf, tumor.id.in.vcf, verbose = verbose)
457
+            sex.vcf <- getSexFromVcf(vcf, tumor.id.in.vcf)
446 458
             if (!is.na(sex.vcf) && sex %in% c("F", "M") && sex.vcf != sex) {
447
-                warning("Sex mismatch of coverage and VCF. ", 
459
+                flog.warn("Sex mismatch of coverage and VCF. %s%s", 
448 460
                         "Could be because of noisy data, contamination, ", 
449 461
                   "loss of chrY or a mis-alignment of coverage and VCF.")
450 462
             }
451 463
         }
452 464
         n.vcf.before.filter <- nrow(vcf)
453
-        if (verbose) 
454
-            message("Found ", n.vcf.before.filter, " variants in VCF file.")
465
+        flog.info("Found %i variants in VCF file.", n.vcf.before.filter)
455 466
         
456 467
         args.filterVcf <- c(list(vcf = vcf, tumor.id.in.vcf = tumor.id.in.vcf, 
457 468
             model.homozygous = model.homozygous, error = error, 
458
-            target.granges = exon.gr, verbose = verbose), args.filterVcf)
469
+            target.granges = exon.gr), args.filterVcf)
459 470
         if (is.null(args.filterVcf$min.coverage)) {
460 471
             args.filterVcf$min.coverage <- min.coverage
461 472
         }
... ...
@@ -465,34 +476,31 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
465 476
         vcf <- vcf.filtering$vcf
466 477
         
467 478
         if (!is.null(cosmic.vcf.file)) {
468
-            vcf <- .addCosmicCNT(vcf, cosmic.vcf.file, verbose = verbose)
479
+            vcf <- .addCosmicCNT(vcf, cosmic.vcf.file)
469 480
         }
470 481
         
471
-        args.setPriorVcf <- c(list(vcf = vcf, tumor.id.in.vcf = tumor.id.in.vcf, 
472
-            verbose = verbose), args.setPriorVcf)
482
+        args.setPriorVcf <- c(list(vcf = vcf, tumor.id.in.vcf = tumor.id.in.vcf),
483
+            args.setPriorVcf)
473 484
         prior.somatic <- do.call(fun.setPriorVcf, 
474 485
             .checkArgs(args.setPriorVcf, "setPriorVcf"))
475 486
         
476 487
         # get mapping bias
477 488
         args.setMappingBiasVcf$vcf <- vcf
478 489
         args.setMappingBiasVcf$tumor.id.in.vcf <- tumor.id.in.vcf
479
-        args.setMappingBiasVcf$verbose <- verbose
480 490
         mapping.bias <- do.call(fun.setMappingBiasVcf,
481 491
             .checkArgs(args.setMappingBiasVcf, "setMappingBiasVcf"))
482 492
         idxHqGermline <- prior.somatic < 0.5 & mapping.bias >= max.mapping.bias
483 493
         vcf.germline <- vcf[idxHqGermline]
484 494
     }
485 495
     
486
-    if (verbose) 
487
-        message("Sex of sample: ", sex)
488
-    if (verbose) 
489
-        message("Segmenting data...")
496
+    flog.info("Sample sex: %s", sex)
497
+    flog.info("Segmenting data...")
490 498
     
491 499
     args.segmentation <- c(list(normal = normal, tumor = tumor, log.ratio = log.ratio, 
492 500
         seg = .loadSegFile(seg.file), plot.cnv = plot.cnv, min.coverage = ifelse(is.null(seg.file), 
493 501
             min.coverage, -1), sampleid = sampleid, vcf = vcf.germline, tumor.id.in.vcf = tumor.id.in.vcf, 
494 502
         normal.id.in.vcf = normal.id.in.vcf, max.segments = max.segments, chr.hash = chr.hash, 
495
-        centromeres = centromeres, verbose = verbose), args.segmentation)
503
+        centromeres = centromeres), args.segmentation)
496 504
     
497 505
     vcf.germline <- NULL
498 506
     seg <- do.call(fun.segmentation,
... ...
@@ -516,20 +524,13 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
516 524
             n.vcf.before.filter <- nrow(vcf)
517 525
             vcf <- vcf[!is.na(snv.lr)]
518 526
             mapping.bias <- mapping.bias[!is.na(snv.lr)]
527
+            prior.somatic <- prior.somatic[!is.na(snv.lr)]
519 528
             
520 529
             # make sure all SNVs are in covered segments
521
-            if (verbose) 
522
-                message("Removing ", n.vcf.before.filter - nrow(vcf), " variants outside segments.")
530
+            flog.info("Removing %i variants outside segments.", n.vcf.before.filter - nrow(vcf))
523 531
         }
524 532
         ov <- findOverlaps(seg.gr, vcf)
525
-        if (verbose) 
526
-            message("Using ", nrow(vcf), " variants.")
527
-        
528
-        # get final somatic priors
529
-        args.setPriorVcf$vcf <- vcf
530
-        args.setPriorVcf$verbose <- FALSE
531
-        prior.somatic <- do.call(fun.setPriorVcf,
532
-            .checkArgs(args.setPriorVcf, "setPriorVcf"))
533
+        flog.info("Using %i variants.", nrow(vcf))
533 534
     }
534 535
     
535 536
     # get target log-ratios for all segments
... ...
@@ -554,7 +555,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
554 555
     if (!is.null(seg.file)) {
555 556
         sd.seg <- seg.file.sdev
556 557
     } else {
557
-        exon.lrs <- lapply(exon.lrs, .smoothOutliers)
558
+    #    exon.lrs <- lapply(exon.lrs, .smoothOutliers)
558 559
     }
559 560
     
560 561
     # renormalize, in case segmentation function changed means
... ...
@@ -587,14 +588,10 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
587 588
     
588 589
     if (sum(li < 0) > 0) 
589 590
         .stopRuntimeError("Some segments have negative size.")
590
-    
591
-    if (verbose) {
592
-        message("Mean standard deviation of log-ratios: ", round(sd.seg, digits = 2))
593
-    }
591
+    flog.info("Mean standard deviation of log-ratios: %.2f", sd.seg)
594 592
     log.ratio.offset <- rep(0, nrow(seg))
595 593
     
596
-    if (verbose) 
597
-        message("Optimizing purity and ploidy. ", "Will take a minute or two...")
594
+    flog.info("2D-grid search of purity and ploidy...")
598 595
     
599 596
     # find local maxima. use a coarser grid for purity, otherwise we will get far too
600 597
     # many solutions, which we will need to cluster later anyways.
... ...
@@ -603,7 +600,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
603 600
     } else {
604 601
         candidate.solutions <- .optimizeGrid(test.purity = seq(max(0.1, min(test.purity)), 
605 602
             min(0.99, max(test.purity)), by = 1/30), min.ploidy, max.ploidy, test.num.copy = test.num.copy, 
606
-            exon.lrs, seg, sd.seg, li, max.exon.ratio, max.non.clonal, verbose, debug)
603
+            exon.lrs, seg, sd.seg, li, max.exon.ratio, max.non.clonal)
607 604
         
608 605
         # if we have > 20 somatic mutations, we can try estimating purity based on
609 606
         # allelic fractions and assuming diploid genomes.
... ...
@@ -623,11 +620,9 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
623 620
         candidate.solutions$candidates <- candidate.solutions$candidates[idx.keep, 
624 621
             ]
625 622
     }
626
-    
627
-    if (verbose) 
628
-        message(paste(strwrap(paste("Local optima:", paste(round(candidate.solutions$candidates$purity, 
629
-            digits = 2), round(candidate.solutions$candidates$ploidy, digits = 2), 
630
-            sep = "/", collapse = ", "))), collapse = "\n"))
623
+    flog.info(paste(strwrap(paste("Local optima:\n", paste(round(candidate.solutions$candidates$purity, 
624
+        digits = 2), round(candidate.solutions$candidates$ploidy, digits = 2), 
625
+        sep = "/", collapse = ", "))), collapse = "\n"))
631 626
     
632 627
     simulated.annealing <- TRUE
633 628
     
... ...
@@ -640,16 +635,14 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
640 635
             total.ploidy <- candidate.solutions$candidates$ploidy[cpi]
641 636
             p <- candidate.solutions$candidates$purity[cpi]
642 637
             
643
-            if (verbose) 
644
-                message("Testing local optimum ", cpi, "/", nrow(candidate.solutions$candidates), 
645
-                  " at purity ", round(p, digits = 2), " and total ploidy ", round(total.ploidy, 
646
-                    digits = 2), "...")
638
+            flog.info("Testing local optimum %i/%i at purity %.2f and total ploidy %.2f...", 
639
+                cpi, nrow(candidate.solutions$candidates), p, total.ploidy)
647 640
             
648 641
             subclonal <- rep(FALSE, nrow(seg))
649 642
             old.llik <- -1
650 643
             cnt.llik.equal <- 0
651
-            C.posterior <- matrix(ncol = length(test.num.copy) + 1, nrow = nrow(seg))
652
-            colnames(C.posterior) <- c(test.num.copy, "Subclonal")
644
+            C.likelihood <- matrix(ncol = length(test.num.copy) + 1, nrow = nrow(seg))
645
+            colnames(C.likelihood) <- c(test.num.copy, "Subclonal")
653 646
             for (iter in seq_len(iterations)) {
654 647
                 # test for convergence
655 648
                 if (abs(old.llik - llik) < 0.0001) {
... ...
@@ -710,12 +703,6 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
710 703
                     p, " and total ploidy ", total.ploidy, ".")
711 704
                 }
712 705
                 
713
-                if (debug) 
714
-                  message(paste("Iteration:", iter, " Log-likelihood: ", llik, " Purity:", 
715
-                    p, " Total Ploidy:", total.ploidy, " Tumor Ploidy:", sum(li * 
716
-                      (C))/sum(li), " Fraction sub-clonal:", subclonal.f, " Mean log-ratio offset", 
717
-                    mean(log.ratio.offset)))
718
-                
719 706
                 for (i in seq_len(nrow(seg))) {
720 707
                   # Gibbs sample copy number Step 1: calculate log-likelihoods of fits In the first
721 708
                   # iteration, we do not have the integer copy numbers yet, so calculate ploidy
... ...
@@ -743,8 +730,9 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
743 730
                     ifelse(C[-i] == 0, 1, 0)) + li[i] * ifelse(Ci == 0, 1, 0))/sum(li), 
744 731
                     double(1))
745 732
 
746
-                  # set maximimum homozygous loss size to 10mb.  
747
-                  if (li[i]>max.homozygous.loss[2] && test.num.copy[1]<1) frac.homozygous.loss[1] <- 1
733
+                  if (li[i] > max.homozygous.loss[2] && test.num.copy[1]<1) {
734
+                       frac.homozygous.loss[1] <- 1
735
+                  }     
748 736
                   log.prior.homozygous.loss <- log(ifelse(frac.homozygous.loss > 
749 737
                     max.homozygous.loss[1], 0, 1))
750 738
                   if (iter > 1) 
... ...
@@ -754,7 +742,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
754 742
                   p.rij <- c(p.rij, .calcLlikSegmentSubClonal(exon.lrs[[i]] + log.ratio.offset[i], 
755 743
                     max.exon.ratio))
756 744
                   
757
-                  C.posterior[i, ] <- exp(p.rij - max(p.rij))
745
+                  C.likelihood[i, ] <- exp(p.rij - max(p.rij))
758 746
                   
759 747
                   if (simulated.annealing) 
760 748
                     p.rij <- p.rij * exp(iter/4)
... ...
@@ -772,8 +760,6 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
772 760
                   old.C <- C[i]
773 761
                   opt.C <- (2^(seg$seg.mean + log.ratio.offset) * total.ploidy)/p - 
774 762
                     ((2 * (1 - p))/p)
775
-                  # message(opt.C[i], ' seg: ', seg$seg.mean[i], ' offset: ', log.ratio.offset[i],
776
-                  # ' purity: ', p)
777 763
                   opt.C[opt.C < 0] <- 0
778 764
                   if (id > length(test.num.copy)) {
779 765
                     # optimal non-integer copy number
... ...
@@ -783,16 +769,14 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
783 769
                     C[i] <- test.num.copy[id]
784 770
                     subclonal[i] <- FALSE
785 771
                   }
786
-                  if (old.C != C[i] && debug) 
787
-                    message("Old: ", old.C, " New: ", C[i], " LR: ", mean(exon.lrs[[i]]))
788 772
                 }
789 773
             }
790 774
             if (subclonal.f < max.non.clonal && abs(total.ploidy - candidate.solutions$candidates$ploidy[cpi]) < 
791 775
                 1) 
792 776
                 break
793 777
             log.ratio.calibration <- log.ratio.calibration + 0.25
794
-            if (verbose && attempt < max.attempts) {
795
-                message("Recalibrating log-ratios...")
778
+            if (attempt < max.attempts) {
779
+                flog.info("Recalibrating log-ratios...")
796 780
             }
797 781
         }
798 782
         seg.adjusted <- seg
... ...
@@ -807,7 +791,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
807 791
                 SNV.posterior <- list(beta.model = list(llik = -Inf))
808 792
             }
809 793
             return(list(log.likelihood = llik, purity = p, ploidy = weighted.mean(C, 
810
-                li), total.ploidy = total.ploidy, seg = seg.adjusted, C.posterior = data.frame(C.posterior/rowSums(C.posterior), 
794
+                li), total.ploidy = total.ploidy, seg = seg.adjusted, C.likelihood = data.frame(C.likelihood/rowSums(C.likelihood), 
811 795
                 ML.C = C, ML.Subclonal = subclonal), SNV.posterior = SNV.posterior, 
812 796
                 fraction.subclonal = subclonal.f, fraction.homozygous.loss = sum(li[which(C < 
813 797
                   0.01)])/sum(li), gene.calls = NA, log.ratio.offset = log.ratio.offset, 
... ...
@@ -833,15 +817,15 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
833 817
                 tp <- p
834 818
                 pp <- 1
835 819
             }
820
+            cont.rate <- prior.contamination
836 821
             .fitSNV <- function(tp, pp) {
837
-                .fitSNVp <- function(px) {
838
-                  if (verbose) 
839
-                    message("Fitting SNVs for purity ", round(px, digits = 2), " and tumor ploidy ", 
840
-                      round(weighted.mean(C, li), digits = 2), ".")
822
+                .fitSNVp <- function(px, cont.rate=prior.contamination) {
823
+                    flog.info("Fitting SNVs for purity %.2f, tumor ploidy %.2f and contamination %.2f.", 
824
+                        px, weighted.mean(C, li), cont.rate)
841 825
                   
842 826
                   list(beta.model = .calcSNVLLik(vcf, tumor.id.in.vcf, ov, px, test.num.copy, 
843
-                    C.posterior, C, opt.C, snv.model = "beta", prior.somatic, mapping.bias, 
844
-                    snv.lr, sampleid, cont.rate = prior.contamination, prior.K = prior.K, 
827
+                    C.likelihood, C, opt.C, snv.model = "beta", prior.somatic, mapping.bias, 
828
+                    snv.lr, sampleid, cont.rate = cont.rate, prior.K = prior.K, 
845 829
                     max.coverage.vcf = max.coverage.vcf, non.clonal.M = non.clonal.M, 
846 830
                     model.homozygous = model.homozygous, error = error, max.mapping.bias = max.mapping.bias))
847 831
                 }
... ...
@@ -852,9 +836,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
852 836
                     GoF <- .getGoF(list(SNV.posterior=res.snvllik[[1]]))
853 837
                     idx <- 1
854 838
                     if (GoF < (min.gof-0.05)) { 
855
-                        if (verbose) message("Poor goodness-of-fit (", 
856
-                            round(GoF, digits=3), 
857
-                            "). Skipping post-optimization.")
839
+                        flog.info("Poor goodness-of-fit (%.3f). Skipping post-optimization.", GoF)
858 840
                     } else {    
859 841
                         res.snvllik <- c(res.snvllik, lapply(tp[-1], .fitSNVp))
860 842
                       px.rij <- lapply(tp, function(px) vapply(which(!is.na(C)), function(i) .calcLlikSegment(subclonal = subclonal[i], 
... ...
@@ -870,8 +852,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
870 852
                   idx <- 1
871 853
                 }
872 854
                 p <- tp[idx]
873
-                if (verbose) 
874
-                  message("Optimized purity: ", p)
855
+                flog.info("Optimized purity: %.2f", p)
875 856
                 SNV.posterior <- res.snvllik[[idx]]
876 857
                 list(p = p, SNV.posterior = SNV.posterior, llik = px.rij.s[idx])
877 858
             }
... ...
@@ -879,21 +860,57 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
879 860
             p <- fitRes$p
880 861
             SNV.posterior <- fitRes$SNV.posterior
881 862
         }
882
-        
883
-        list(log.likelihood = llik, purity = p, ploidy = weighted.mean(C, li), total.ploidy = total.ploidy, 
884
-            seg = seg.adjusted, C.posterior = data.frame(C.posterior/rowSums(C.posterior), 
885
-                ML.C = C, ML.Subclonal = subclonal), SNV.posterior = SNV.posterior, 
886
-            fraction.subclonal = subclonal.f, fraction.homozygous.loss = sum(li[which(C < 
887
-                0.01)])/sum(li), gene.calls = gene.calls, log.ratio.offset = log.ratio.offset, 
888
-            SA.iterations = iter, failed = FALSE)
863
+         
864
+        list(log.likelihood = llik, purity = p, ploidy = weighted.mean(C, li),
865
+            total.ploidy = total.ploidy, seg = seg.adjusted, 
866
+            C.posterior = data.frame(C.likelihood/rowSums(C.likelihood), ML.C =
867
+            C, Opt.C = opt.C, ML.Subclonal = subclonal),
868
+            C.likelihood=C.likelihood, SNV.posterior = SNV.posterior,
869
+            fraction.subclonal = subclonal.f, fraction.homozygous.loss =
870
+            sum(li[which(C < 0.01)])/sum(li), gene.calls = gene.calls,
871
+            log.ratio.offset = log.ratio.offset, SA.iterations = iter, failed =
872
+            FALSE)
889 873
     }
890 874
     
891 875
     results <- lapply(seq_len(nrow(candidate.solutions$candidates)), .optimizeSolution)
892
-    if (verbose) 
893
-        message("Remember, posterior probabilities assume a correct SCNA fit.")
894 876
     
895 877
     results <- .rankResults(results)
896 878
     results <- .filterDuplicatedResults(results)
879
+
880
+    if (grepl("CONTAMINATION", vcf.filtering$flag_comment)) {
881
+        cont.rate <- .plotContamination(
882
+            results[[1]]$SNV.posterior$beta.model$posteriors, 
883
+            max.mapping.bias, plot=FALSE)
884
+        if (cont.rate > prior.contamination) {
885
+            flog.info("Initial guess of contamination rate: %.3f", cont.rate)
886
+        }    
887
+    }
888
+    ## optimize contamination. we just re-run the fitting 
889
+    if (grepl("CONTAMINATION", vcf.filtering$flag_comment) && 
890
+        cont.rate>prior.contamination) {
891
+        flog.info("Optimizing contamination rate...")
892
+             
893
+        res.snvllik <-
894
+            .calcSNVLLik(vcf, tumor.id.in.vcf,
895
+                  ov, results[[1]]$purity, test.num.copy, results[[1]]$C.likelihood, 
896
+                  results[[1]]$C.posterior$ML.C,
897
+                  results[[1]]$C.posterior$Opt.C,
898
+                  snv.model = "beta", prior.somatic, mapping.bias,
899
+                  snv.lr, sampleid, cont.rate = cont.rate, prior.K = prior.K,
900
+                  max.coverage.vcf = max.coverage.vcf, non.clonal.M = non.clonal.M,
901
+                  model.homozygous = model.homozygous, error = error,
902
+                  max.mapping.bias = max.mapping.bias)
903
+        results[[1]]$SNV.posterior$beta.model <- res.snvllik
904
+        cont.rate <- .plotContamination(
905
+                    results[[1]]$SNV.posterior$beta.model$posteriors,
906
+                                max.mapping.bias, plot=FALSE)
907
+                flog.info("Optimized contamination rate: %.3f", cont.rate)
908
+        results[[1]]$SNV.posterior$beta.model$posterior.contamination <- cont.rate
909
+        # add contamination rate to flag comment
910
+        vcf.filtering$flag_comment <- gsub("POTENTIAL SAMPLE CONTAMINATION", 
911
+            paste0("POTENTIAL SAMPLE CONTAMINATION (", round(cont.rate*100, digits=1),"%)"), 
912
+            vcf.filtering$flag_comment)
913
+    }
897 914
     results <- .flagResults(results, max.non.clonal = max.non.clonal, max.logr.sdev = max.logr.sdev, 
898 915
         logr.sdev = sd.seg, max.segments = max.segments, min.gof = min.gof, flag = vcf.filtering$flag, 
899 916
         flag_comment = vcf.filtering$flag_comment, dropout = dropoutWarning, use.somatic.status = args.filterVcf$use.somatic.status, 
... ...
@@ -905,10 +922,13 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
905 922
     }
906 923
     
907 924
     if (length(results) < 1) {
908
-        warning("Could not find valid purity and ploidy solution.")
925
+        flog.warn("Could not find valid purity and ploidy solution.")
909 926
     }
927
+    .logFooter()
910 928
     list(candidates = candidate.solutions, results = results, input = list(tumor = tumor.coverage.file, 
911 929
         normal = normal.coverage.file, log.ratio = data.frame(probe = normal[, 1], 
912 930
             log.ratio = log.ratio), log.ratio.sdev = sd.seg, vcf = vcf, sampleid = sampleid, 
913 931
         sex = sex, sex.vcf = sex.vcf, chr.hash = chr.hash, centromeres = centromeres))
914 932
 }
933
+
934
+
... ...
@@ -37,9 +37,16 @@
37 37
 #' properly ordered.
38 38
 #' @param centromeres A \code{data.frame} with centromere positions in first
39 39
 #' three columns.  Currently not supported in this function.
40
-#' @param verbose Verbose output.
41 40
 #' @return \code{data.frame} containing the segmentation.
42 41
 #' @author Markus Riester
42
+#' @references Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. 
43
+#' (2004). Circular binary segmentation for the analysis of array-based DNA 
44
+#' copy number data. Biostatistics 5: 557-572.
45
+#'
46
+#' Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary 
47
+#' segmentation algorithm for the analysis of array CGH data. Bioinformatics 
48
+#' 23: 657-63.
49
+#'
43 50
 #' @seealso \code{\link{runAbsoluteCN}}
44 51
 #' @examples
45 52
 #' 
... ...
@@ -67,7 +74,7 @@ segmentationCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,
67 74
     min.coverage, sampleid, target.weight.file = NULL, alpha = 0.005, undo.SD =
68 75
         NULL, vcf = NULL, tumor.id.in.vcf = 1, normal.id.in.vcf = NULL,
69 76
     max.segments = NULL, prune.hclust.h = NULL, prune.hclust.method = "ward.D",
70
-    chr.hash = NULL, centromeres = NULL, verbose = TRUE) {
77
+    chr.hash = NULL, centromeres = NULL) {
71 78
 
72 79
     if (is.null(chr.hash)) chr.hash <- .getChrHash(tumor$chr)
73 80
     
... ...
@@ -76,24 +83,21 @@ segmentationCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,
76 83
         target.weights <- read.delim(target.weight.file, as.is=TRUE)
77 84
         target.weights <- target.weights[match(as.character(tumor[,1]), 
78 85
             target.weights[,1]),2]
79
-        if (verbose) message("Target weights found, will use weighted CBS.")
86
+        flog.info("Target weights found, will use weighted CBS.")
80 87
     }
81
-    x <- .CNV.analyze2(normal, tumor, logR=log.ratio, plot.cnv=plot.cnv, 
88
+    x <- .CNV.analyze2(normal, tumor, log.ratio=log.ratio, plot.cnv=plot.cnv, 
82 89
         min.coverage=min.coverage, sampleid=sampleid, alpha=alpha, 
83 90
         weights=target.weights, sdundo=undo.SD, max.segments=max.segments,
84
-        chr.hash=chr.hash, verbose=verbose) 
91
+        chr.hash=chr.hash) 
85 92
     if (!is.null(vcf)) {
86 93
         x <- .pruneByVCF(x, vcf, tumor.id.in.vcf, chr.hash=chr.hash)
87 94
         x <- .findCNNLOH(x, vcf, tumor.id.in.vcf, alpha=alpha, 
88 95
             chr.hash=chr.hash)
89 96
         x <- .pruneByHclust(x, vcf, tumor.id.in.vcf, h=prune.hclust.h, 
90
-            method=prune.hclust.method, chr.hash=chr.hash, verbose=verbose)
97
+            method=prune.hclust.method, chr.hash=chr.hash)
91 98
     }
92 99
     idx.enough.markers <- x$cna$output$num.mark > 1
93 100
     rownames(x$cna$output) <- NULL
94
-    if (verbose) {
95
-        print(x$cna$output[idx.enough.markers,])
96
-    }
97 101
     x$cna$output[idx.enough.markers,]
98 102
 }
99 103
 
... ...
@@ -159,7 +163,7 @@ iterations=2, chr.hash ) {
159 163
 }
160 164
         
161 165
 .pruneByHclust <- function(x, vcf, tumor.id.in.vcf, h=NULL, method="ward.D", 
162
-    min.variants=5, chr.hash, iterations=2, verbose=TRUE) {
166
+    min.variants=5, chr.hash, iterations=2) {
163 167
     for (iter in seq_len(iterations)) {
164 168
     seg <- x$cna$output
165 169
     #message("HCLUST: ", iter, " Num segment LRs: ", length(table(x$cna$output$seg.mean)))
... ...
@@ -180,7 +184,7 @@ iterations=2, chr.hash ) {
180 184
 
181 185
     if (is.null(h)) {
182 186
         h <- .getPruneH(seg)
183
-        if (verbose) message("Setting prune.hclust.h parameter to ", h)
187
+        flog.info("Setting prune.hclust.h parameter to %f.", h)
184 188
     }   
185 189
 
186 190
     numVariants <- sapply(seq_len(nrow(seg)), function(i) 
... ...
@@ -293,9 +297,9 @@ iterations=2, chr.hash ) {
293 297
         
294 298
 # ExomeCNV version without the x11() calls 
295 299
 .CNV.analyze2 <-
296
-function(normal, tumor, logR=NULL, min.coverage=15, weights=NULL, sdundo=NULL,
297
-undo.splits="sdundo", smooth=TRUE, alpha=0.01, sampleid=NULL, plot.cnv=TRUE,
298
-max.segments=NULL, chr.hash=chr.hash, verbose=TRUE) {
300
+function(normal, tumor, log.ratio=NULL, min.coverage=15, weights=NULL, sdundo=NULL,
301
+undo.splits="sdundo", alpha=0.01, sampleid=NULL, plot.cnv=TRUE,
302
+max.segments=NULL, chr.hash=chr.hash) {
299 303
 
300 304
     # first, do it for exons with enough coverage. MR: added less stringent 
301 305
     # cutoff in case normal looks great. these could be homozygous deletions 
... ...
@@ -303,51 +307,46 @@ max.segments=NULL, chr.hash=chr.hash, verbose=TRUE) {
303 307
     well.covered.exon.idx <- .getWellCoveredExons(normal, tumor, 
304 308
         min.coverage)
305 309
 
306
-    if (verbose) message("Removing ", sum(!well.covered.exon.idx), 
307
-        " low coverage exons.")
308
-    if (is.null(logR)) norm.log.ratio = calculateLogRatio(normal, tumor, verbose)
309
-    else norm.log.ratio = logR
310
+    flog.info("Removing %i low coverage exons.", sum(!well.covered.exon.idx))
310 311
     
311 312
     if (is.null(sdundo)) {
312
-        sdundo <- .getSDundo(norm.log.ratio[well.covered.exon.idx])
313
+        sdundo <- .getSDundo(log.ratio[well.covered.exon.idx])
313 314
     }   
314 315
      
315
-    CNA.obj <- CNA(norm.log.ratio[well.covered.exon.idx], 
316
+    CNA.obj <- CNA(log.ratio[well.covered.exon.idx], 
316 317
         .strip.chr.name(normal$chr[well.covered.exon.idx], chr.hash), 
317 318
         floor((normal$probe_start[well.covered.exon.idx] + 
318 319
         normal$probe_end[well.covered.exon.idx])/2), data.type="logratio", 
319 320
         sampleid=sampleid)
320 321
 
321
-    smoothed.CNA.obj = if (smooth) smooth.CNA(CNA.obj) else CNA.obj
322
-
323 322
     try.again <- 0
324 323
 
325 324
     while (try.again < 2) {
326
-        if (verbose) message("Setting undo.SD parameter to ", sdundo)
325
+        flog.info("Setting undo.SD parameter to %f.", sdundo)
327 326
         if (!is.null(weights)) { 
328 327
             weights <- weights[well.covered.exon.idx]
329 328
             # MR: this shouldn't happen. In doubt, count them as median.
330 329
             weights[is.na(weights)] <- median(weights, na.rm=TRUE)
331
-            segment.smoothed.CNA.obj <- segment(smoothed.CNA.obj, 
330
+            segment.CNA.obj <- segment(CNA.obj, 
332 331
                 undo.splits=undo.splits, undo.SD=sdundo, 
333
-                verbose=ifelse(verbose, 1, 0), alpha=alpha,weights=weights)
332
+                verbose=0, alpha=alpha,weights=weights)
334 333
         } else {
335
-            segment.smoothed.CNA.obj <- segment(smoothed.CNA.obj, 
334
+            segment.CNA.obj <- segment(CNA.obj, 
336 335
                 undo.splits=undo.splits, undo.SD=sdundo, 
337
-                verbose=ifelse(verbose, 1, 0), alpha=alpha)
336
+                verbose=0, alpha=alpha)
338 337
         } 
339
-        if (is.null(max.segments) || nrow(segment.smoothed.CNA.obj$output) 
338
+        if (is.null(max.segments) || nrow(segment.CNA.obj$output) 
340 339
             < max.segments) break
341 340
         sdundo <- sdundo * 1.5
342 341
         try.again <- try.again + 1
343 342
     }
344 343
 
345 344
     if (plot.cnv) {
346
-        plot(segment.smoothed.CNA.obj, plot.type="s")
347
-        plot(segment.smoothed.CNA.obj, plot.type="w")
345
+        plot(segment.CNA.obj, plot.type="s")
346
+        plot(segment.CNA.obj, plot.type="w")
348 347
     }
349 348
 
350
-    return(list(cna=segment.smoothed.CNA.obj, logR=norm.log.ratio))
349
+    return(list(cna=segment.CNA.obj, logR=log.ratio))
351 350
 }
352 351
 
353 352
 .getSegSizes <- function(seg) {
... ...
@@ -22,8 +22,6 @@
22 22
 #' function.
23 23
 #' @param undo.SD \code{undo.SD} for CBS, see documentation of the
24 24
 #' \code{segment} function. If \code{NULL}, try to find a sensible default.
25
-#' @param drop.outliers If \code{TRUE}, calls the 
26
-#' \code{dropSegmentationOutliers} function from PSCBS before segmentation.
27 25
 #' @param flavor Flavor value for PSCBS. See \code{segmentByNonPairedPSCBS}.
28 26
 #' @param tauA tauA argument for PSCBS. See \code{segmentByNonPairedPSCBS}.
29 27
 #' @param vcf Optional VCF object with germline allelic ratios.
... ...
@@ -43,11 +41,20 @@
43 41
 #' properly ordered.
44 42
 #' @param centromeres A \code{data.frame} with centromere positions in first
45 43
 #' three columns.  If not \code{NULL}, add breakpoints at centromeres. 
46
-#' @param verbose Verbose output.
47 44
 #' @param \dots Additional parameters passed to the 
48 45
 #' \code{segmentByNonPairedPSCBS} function.
49 46
 #' @return \code{data.frame} containing the segmentation.
50 47
 #' @author Markus Riester
48
+#' @references Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. 
49
+#' (2004). Circular binary segmentation for the analysis of array-based DNA 
50
+#' copy number data. Biostatistics 5: 557-572.
51
+#'
52
+#' Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary 
53
+#' segmentation algorithm for the analysis of array CGH data. Bioinformatics 
54
+#' 23: 657-63.
55
+#'
56
+#' Olshen et al. (2011). Parent-specific copy number in paired tumor-normal 
57
+#' studies using circular binary segmentation. Bioinformatics.
51 58
 #' @seealso \code{\link{runAbsoluteCN}}
52 59
 #' @examples
53 60
 #' 
... ...
@@ -72,10 +79,10 @@
72 79
 #' @export segmentationPSCBS
73 80
 segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, 
74 81
     min.coverage, sampleid, target.weight.file = NULL, alpha = 0.005, undo.SD =
75
-    NULL, drop.outliers=TRUE, flavor = "tcn&dh", tauA = 0.03, vcf = NULL,
82
+    NULL, flavor = "tcn&dh", tauA = 0.03, vcf = NULL,
76 83
     tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, max.segments = NULL,
77 84
     prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL,
78
-    centromeres = NULL, verbose = TRUE, ...) {
85
+    centromeres = NULL, ...) {
79 86
 
80 87
     debug <- TRUE
81 88
         
... ...
@@ -95,8 +102,7 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,
95 102
         target.weights <- read.delim(target.weight.file, as.is=TRUE)
96 103
         target.weights <- target.weights[match(as.character(tumor[,1]), 
97 104
             target.weights[,1]),2]
98
-        if (verbose) message(
99
-            "Target weights found, but currently not supported by PSCBS. ",
105
+         flog.info("Target weights found, but currently not supported by PSCBS. %s",
100 106
             "Will simply exclude targets with low weight.")
101 107
         lowWeightTargets <- target.weights < 1/3
102 108
         well.covered.exon.idx[which(lowWeightTargets)] <- FALSE
... ...
@@ -134,7 +140,7 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,
134 140
     #} else {
135 141
     if (is.null(undo.SD)) {
136 142
         undo.SD <- .getSDundo(log.ratio)
137
-        if (verbose) message("Setting undo.SD parameter to ", undo.SD)
143
+        flog.info("Setting undo.SD parameter to %f.", undo.SD)
138 144
     }   
139 145
     knownSegments <- NULL
140 146
     if (!is.null(centromeres)) {
... ...
@@ -145,9 +151,6 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,
145 151
             chr.hash)
146 152
         knownSegments <- PSCBS::gapsToSegments(knownSegments)
147 153
     }    
148
-    if (drop.outliers) {
149
-        d.f <- PSCBS::dropSegmentationOutliers(d.f)
150
-    }    
151 154
     seg <- PSCBS::segmentByNonPairedPSCBS(d.f, tauA=tauA, 
152 155
         flavor=flavor, undoTCN=undo.SD, knownSegments=knownSegments, 
153 156
         min.width=3,alphaTCN=alpha, ...)
... ...
@@ -157,7 +160,7 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,
157 160
 
158 161
     if (!is.null(vcf)) {
159 162
         x <- .pruneByHclust(x, vcf, tumor.id.in.vcf, h=prune.hclust.h, 
160
-            method=prune.hclust.method, chr.hash=chr.hash, verbose=verbose)
163
+            method=prune.hclust.method, chr.hash=chr.hash)
161 164
     }
162 165
     x$cna$output
163 166
 }
... ...
@@ -19,7 +19,6 @@
19 19
 #' @param smooth Impute mapping bias of variants not found in the panel by
20 20
 #' smoothing of neighboring SNPs. Requires \code{normal.panel.vcf.file}.
21 21
 #' @param smooth.n Number of neighboring variants used for smoothing.
22
-#' @param verbose Verbose output.
23 22
 #' @return A \code{numeric(nrow(vcf))} vector with the mapping bias of for each
24 23
 #' variant in the \code{CollapsedVCF}. Mapping bias is expected as scaling
25 24
 #' factor. Adjusted allelic fraction is (observed allelic fraction)/(mapping
... ...
@@ -35,8 +34,7 @@
35 34
 #' 
36 35
 #' @export setMappingBiasVcf
37 36
 setMappingBiasVcf <- function(vcf, tumor.id.in.vcf = NULL,
38
-normal.panel.vcf.file = NULL, min.normals = 5, smooth = TRUE, smooth.n = 5,
39
-verbose = TRUE) {
37
+normal.panel.vcf.file = NULL, min.normals = 5, smooth = TRUE, smooth.n = 5) {
40 38
 
41 39
     if (is.null(tumor.id.in.vcf)) {
42 40
         tumor.id.in.vcf <- .getTumorIdInVcf(vcf)
... ...
@@ -46,12 +44,12 @@ verbose = TRUE) {
46 44
          normal.id.in.vcf <- .getNormalIdInVcf(vcf, tumor.id.in.vcf)
47 45
          faAll <- as.numeric(geno(vcf)$FA[!info(vcf)$SOMATIC,normal.id.in.vcf])
48 46
          mappingBias <- mean(faAll, na.rm=TRUE)*2
49
-         if (verbose) message("Found SOMATIC annotation in VCF. ",
50
-            "Setting mapping bias to ", round(mappingBias, digits=3)) 
47
+         flog.info("Found SOMATIC annotation in VCF. Setting mapping bias to %.3f.", 
48
+            mappingBias) 
51 49
     }     
52 50
     if (is.null(info(vcf)$SOMATIC) && is.null(normal.panel.vcf.file)) {
53
-        message(
54
-            "VCF does not contain somatic status. For best results, consider\n",
51
+        flog.info(
52
+            "VCF does not contain somatic status. For best results, consider%s%s",
55 53
             "providing normal.panel.vcf.file when matched normals are not ",
56 54
             "available.")
57 55
     }    
... ...
@@ -64,10 +62,9 @@ verbose = TRUE) {
64 62
     if (is.null(normal.panel.vcf.file)) {
65 63
         return(tmp)
66 64
     } 
67
-    nvcf <- .readNormalPanelVcfLarge(vcf, normal.panel.vcf.file, 
68
-        verbose=verbose)
65
+    nvcf <- .readNormalPanelVcfLarge(vcf, normal.panel.vcf.file)
69 66
     if (nrow(nvcf) < 1) {
70
-        warning("setMappingBiasVcf: no hits in ", normal.panel.vcf.file, ".")
67
+        flog.warn("setMappingBiasVcf: no hits in %s.", normal.panel.vcf.file)
71 68
         return(tmp)
72 69
     }
73 70
 
... ...
@@ -101,15 +98,15 @@ verbose = TRUE) {
101 98
     as.numeric(y)
102 99
 }
103 100
 
104
-.readNormalPanelVcfLarge <- function(vcf, normal.panel.vcf.file, max.file.size=1, verbose) {
101
+.readNormalPanelVcfLarge <- function(vcf, normal.panel.vcf.file, max.file.size=1) {
105 102
     genome <- genome(vcf)[1]    
106 103
     if (file.size(normal.panel.vcf.file)/1000^3 > max.file.size || nrow(vcf)< 1000) {
107
-        if (verbose) message("Scanning ", normal.panel.vcf.file, "...")
104
+        flog.info("Scanning %s...", normal.panel.vcf.file)
108 105
         nvcf <- readVcf(TabixFile(normal.panel.vcf.file), genome=genome, 
109 106
             ScanVcfParam(which = rowRanges(vcf), info=NA, fixed=NA, 
110 107
             geno="FA"))
111 108
     } else {
112
-        if (verbose) message("Loading ", normal.panel.vcf.file, "...")
109
+        flog.info("Loading %s...", normal.panel.vcf.file)
113 110
         nvcf <- readVcf(normal.panel.vcf.file, genome=genome,
114 111
             ScanVcfParam(info=NA, fixed=NA, geno="FA"))
115 112
         nvcf <- subsetByOverlaps(nvcf, rowRanges(vcf))
... ...
@@ -4,6 +4,7 @@ library('getopt')
4 4
 
5 5
 spec <- matrix(c(
6 6
 'help' , 'h', 0, "logical",
7
+'version',  'v', 0, "logical",
7 8
 'force' , 'f', 0, "logical",
8 9
 'bam', 'b', 1, "character",
9 10
 'gatkcoverage', 'g', 1, "character",
... ...
@@ -18,6 +19,11 @@ if ( !is.null(opt$help) ) {
18 19
     q(status=1)
19 20
 }
20 21
 
22
+if (!is.null(opt$version)) {
23
+    message(as.character(packageVersion("PureCN")))
24
+    q(status=1)
25
+}    
26
+
21 27
 force <- !is.null(opt$force)
22 28
 
23 29
 bam.file <- opt$bam
... ...
@@ -4,10 +4,11 @@ library('getopt')
4 4
 
5 5
 spec <- matrix(c(
6 6
 'help' ,  'h', 0, "logical",
7
+'version', 'v', 0, "logical",
7 8
 'force' , 'f', 0, "logical",
8 9
 'gcgene', 'c', 1, "character",
9 10
 'method', 'm', 1, "character",
10
-'coveragefiles', 'v', 1, "character",
11
+'coveragefiles', 'b', 1, "character",
11 12
 'assay',   'a',1, "character",
12 13
 'outdir' , 'o', 1, "character"
13 14
 ), byrow=TRUE, ncol=4)
... ...
@@ -18,6 +19,12 @@ if ( !is.null(opt$help) ) {
18 19
     q(status=1)
19 20
 }
20 21
 
22
+if (!is.null(opt$version)) {
23
+    message(as.character(packageVersion("PureCN")))
24
+    q(status=1)
25
+}    
26
+
27
+
21 28
 .checkFileList <- function(file) {
22 29
     files <- read.delim(file, as.is=TRUE, header=FALSE)[,1]
23 30
     numExists <- sum(file.exists(files), na.rm=TRUE)
... ...
@@ -4,10 +4,11 @@ library('getopt')
4 4
 
5 5
 spec <- matrix(c(
6 6
 'help',           'h', 0, "logical",
7
+'version',        'v', 0, "logical",
7 8
 'force' ,         'f', 0, "logical",
8 9
 'normal',         'n', 1, "character",
9 10
 'tumor',          't', 1, "character",
10
-'vcf',            'v', 1, "character",
11
+'vcf',            'b', 1, "character",
11 12
 'rds',            'r', 1, "character",
12 13
 'genome',         'g', 1, "character",
13 14
 'gcgene',         'c', 1, "character",
... ...
@@ -31,6 +32,11 @@ if ( !is.null(opt$help) ) {
31 32
     q(status=1)
32 33
 }
33 34
 
35
+if (!is.null(opt$version)) {
36
+    message(as.character(packageVersion("PureCN")))
37
+    q(status=1)
38
+}    
39
+
34 40
 force <- !is.null(opt$force)
35 41
 post.optimize <- !is.null(opt$postoptimize)
36 42
 normal.coverage.file <- opt$normal
... ...
@@ -54,7 +60,9 @@ file.rds <- opt$rds
54 60
 if (!is.null(file.rds) && file.exists(file.rds)) {
55 61
     if (is.null(outdir)) outdir <- dirname(file.rds)
56 62
 } else {
57
-    if (is.null(sampleid)) stop("Need sampleid.")
63
+    if (is.null(sampleid)) stop("Need --sampleid.")
64
+    if (is.null(genome)) stop("Need --genome")
65
+    genome <- as.character(genome)
58 66
     file.rds <- file.path(outdir, paste0(sampleid, '_purecn.rds'))
59 67
     if (is.null(seg.file)) {
60 68
         tumor.coverage.file <- normalizePath(tumor.coverage.file, 
... ...
@@ -94,6 +102,7 @@ if (file.exists(file.rds) && !force) {
94 102
     } else if (is.null(normal.coverage.file) && is.null(seg.file)) {
95 103
         stop("Need either normalDB or normal.coverage.file")
96 104
     }    
105
+    file.log <- file.path(outdir, paste0(sampleid, '_purecn.log'))
97 106
 
98 107
     pdf(paste(outdir,"/", sampleid, '_purecn_segmentation.pdf', sep=''), 
99 108
         width=10, height=11)
... ...
@@ -107,7 +116,7 @@ if (file.exists(file.rds) && !force) {
107 116
             args.setMappingBiasVcf=
108 117
                 list(normal.panel.vcf.file=normal.panel.vcf.file),
109 118
             normalDB=normalDB, model.homozygous=model.homozygous,
110
-            post.optimize=post.optimize)
119
+            log.file=file.log, post.optimize=post.optimize)
111 120
     dev.off()
112 121
     saveRDS(ret, file=file.rds)
113 122
 }
... ...
@@ -1,9 +1,9 @@
1 1
 test_getSexFromCoverage <- function() {
2 2
     tumor.coverage.file <- system.file("extdata", "example_tumor.txt", package="PureCN")
3 3
     coverage <- readCoverageGatk(tumor.coverage.file)
4
-    sex <- getSexFromCoverage(coverage, verbose=FALSE)
4
+    sex <- getSexFromCoverage(coverage)
5 5
     checkTrue(is.na(sex))
6
-    sex <- getSexFromCoverage(tumor.coverage.file, verbose=FALSE)
6
+    sex <- getSexFromCoverage(tumor.coverage.file)
7 7
     checkTrue(is.na(sex))
8 8
 
9 9
     chr22 <- coverage[which(coverage$chr=="chr22"),]
... ...
@@ -240,7 +240,7 @@ test_runAbsoluteCN <- function() {
240 240
     
241 241
     # test with a log.ratio and no tumor file
242 242
     log.ratio <- calculateLogRatio(readCoverageGatk(normal.coverage.file),
243
-        readCoverageGatk(tumor.coverage.file), verbose=FALSE)
243
+        readCoverageGatk(tumor.coverage.file))
244 244
 
245 245
     ret <- runAbsoluteCN( log.ratio=log.ratio,
246 246
         gc.gene.file=gc.gene.file,
... ...
@@ -4,7 +4,7 @@
4 4
 \alias{calculateLogRatio}
5 5
 \title{Calculate coverage log-ratio of tumor vs. normal}
6 6
 \usage{
7
-calculateLogRatio(normal, tumor, verbose = TRUE)
7
+calculateLogRatio(normal, tumor)
8 8
 }
9 9
 \arguments{
10 10
 \item{normal}{Normal coverage read in by the \code{\link{readCoverageGatk}}
... ...
@@ -12,8 +12,6 @@ function.}
12 12
 
13 13
 \item{tumor}{Tumor coverage read in by the \code{\link{readCoverageGatk}}
14 14
 function.}
15
-
16
-\item{verbose}{Verbose output.}
17 15
 }
18 16
 \value{
19 17
 \code{numeric(nrow(tumor))}, tumor vs. normal copy number log-ratios
... ...
@@ -33,7 +31,7 @@ tumor.coverage.file <- system.file("extdata", "example_tumor.txt",
33 31
     package="PureCN")
34 32
 normal <- readCoverageGatk(normal.coverage.file)
35 33
 tumor <- readCoverageGatk(tumor.coverage.file)
36
-log.ratio <- calculateLogRatio(normal, tumor, verbose=FALSE)
34
+log.ratio <- calculateLogRatio(normal, tumor)
37 35
 
38 36
 }
39 37
 \author{
... ...
@@ -70,7 +70,7 @@ legend("bottomright", legend=paste("Purity", purity), fill=seq_along(purity))
70 70
 Markus Riester
71 71
 }
72 72
 \references{
73
-Carter et al., Absolute quantification of somatic DNA
74
-alterations in human cancer. Nature Biotechnology 2012.
73
+Carter et al. (2012), Absolute quantification of somatic DNA
74
+alterations in human cancer. Nature Biotechnology.
75 75
 }
76 76
 
... ...
@@ -5,7 +5,7 @@
5 5
 \title{Calculate target weights}
6 6
 \usage{
7 7
 createTargetWeights(tumor.coverage.files, normal.coverage.files,
8
-  target.weight.file, verbose = TRUE)
8
+  target.weight.file)
9 9
 }
10 10
 \arguments{
11 11
 \item{tumor.coverage.files}{A small number (1-3) of GATK tumor or normal
... ...
@@ -16,8 +16,6 @@ coverage samples.}
16 16
 with files in \code{tumor.coverage.files}.}
17 17
 
18 18
 \item{target.weight.file}{Output filename.}
19
-
20
-\item{verbose}{Verbose output.}
21 19
 }
22 20
 \value{
23 21
 A \code{data.frame} with target weights.
... ...
@@ -5,8 +5,7 @@
5 5
 \title{Remove low quality targets}
6 6
 \usage{
7 7
 filterTargets(log.ratio, tumor, gc.data, seg.file, filter.lowhigh.gc = 0.001,
8
-  min.targeted.base = 5, normalDB = NULL, normalDB.min.coverage = 0.2,
9
-  verbose)
8
+  min.targeted.base = 5, normalDB = NULL, normalDB.min.coverage = 0.2)
10 9
 }
11 10
 \arguments{
12 11
 \item{log.ratio}{Copy number log-ratios, one for each target or interval in
... ...
@@ -33,8 +32,6 @@ likely very different from the true GC content of the probes.}
33 32
 
34 33
 \item{normalDB.min.coverage}{Exclude targets with coverage lower than 20
35 34
 percent of the chromosome median in the pool of normals.}
36
-
37
-\item{verbose}{Verbose output.}
38 35
 }
39 36
 \value{
40 37
 \code{logical(length(log.ratio))} specifying which targets should be
... ...
@@ -6,10 +6,10 @@
6 6
 \usage{
7 7
 filterVcfBasic(vcf, tumor.id.in.vcf = NULL, use.somatic.status = TRUE,
8 8
   snp.blacklist = NULL, af.range = c(0.03, 0.97),
9
-  contamination.cutoff = c(0.05, 0.075), min.coverage = 15,
9
+  contamination.cutoff = c(0.075, 0.02), min.coverage = 15,
10 10
   min.base.quality = 25, min.supporting.reads = NULL, error = 0.001,
11 11
   target.granges = NULL, remove.off.target.snvs = TRUE,
12
-  model.homozygous = FALSE, interval.padding = 50, verbose = TRUE)
12
+  model.homozygous = FALSE, interval.padding = 50)
13 13
 }
14 14
 \arguments{
15 15
 \item{vcf}{\code{CollapsedVCF} object, read in with the \code{readVcf}
... ...
@@ -34,8 +34,9 @@ contamination. If a matched normal is available, this value is ignored,
34 34
 because homozygosity can be confirmed in the normal.}
35 35
 
36 36
 \item{contamination.cutoff}{Count SNPs in dbSNP with allelic fraction
37
-smaller than the first value, if found on most chromosomes, remove all with
38
-AF smaller than the second value.}
37
+smaller than the first value or greater than 1-first value, if found on most chromosomes, mark sample
38
+as contaminated if the fraction of putative contamination SNPs exceeds
39
+the second value.}
39 40
 
40 41
 \item{min.coverage}{Minimum coverage in tumor. Variants with lower coverage
41 42
 are ignored.}
... ...
@@ -62,8 +63,6 @@ SNPs. Ignored in case a matched normal is provided in the VCF.}
62 63
 
63 64
 \item{interval.padding}{Include variants in the interval flanking regions of
64 65
 the specified size in bp. Requires \code{target.granges}.}
65
-
66
-\item{verbose}{Verbose output.}
67 66
 }
68 67
 \value{
69 68
 A list with elements \item{vcf}{The filtered \code{CollapsedVCF}
... ...
@@ -8,7 +8,7 @@ filterVcfMuTect(vcf, tumor.id.in.vcf = NULL, stats.file = NULL,
8 8
   ignore = c("clustered_read_position", "fstar_tumor_lod",
9 9
   "nearby_gap_events", "poor_mapping_region_alternate_allele_mapq",
10 10
   "poor_mapping_region_mapq0", "possible_contamination", "strand_artifact",
11
-  "seen_in_panel_of_normals"), verbose = TRUE, ...)
11
+  "seen_in_panel_of_normals"), ...)
12 12
 }
13 13
 \arguments{
14 14
 \item{vcf}{\code{CollapsedVCF} object, read in with the \code{readVcf}
... ...
@@ -20,8 +20,6 @@ function from the VariantAnnotation package.}
20 20
 
21 21
 \item{ignore}{MuTect flags that mark variants for exclusion.}
22 22
 
23
-\item{verbose}{Verbose output.}
24
-
25 23
 \item{\dots}{Additional arguments passed to \code{\link{filterVcfBasic}}.}
26 24
 }
27 25
 \value{
... ...
@@ -6,8 +6,7 @@
6 6
 \usage{
7 7
 findBestNormal(tumor.coverage.file, normalDB, pcs = 1:3, num.normals = 1,
8 8
   ignore.sex = FALSE, sex = NULL, normal.coverage.files = NULL,
9
-  pool = FALSE, pool.weights = c("voom", "equal"), plot.pool = FALSE,
10
-  verbose = TRUE, ...)
9
+  pool = FALSE, pool.weights = c("voom", "equal"), plot.pool = FALSE, ...)
11 10
 }
12 11
 \arguments{
13 12
 \item{tumor.coverage.file}{GATK coverage file of a tumor sample.}
... ...
@@ -39,8 +38,6 @@ weight all best normals equally.}
39 38
 
40 39
 \item{plot.pool}{Allows the pooling function to create plots.}
41 40
 
42
-\item{verbose}{Verbose output.}
43
-
44 41
 \item{\dots}{Additional arguments passed to \code{\link{poolCoverage}}.}
45 42
 }
46 43
 \value{
... ...
@@ -5,7 +5,7 @@
5 5
 \title{Get sample sex from coverage}
6 6
 \usage{
7 7
 getSexFromCoverage(coverage.file, min.ratio = 25, min.ratio.na = 20,
8
-  remove.outliers = TRUE, verbose = TRUE)
8
+  remove.outliers = TRUE)
9 9
 }
10 10
 \arguments{
11 11
 \item{coverage.file}{GATK coverage file or data read with
... ...
@@ -23,8 +23,6 @@ be considered when setting cutoffs.}
23 23
 
24 24
 \item{remove.outliers}{Removes coverage outliers before calculating mean
25 25
 chromosome coverages.}
26
-
27
-\item{verbose}{Verbose output.}
28 26
 }
29 27
 \value{
30 28
 Returns a \code{character(1)} with \code{M} for male, \code{F} for
... ...
@@ -5,8 +5,7 @@
5 5
 \title{Get sample sex from a VCF file}
6 6
 \usage{
7 7
 getSexFromVcf(vcf, tumor.id.in.vcf = NULL, min.or = 4, min.or.na = 2.5,
8
-  max.pv = 0.001, homozygous.cutoff = 0.95, af.cutoff = 0.2,
9
-  verbose = TRUE)
8
+  max.pv = 0.001, homozygous.cutoff = 0.95, af.cutoff = 0.2)
10 9
 }
11 10
 \arguments{
12 11
 \item{vcf}{CollapsedVCF object, read in with the \code{readVcf} function
... ...
@@ -29,8 +28,6 @@ homozygous.}
29 28
 
30 29
 \item{af.cutoff}{Remove all SNVs with allelic fraction lower than the
31 30
 specified value.}
32
-
33
-\item{verbose}{Verbose output.}
34 31
 }
35 32
 \value{
36 33
 Returns a \code{character(1)} with \code{M} for male, \code{F} for
... ...
@@ -6,7 +6,7 @@
6 6
 \usage{
7 7
 readCurationFile(file.rds, file.curation = gsub(".rds$", ".csv", file.rds),
8 8
   remove.failed = FALSE, report.best.only = FALSE, min.ploidy = NULL,
9
-  max.ploidy = NULL, verbose = FALSE)
9
+  max.ploidy = NULL)
10 10
 }
11 11
 \arguments{
12 12
 \item{file.rds}{Output of the \code{\link{runAbsoluteCN}} function,
... ...
@@ -25,8 +25,6 @@ be used to automatically ignore unlikely solutions.}
25 25
 
26 26
 \item{max.ploidy}{Maximum ploidy to be considered. If \code{NULL}, all. Can
27 27
 be used to automatically ignore unlikely solutions.}
28
-
29
-\item{verbose}{Verbose output.}
30 28
 }
31 29
 \value{
32 30
 The return value of the corresponding \code{\link{runAbsoluteCN}}
... ...
@@ -20,10 +20,11 @@ runAbsoluteCN(normal.coverage.file = NULL, tumor.coverage.file = NULL,
20 20
   max.coverage.vcf = 300, max.non.clonal = 0.2,
21 21
   max.homozygous.loss = c(0.05, 1e+07), non.clonal.M = 1/3,
22 22
   max.mapping.bias = 0.8, iterations = 30, log.ratio.calibration = 0.25,
23
-  remove.off.target.snvs = NULL, model.homozygous = FALSE, error = 0.001,
24
-  gc.gene.file = NULL, max.dropout = c(0.95, 1.1), max.logr.sdev = 0.75,
25
-  max.segments = 300, min.gof = 0.8, plot.cnv = TRUE,
26
-  cosmic.vcf.file = NULL, post.optimize = FALSE, verbose = TRUE)
23
+  smooth.log.ratio = TRUE, remove.off.target.snvs = NULL,
24
+  model.homozygous = FALSE, error = 0.001, gc.gene.file = NULL,
25
+  max.dropout = c(0.95, 1.1), max.logr.sdev = 0.75, max.segments = 300,
26
+  min.gof = 0.8, plot.cnv = TRUE, cosmic.vcf.file = NULL,
27
+  post.optimize = FALSE, log.file = NULL, verbose = TRUE)
27 28
 }
28 29
 \arguments{
29 30
 \item{normal.coverage.file}{GATK coverage file of normal control (optional
... ...
@@ -85,7 +86,7 @@ to \code{\link{filterVcfMuTect}}, which in turn also calls
85 86
 
86 87
 \item{args.filterVcf}{Arguments for variant filtering function. Arguments
87 88
 \code{vcf}, \code{tumor.id.in.vcf}, \code{min.coverage},
88
-\code{model.homozygous}, \code{error} and \code{verbose} are required in the
89
+\code{model.homozygous} and \code{error} are required in the
89 90
 filter function and are automatically set.}
90 91
 
91 92
 \item{fun.setPriorVcf}{Function to set prior for somatic status for each
... ...
@@ -103,17 +104,17 @@ coverage files. Needs to return a \code{logical} vector whether an interval
103 104
 should be used for segmentation. Defaults to \code{\link{filterTargets}}.}
104 105
 
105 106
 \item{args.filterTargets}{Arguments for target filtering function. Arguments
106
-\code{log.ratio}, \code{tumor}, \code{gc.data}, \code{seg.file},
107
-\code{normalDB} and \code{verbose} are required and automatically set}
107
+\code{log.ratio}, \code{tumor}, \code{gc.data}, \code{seg.file} and
108
+\code{normalDB} are required and automatically set}
108 109
 
109 110
 \item{fun.segmentation}{Function for segmenting the copy number log-ratios.
110 111
 Expected return value is a \code{data.frame} representation of the
111 112
 segmentation. Defaults to \code{\link{segmentationCBS}}.}
112 113
 
113 114
 \item{args.segmentation}{Arguments for segmentation function. Arguments
114
-\code{normal}, \code{tumor}, \code{log.ratio}, \code{plot.cnv},
115
+\code{normal}, \code{tumor}, \code{log.ratio}, \code{plot.cnv} and
115 116
 \code{min.coverage}, \code{sampleid}, \code{vcf}, \code{tumor.id.in.vcf},
116
-\code{centromeres} and \code{verbose} are required in the segmentation function 
117
+\code{centromeres} are required in the segmentation function 
117 118
 and automatically set.}
118 119
 
119 120
 \item{fun.focal}{Function for identifying focal amplifications. Defaults to
... ...
@@ -187,6 +188,9 @@ that should converge quickly. Allowed range is 10 to 250.}
187 188
 \item{log.ratio.calibration}{Re-calibrate log-ratios in the window
188 189
 \code{sd(log.ratio)*log.ratio.calibration}.}
189 190
 
191
+\item{smooth.log.ratio}{Smooth \code{log.ratio} using the \code{DNAcopy}
192
+package.}
193
+
190 194
 \item{remove.off.target.snvs}{Deprecated. Use the corresponding argument in
191 195
 \code{args.filterVcf}.}
192 196
 
... ...
@@ -238,6 +242,8 @@ typically result in a slightly more accurate purity, especially for rather
238 242
 silent genomes or very low purities. Otherwise, it will just use the purity
239 243
 determined via the SCNA-fit.}
240 244
 
245
+\item{log.file}{If not \code{NULL}, store verbose output to file.}
246
+
241 247
 \item{verbose}{Verbose output.}
242 248
 }
243 249
 \value{
... ...
@@ -292,6 +298,14 @@ res <- runAbsoluteCN(seg.file=seg.file, fun.segmentation=funSeg, max.ploidy = 4,
292 298
 \author{
293 299
 Markus Riester
294 300
 }
301
+\references{
302
+Riester et al. (2016). PureCN: Copy number calling and SNV 
303
+classification using targeted short read sequencing. Source Code for Biology 
304
+and Medicine, 11, pp. 13. 
305
+
306
+Carter et al. (2012), Absolute quantification of somatic DNA alterations in 
307
+human cancer. Nature Biotechnology.
308
+}
295 309
 \seealso{
296 310
 \code{\link{correctCoverageBias}} \code{\link{segmentationCBS}}
297 311
 \code{\link{calculatePowerDetectSomatic}}
... ...
@@ -8,7 +8,7 @@ segmentationCBS(normal, tumor, log.ratio, seg, plot.cnv, min.coverage, sampleid,
8 8
   target.weight.file = NULL, alpha = 0.005, undo.SD = NULL, vcf = NULL,
9 9
   tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, max.segments = NULL,
10 10
   prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL,
11
-  centromeres = NULL, verbose = TRUE)
11
+  centromeres = NULL)
12 12
 }
13 13
 \arguments{
14 14
 \item{normal}{GATK coverage data for normal sample.}
... ...
@@ -60,8 +60,6 @@ properly ordered.}
60 60
 
61 61
 \item{centromeres}{A \code{data.frame} with centromere positions in first
62 62
 three columns.  Currently not supported in this function.}
63
-
64
-\item{verbose}{Verbose output.}
65 63
 }
66 64
 \value{
67 65
 \code{data.frame} containing the segmentation.
... ...
@@ -95,6 +93,15 @@ ret <-runAbsoluteCN(normal.coverage.file=normal.coverage.file,
95 93
 \author{
96 94
 Markus Riester
97 95
 }
96
+\references{
97
+Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. 
98
+(2004). Circular binary segmentation for the analysis of array-based DNA 
99
+copy number data. Biostatistics 5: 557-572.
100
+
101
+Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary 
102
+segmentation algorithm for the analysis of array CGH data. Bioinformatics 
103
+23: 657-63.
104
+}
98 105
 \seealso{
99 106
 \code{\link{runAbsoluteCN}}
100 107
 }
... ...
@@ -6,10 +6,10 @@
6 6
 \usage{
7 7
 segmentationPSCBS(normal, tumor, log.ratio, seg, plot.cnv, min.coverage,
8 8
   sampleid, target.weight.file = NULL, alpha = 0.005, undo.SD = NULL,
9
-  drop.outliers = TRUE, flavor = "tcn&dh", tauA = 0.03, vcf = NULL,
10
-  tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, max.segments = NULL,
11
-  prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL,
12
-  centromeres = NULL, verbose = TRUE, ...)
9
+  flavor = "tcn&dh", tauA = 0.03, vcf = NULL, tumor.id.in.vcf = 1,
10
+  normal.id.in.vcf = NULL, max.segments = NULL, prune.hclust.h = NULL,
11
+  prune.hclust.method = "ward.D", chr.hash = NULL, centromeres = NULL,
12
+  ...)
13 13
 }
14 14
 \arguments{
15 15
 \item{normal}{GATK coverage data for normal sample.}
... ...
@@ -38,9 +38,6 @@ function.}
38 38
 \item{undo.SD}{\code{undo.SD} for CBS, see documentation of the
39 39
 \code{segment} function. If \code{NULL}, try to find a sensible default.}
40 40
 
41
-\item{drop.outliers}{If \code{TRUE}, calls the 
42
-\code{dropSegmentationOutliers} function from PSCBS before segmentation.}
43
-
44 41
 \item{flavor}{Flavor value for PSCBS. See \code{segmentByNonPairedPSCBS}.}
45 42
 
46 43
 \item{tauA}{tauA argument for PSCBS. See \code{segmentByNonPairedPSCBS}.}
... ...
@@ -70,8 +67,6 @@ properly ordered.}
70 67
 \item{centromeres}{A \code{data.frame} with centromere positions in first
71 68
 three columns.  If not \code{NULL}, add breakpoints at centromeres.}
72 69
 
73
-\item{verbose}{Verbose output.}
74
-
75 70
 \item{\dots}{Additional parameters passed to the 
76 71
 \code{segmentByNonPairedPSCBS} function.}
77 72
 }
... ...
@@ -108,6 +103,18 @@ gc.gene.file <- system.file("extdata", "example_gc.gene.file.txt",
108 103
 \author{
109 104
 Markus Riester
110 105
 }
106
+\references{
107
+Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. 
108
+(2004). Circular binary segmentation for the analysis of array-based DNA 
109
+copy number data. Biostatistics 5: 557-572.
110
+
111
+Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary 
112
+segmentation algorithm for the analysis of array CGH data. Bioinformatics 
113
+23: 657-63.
114
+
115
+Olshen et al. (2011). Parent-specific copy number in paired tumor-normal 
116
+studies using circular binary segmentation. Bioinformatics.
117
+}
111 118
 \seealso{
112 119
 \code{\link{runAbsoluteCN}}
113 120
 }
... ...
@@ -5,7 +5,7 @@
5 5
 \title{Set Mapping Bias VCF}
6 6
 \usage{
7 7
 setMappingBiasVcf(vcf, tumor.id.in.vcf = NULL, normal.panel.vcf.file = NULL,
8
-  min.normals = 5, smooth = TRUE, smooth.n = 5, verbose = TRUE)
8
+  min.normals = 5, smooth = TRUE, smooth.n = 5)
9 9
 }
10 10
 \arguments{
11 11
 \item{vcf}{\code{CollapsedVCF} object, read in with the \code{readVcf}
... ...
@@ -26,8 +26,6 @@ calculating position-specific mapping bias. Requires
26 26
 smoothing of neighboring SNPs. Requires \code{normal.panel.vcf.file}.}
27 27
 
28 28
 \item{smooth.n}{Number of neighboring variants used for smoothing.}
29
-
30
-\item{verbose}{Verbose output.}
31 29
 }
32 30
 \value{
33 31
 A \code{numeric(nrow(vcf))} vector with the mapping bias of for each
... ...
@@ -5,7 +5,7 @@
5 5
 \title{Set Somatic Prior VCF}
6 6
 \usage{
7 7
 setPriorVcf(vcf, prior.somatic = c(0.5, 5e-04, 0.999, 1e-04, 0.995, 0.01),
8
-  tumor.id.in.vcf = NULL, verbose = TRUE)
8
+  tumor.id.in.vcf = NULL)
9 9
 }
10 10
 \arguments{
11 11
 \item{vcf}{\code{CollapsedVCF} object, read in with the \code{readVcf}
... ...
@@ -24,8 +24,6 @@ value is for the case that variant is in both dbSNP and COSMIC > 2.}
24 24
 
25 25
 \item{tumor.id.in.vcf}{Id of tumor in case multiple samples are stored in
26 26
 VCF.}
27
-
28
-\item{verbose}{Verbose output.}
29 27
 }
30 28
 \value{
31 29
 A \code{numeric(nrow(vcf))} vector with the prior probability of
... ...
@@ -326,10 +326,10 @@ present in 5 or more samples.
326 326
 We next use coverage data of normal samples to estimate the expected variance
327 327
 in coverage per target:
328 328
 
329
-<<targetweightfile1>>=
329
+<<targetweightfile1, message=FALSE>>=
330 330
 target.weight.file <- "target_weights.txt"
331 331
 createTargetWeights(tumor.coverage.file, normal.coverage.files, 
332
-    target.weight.file, verbose=FALSE)
332
+    target.weight.file)
333 333
 @
334 334
 
335 335
 This function calculates target-level copy number log-ratios using all normal
... ...
@@ -858,11 +858,11 @@ target-level copy number log-ratios, and \Biocpkg{PureCN} should be used for
858 858
 segmentation and purity/ploidy inference only, it is possible to provide these
859 859
 log-ratios:
860 860
 
861
-<<customlogratio>>=
861
+<<customlogratio, message=FALSE>>=
862 862
 # We still use the log-ratio exactly as normalized by PureCN for this
863 863
 # example
864 864
 log.ratio <- calculateLogRatio(readCoverageGatk(normal.coverage.file),
865
-    readCoverageGatk(tumor.coverage.file), verbose=FALSE)
865
+    readCoverageGatk(tumor.coverage.file))
866 866
 
867 867
 retLogRatio <- runAbsoluteCN(log.ratio=log.ratio,
868 868
     gc.gene.file=gc.gene.file, vcf.file=vcf.file, 
... ...
@@ -964,6 +964,7 @@ Rscript Coverage.R --outdir ~/tmp/ --gatkcoverage example_tumor.txt \
964 964
 Argument name       & Corresponding PureCN argument & PureCN function \\
965 965
 \midrule
966 966
 --help -h           & & \\
967
+--version -v          & & \\
967 968
 --force -f          & & \\
968 969
 --outdir -o         & & \\
969 970
 --bam -b            & bam.file & \Rfunction{calculateBamCoverageByInterval} \\
... ...
@@ -1010,11 +1011,12 @@ Rscript PureCN.R --rds Sample1_purecn.rds
1010 1011
 Argument name       & Corresponding PureCN argument & PureCN function \\
1011 1012
 \midrule
1012 1013
 --help -h           & & \\
1014
+--version -v        & & \\
1013 1015
 --force -f          & & \\
1014 1016
 --outdir -o         &          &                            \\
1015 1017
 --normal -n         & normal.coverage.file   & \Rfunction{runAbsoluteCN}  \\
1016 1018
 --tumor -t          & tumor.coverage.file     & \Rfunction{runAbsoluteCN} \\
1019
+--vcf -b            & vcf.file          & \Rfunction{runAbsoluteCN}   \\
1017 1020
 --rds -r            & file.rds          & \Rfunction{readCurationFile}   \\
1018 1021
 --genome -g         & genome             & \Rfunction{runAbsoluteCN}  \\
1019 1022
 --gcgene -c         & gc.gene.file      & \Rfunction{runAbsoluteCN}   \\