git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/PureCN@125770 bc3139a8-67e5-0310-9ffc-ced21a209358
@@ -2,8 +2,8 @@ Package: PureCN
Type: Package
Title: Copy number calling and SNV classification using
targeted short read sequencing
-Version: 1.5.32
-Date: 2017-01-03
+Version: 1.5.33
+Date: 2017-01-06
Authors@R: c(person("Markus", "Riester", role=c("aut", "cre"),
email="[email protected]"),
person("Angad P.", "Singh", role="aut"))
@@ -13,7 +13,7 @@ Description: This package estimates tumor purity, copy number, loss of
with standard somatic variant detection pipelines, and has support for
tumor samples without matching normal samples.
Depends:
- R (>= 3.2),
+ R (>= 3.3),
DNAcopy,
VariantAnnotation (>= 1.14.1)
Imports:
@@ -32,6 +32,7 @@ Imports:
Biostrings,
rtracklayer,
ggplot2,
+ futile.logger,
edgeR,
limma
Suggests:
@@ -55,13 +55,22 @@ importFrom(S4Vectors,queryHits)
importFrom(S4Vectors,subjectHits)
importFrom(SummarizedExperiment,rowRanges)
importFrom(data.table,data.table)
+importFrom(futile.logger,appender.tee)
+importFrom(futile.logger,flog.appender)
+importFrom(futile.logger,flog.fatal)
+importFrom(futile.logger,flog.info)
+importFrom(futile.logger,flog.threshold)
+importFrom(futile.logger,flog.warn)
importFrom(ggplot2,aes)
importFrom(ggplot2,aes_string)
importFrom(ggplot2,element_text)
importFrom(ggplot2,facet_wrap)
+importFrom(ggplot2,geom_boxplot)
+importFrom(ggplot2,geom_hline)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,ggplot)
+importFrom(ggplot2,labs)
importFrom(ggplot2,scale_alpha_continuous)
importFrom(ggplot2,stat_density2d)
importFrom(ggplot2,theme)
@@ -6,6 +6,7 @@ Changes in version 1.6.0
- Polished plots, added new GC-normalization and volcano plots
- Support for cell lines
- Improved somatic vs. germline status calling
+- Contamination rate estimation.
- Better mapping bias estimation and correction
- Better copy number normalization using multiple best normals
- New GC-normalization for smaller gene panels
@@ -16,6 +17,8 @@ Changes in version 1.6.0
- Faster post.optimize=TRUE by not optimizing poor fits.
- Tweaks to segmentationPSCBS
- Lots of improvements to command line scripts.
+- Code cleanups (switch from inlinedocs to roxygen, from message/warn to
+ futile.logger)


API CHANGES
@@ -40,6 +43,14 @@ API CHANGES
normalized so that w[1] is 1
- Removed ... from runAbsoluteCN
- added centromeres to segmentation function.
+ - contamination.cutoff now specifies fraction of SNPs necessary to
+ call sample contaminated. Does not remove anymore, since these
+ are obviously informative for contamination rate estimation in the
+ PureCN model.
+ - Removed verbose from most functions, since messages are now controlled
+ with futile.logger.
+ - Smoothing of log-ratios now optionally done by runAbsoluteCN, not
+ segmentation function.


PLANNED FEATURES
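To illustrate the new contamination.cutoff semantics described above, a minimal usage sketch under the new defaults (c(0.075, 0.02), taken from the filterVcfBasic hunk later in this changeset); the snippet is illustrative only and not part of the diff:

# Flag the sample when dbSNP variants with allelic fraction < 0.075 or
# > 1 - 0.075 are found on most chromosomes and make up at least 2% of
# all dbSNP variants; the SNPs themselves are kept, since they inform
# the contamination rate estimate in the PureCN model.
vcf.filtered <- filterVcfBasic(vcf, contamination.cutoff = c(0.075, 0.02))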
@@ -50,3 +61,13 @@ PLANNED FEATURES
MYC)
- Switch to S4 data structures (maybe)
- Whole dataset visualizations (maybe PureCN 1.8)
+
+
+ROADMAP
+
+- January 15: API freeze (no changes to existing API)
+- February 1: Defaults freeze (no tuning of default parameters anymore)
+- February 15: Feature freeze (only bugfixes, code cleanups and speedups
+ and documentation improvements)
+- March 15: Code freeze (only bugfixes and documentation improvements)
+- April: Release
@@ -64,7 +64,7 @@ c(test.num.copy, round(opt.C))[i], prior.K))
}

.calcSNVLLik <- function(vcf, tumor.id.in.vcf, ov, p, test.num.copy,
- C.posterior, C, opt.C, snv.model, prior.somatic, mapping.bias, snv.lr,
+ C.likelihood, C, opt.C, snv.model, prior.somatic, mapping.bias, snv.lr,
sampleid = NULL, cont.rate = 0.01, prior.K, max.coverage.vcf, non.clonal.M,
model.homozygous=FALSE, error=0.001, max.mapping.bias=0.8) {

@@ -84,9 +84,9 @@ c(test.num.copy, round(opt.C))[i], prior.K))
haploid.penalty <- 2
}

- subclonal <- apply(C.posterior[queryHits(ov), ], 1, which.max) == ncol(C.posterior)
+ subclonal <- apply(C.likelihood[queryHits(ov), ], 1, which.max) == ncol(C.likelihood)

- seg.idx <- which(seq_len(nrow(C.posterior)) %in% queryHits(ov))
+ seg.idx <- which(seq_len(nrow(C.likelihood)) %in% queryHits(ov))
sd.ar <- sd(unlist(geno(vcf)$FA[, tumor.id.in.vcf]))

# Fit variants in all segments
@@ -104,13 +104,13 @@ c(test.num.copy, round(opt.C))[i], prior.K))
shape1 <- ar_all * dp_all + 1
shape2 <- (1 - ar_all) * dp_all + 1

- lapply(seq(ncol(C.posterior)), function(k) {
+ lapply(seq(ncol(C.likelihood)), function(k) {
Ci <- c(test.num.copy, opt.C[i])[k]
priorM <- log(Ci + 1 + haploid.penalty)
#priorM <- log(abs(2-Ci) + 1)
priorHom <- ifelse(model.homozygous, -log(3), log(0))

- skip <- test.num.copy > Ci | C.posterior[i, k] <=0
+ skip <- test.num.copy > Ci | C.likelihood[i, k] <=0

p.ar <- lapply(c(0, 1), function(g) {
cns <- test.num.copy
@@ -166,8 +166,8 @@ c(test.num.copy, round(opt.C))[i], prior.K))

likelihoods <- do.call(rbind,
lapply(seq_along(xx), function(i) Reduce("+",
- lapply(seq(ncol(C.posterior)), function(j)
- exp(xx[[i]][[j]]) * C.posterior[seg.idx[i], j]))))
+ lapply(seq(ncol(C.likelihood)), function(j)
+ exp(xx[[i]][[j]]) * C.likelihood[seg.idx[i], j]))))

colnames(likelihoods) <- c(paste("SOMATIC.M", test.num.copy, sep = ""),
paste("GERMLINE.M", test.num.copy, sep = ""), "GERMLINE.CONTHIGH",
@@ -418,7 +418,7 @@ c(test.num.copy, round(opt.C))[i], prior.K))
.getGoF <- function(result) {
if (is.null(result$SNV.posterior$beta.model)) return(0)
r <- result$SNV.posterior$beta.model$posteriors
- e <- (r$ML.AR-r$AR)^2
+ e <- (r$ML.AR-r$AR.ADJUSTED)^2
maxDist <- 0.2^2
r2 <- max(1-mean(e,na.rm=TRUE)/maxDist,0)
return(r2)
@@ -583,7 +583,7 @@ c(test.num.copy, round(opt.C))[i], prior.K))


.optimizeGrid <- function(test.purity, min.ploidy, max.ploidy, test.num.copy = 0:7,
- exon.lrs, seg, sd.seg, li, max.exon.ratio, max.non.clonal, verbose = FALSE, debug = FALSE) {
+ exon.lrs, seg, sd.seg, li, max.exon.ratio, max.non.clonal) {
ploidy.grid <- seq(min.ploidy, max.ploidy, by = 0.2)
if (min.ploidy < 1.8 && max.ploidy > 2.2) {
ploidy.grid <- c(seq(min.ploidy, 1.8, by = 0.2), 1.9, 2, 2.1, seq(2.2, max.ploidy,
@@ -592,16 +592,12 @@ c(test.num.copy, round(opt.C))[i], prior.K))
mm <- lapply(test.purity, function(p) {
b <- 2 * (1 - p)
log.ratio.offset <- 0
- if (debug)
- message(paste(b, log.ratio.offset))
lapply(ploidy.grid, function(D) {
dt <- p/D
llik.all <- lapply(seq_along(exon.lrs), function(i) .calcLlikSegmentExonLrs(exon.lrs[[i]],
log.ratio.offset, max.exon.ratio, sd.seg, dt, b, D, test.num.copy))
subclonal <- vapply(llik.all, which.max, double(1)) == 1
subclonal.f <- length(unlist(exon.lrs[subclonal]))/length(unlist(exon.lrs))
- if (debug)
- message(paste(sum(subclonal), subclonal.f))
if (subclonal.f > max.non.clonal)
return(-Inf)
llik <- sum(vapply(llik.all, max, double(1)))
@@ -857,6 +853,7 @@ c(test.num.copy, round(opt.C))[i], prior.K))
msg <- paste0(msg, "\n\nThis is most likely a user error due to",
" invalid input data or parameters (PureCN ",
packageVersion("PureCN"), ").")
+ flog.fatal(paste(strwrap(msg),"\n"))
stop( paste(strwrap(msg),"\n"), call.= FALSE)
}
.stopRuntimeError <- function(...) {
@@ -864,25 +861,41 @@ c(test.num.copy, round(opt.C))[i], prior.K))
msg <- paste0(msg, "\n\nThis runtime error might be caused by",
" invalid input data or parameters. Please report bug (PureCN ",
packageVersion("PureCN"), ").")
+ flog.fatal(paste(strwrap(msg),"\n"))
stop( paste(strwrap(msg),"\n"), call.= FALSE)
}
-
+.logHeader <- function(l) {
+ flog.info(strrep("-", 60))
+ flog.info("PureCN %s", as.character(packageVersion("PureCN")))
+ flog.info(strrep("-", 60))
+ idxSupported <- sapply(l, function(x) class(eval(x))) %in%
+ c("character", "numeric", "NULL", "list", "logical") &
+ sapply(l, function(x) length(unlist(eval(x)))) < 12
+
+ l <- c(l[idxSupported],lapply(l[!idxSupported], function(x) "<data>"))
+ argsStrings <- paste(sapply(seq_along(l), function(i)
+ paste0("-", names(l)[i], " ", paste(eval(l[[i]]),collapse=","))),
+ collapse=" ")
+ flog.info("Arguments: %s", argsStrings)
+}
+.logFooter <- function() {
+ flog.info("Done.")
+ flog.info(strrep("-", 60))
+}
.calcGCmetric <- function(gc.data, coverage) {
gcbins <- split(coverage$average.coverage, gc.data$gc_bias < 0.5);
mean(gcbins[[1]], na.rm=TRUE)/mean(gcbins[[2]], na.rm=TRUE)
}
-.checkGCBias <- function(normal, tumor, gc.data, max.dropout, verbose) {
+.checkGCBias <- function(normal, tumor, gc.data, max.dropout) {
gcMetricNormal <- .calcGCmetric(gc.data, normal)
gcMetricTumor <- .calcGCmetric(gc.data, tumor)
- if (verbose) {
- message("AT/GC dropout: ", round(gcMetricTumor, digits=2),
- " (tumor), ", round(gcMetricNormal, digits=2), " (normal).")
- }
+ flog.info("AT/GC dropout: %.2f (tumor), %.2f (normal). ",
+ gcMetricTumor, gcMetricNormal)
if (gcMetricNormal < max.dropout[1] ||
gcMetricNormal > max.dropout[2] ||
gcMetricTumor < max.dropout[1] ||
gcMetricTumor > max.dropout[2]) {
- warning("High GC-bias in normal or tumor. Is data GC-normalized?")
+ flog.warn("High GC-bias in normal or tumor. Is data GC-normalized?")
return(TRUE)
}
return(FALSE)
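Because messages now go through futile.logger rather than message()/warning(), callers control verbosity and log destinations globally. A minimal sketch of such a configuration, using only the futile.logger calls imported in the NAMESPACE hunk above (the log file name is hypothetical):

library(futile.logger)
flog.threshold(INFO)                        # show INFO and above (WARN, ERROR, FATAL)
flog.appender(appender.tee("PureCN.log"))   # mirror console output to a file
# the package then emits sprintf-style records, for example:
flog.info("AT/GC dropout: %.2f (tumor), %.2f (normal).", 1.02, 0.98)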
@@ -10,7 +10,6 @@
#' function.
#' @param tumor Tumor coverage read in by the \code{\link{readCoverageGatk}}
#' function.
-#' @param verbose Verbose output.
#' @return \code{numeric(nrow(tumor))}, tumor vs. normal copy number log-ratios
#' for all targets.
#' @author Markus Riester
@@ -22,21 +21,17 @@
#' package="PureCN")
#' normal <- readCoverageGatk(normal.coverage.file)
#' tumor <- readCoverageGatk(tumor.coverage.file)
-#' log.ratio <- calculateLogRatio(normal, tumor, verbose=FALSE)
+#' log.ratio <- calculateLogRatio(normal, tumor)
#'
#' @export calculateLogRatio
-calculateLogRatio <- function(normal, tumor, verbose=TRUE) {
+calculateLogRatio <- function(normal, tumor) {
# make sure that normal and tumor align
if (!identical(as.character(normal[, 1]), as.character(tumor[, 1]))) {
.stopUserError("Interval files in normal and tumor different.")
}
- if (verbose) {
- message("Average coverage: ",
- round(mean(tumor$average.coverage, na.rm=TRUE), digits=0),
- "X (tumor), ",
- round(mean(normal$average.coverage, na.rm=TRUE), digits=0),
- "X (normal).")
- }
+ flog.info("Average coverage: %.0fX (tumor) %.0fX (normal).",
+ mean(tumor$average.coverage, na.rm=TRUE),
+ mean(normal$average.coverage, na.rm=TRUE))
total.cov.normal <- sum(as.numeric(normal$coverage), na.rm = TRUE)
total.cov.tumor <- sum(as.numeric(tumor$coverage), na.rm = TRUE)

@@ -57,10 +57,10 @@ log.ratio.cutoffs = c(-0.9, 0.9), failed = NULL, all.genes = FALSE) {

bm <- res$results[[id]]$SNV.posterior$beta.model
if (!is.null(bm)) {
- segids <- bm$posterior$seg.id
+ segids <- bm$posteriors$seg.id
calls$num.snps.segment <- sapply(calls$seg.id, function(i)
sum(segids==i,na.rm=TRUE))
- calls$loh <- bm$posterior$ML.M.SEGMENT[match(calls$seg.id, segids)] == 0
+ calls$loh <- bm$posteriors$ML.M.SEGMENT[match(calls$seg.id, segids)] == 0
}

calls <- calls[, !grepl("^\\.",colnames(calls))]
@@ -1,5 +1,5 @@
# Make CMD check happy
-globalVariables(names=c("gcIndex", "gcNum", "..level.."))
+globalVariables(names=c("..level.."))

#' Correct for GC bias
#'
@@ -110,7 +110,7 @@ plot.max.density = 50000) {
if (density == "Low") {
print(ggplot(gcPlot, aes_string(x="gc_bias", y="average.coverage")) +
geom_point(color='red', alpha=0.2) +
- geom_line(data = plotMed, aes(x = gcIndex, y = gcNum), color = 'blue') +
+ geom_line(data = plotMed, aes_string(x = 'gcIndex', y = 'gcNum'), color = 'blue') +
xlab("GC content") + ylab("Coverage") +
theme(axis.text = element_text(size= 6), axis.title = element_text(size=16)) +
facet_wrap(~ norm_status, nrow=1))
@@ -119,7 +119,7 @@ plot.max.density = 50000) {
geom_point(color="blue", alpha = 0.1) +
stat_density2d(aes(fill = ..level..), geom="polygon") +
scale_alpha_continuous(limits=c(0.1, 0), breaks=seq(0, 0.1, by = 0.025)) +
- geom_line(data = plotMed, aes(x = gcIndex,y = gcNum), color = 'red') +
+ geom_line(data = plotMed, aes_string(x = 'gcIndex',y = 'gcNum'), color = 'red') +
xlab("GC content") + ylab("Coverage") +
theme(axis.text = element_text(size = 16), axis.title = element_text(size = 16)) +
facet_wrap(~norm_status, nrow=1))
@@ -46,7 +46,7 @@ max.mean.coverage = 100, ... ) {
normals.m <- scale(normals.m, 1/z, center=FALSE)

normals.pca <- prcomp(t(normals.m[idx,]), ...)
- sex.determined <- sapply(normals,getSexFromCoverage, verbose=is.null(sex))
+ sex.determined <- sapply(normals,getSexFromCoverage)
if (is.null(sex)) {
sex <- sex.determined
} else {
@@ -59,9 +59,8 @@ max.mean.coverage = 100, ... ) {
for (i in seq_along(sex.determined)) {
if (!is.na(sex.determined[i]) && sex[i] != "diploid" &&
sex.determined[i] != sex[i]) {
- warning("Sex mismatch in ", normal.coverage.files[i],
- ". Sex provided is ", sex, ", but could be ",
- sex.determined[i])
+ flog.warn("Sex mismatch in %s. Sex provided is %s, but could be %s.",
+ normal.coverage.files[i], sex[i], sex.determined[i])
}
}
}
@@ -109,7 +108,6 @@ createExonWeightFile <- function() {
#' (>20) to estimate target log-ratio standard deviations. Should not overlap
#' with files in \code{tumor.coverage.files}.
#' @param target.weight.file Output filename.
-#' @param verbose Verbose output.
#' @return A \code{data.frame} with target weights.
#' @author Markus Riester
#' @examples
@@ -127,12 +125,12 @@ createExonWeightFile <- function() {
#'
#' @export createTargetWeights
createTargetWeights <- function(tumor.coverage.files, normal.coverage.files,
-target.weight.file, verbose = TRUE) {
- if (verbose) message("Loading coverage data...")
+target.weight.file) {
+ flog.info("Loading coverage data...")
tumor.coverage <- lapply(tumor.coverage.files, readCoverageGatk)
normal.coverage <- lapply(normal.coverage.files, readCoverageGatk)
lrs <- lapply(tumor.coverage, function(tc) sapply(normal.coverage,
- function(nc) calculateLogRatio(nc, tc, verbose=verbose)))
+ function(nc) calculateLogRatio(nc, tc)))

lrs <- do.call(cbind, lrs)

@@ -23,7 +23,6 @@
#' \code{\link{createNormalDatabase}}.
#' @param normalDB.min.coverage Exclude targets with coverage lower than 20
#' percent of the chromosome median in the pool of normals.
-#' @param verbose Verbose output.
#' @return \code{logical(length(log.ratio))} specifying which targets should be
#' used in segmentation.
#' @author Markus Riester
@@ -55,7 +54,7 @@
#' @export filterTargets
filterTargets <- function(log.ratio, tumor, gc.data, seg.file,
filter.lowhigh.gc = 0.001, min.targeted.base = 5, normalDB = NULL,
- normalDB.min.coverage = 0.2, verbose) {
+ normalDB.min.coverage = 0.2) {
# NA's in log.ratio confuse the CBS function
targetsUsed <- !is.na(log.ratio) & !is.infinite(log.ratio)
# With segmentation file, ignore all filters
@@ -64,12 +63,12 @@ filterTargets <- function(log.ratio, tumor, gc.data, seg.file,
if (!is.null(gc.data)) {
.checkFraction(filter.lowhigh.gc, "filter.lowhigh.gc")
targetsUsed <- .filterTargetsLowHighGC(targetsUsed, tumor,
- gc.data, filter.lowhigh.gc, verbose)
+ gc.data, filter.lowhigh.gc)
}
targetsUsed <- .filterTargetsNormalDB(targetsUsed, tumor, normalDB,
- normalDB.min.coverage, verbose)
+ normalDB.min.coverage)
targetsUsed <- .filterTargetsTargetedBase(targetsUsed, tumor,
- min.targeted.base, verbose)
+ min.targeted.base)
}

.checkNormalDB <- function(tumor, normalDB) {
@@ -90,7 +89,7 @@ filterTargets <- function(log.ratio, tumor, gc.data, seg.file,
}

.filterTargetsNormalDB <- function(targetsUsed, tumor, normalDB,
-normalDB.min.coverage, verbose) {
+normalDB.min.coverage) {
if (is.null(normalDB)) return(targetsUsed)
# make sure that normalDB matches tumor
if (!.checkNormalDB(tumor, normalDB)) {
@@ -113,40 +112,40 @@ normalDB.min.coverage, verbose) {

nAfter <- sum(targetsUsed)

- if (verbose && nAfter < nBefore) {
- message("Removing ", nBefore-nAfter, " targets with low coverage ",
- "in normalDB.")
+ if (nAfter < nBefore) {
+ flog.info("Removing %i targets with low coverage in normalDB.",
+ nBefore-nAfter)
}
targetsUsed
}

-.filterTargetsChrHash <- function(targetsUsed, tumor, chr.hash, verbose) {
+.filterTargetsChrHash <- function(targetsUsed, tumor, chr.hash) {
if (is.null(chr.hash)) return(targetsUsed)
nBefore <- sum(targetsUsed)
targetsUsed <- targetsUsed & tumor$chr %in% chr.hash$chr
nAfter <- sum(targetsUsed)

- if (verbose && nAfter < nBefore) {
- message("Removing ", nBefore-nAfter, " targets on chromosomes ",
- "outside chr.hash.")
+ if ( nAfter < nBefore) {
+ flog.info("Removing %i targets on chromosomes outside chr.hash.",
+ nBefore-nAfter)
}
targetsUsed
}

-.filterTargetsTargetedBase <- function(targetsUsed, tumor, min.targeted.base,
- verbose) {
+.filterTargetsTargetedBase <- function(targetsUsed, tumor, min.targeted.base) {
if (is.null(min.targeted.base)) return(targetsUsed)
nBefore <- sum(targetsUsed)
targetsUsed <- targetsUsed & !is.na(tumor$targeted.base) &
tumor$targeted.base >= min.targeted.base
nAfter <- sum(targetsUsed)
- if (verbose && nAfter < nBefore) message("Removing ", nBefore-nAfter,
- " small targets")
+ if (nAfter < nBefore) {
+ flog.info("Removing %i small targets.", nBefore-nAfter)
+ }
targetsUsed
}

.filterTargetsLowHighGC <- function(targetsUsed, tumor, gc.data,
- filter.lowhigh.gc, verbose) {
+ filter.lowhigh.gc) {
gc.data <- gc.data[match(as.character(tumor[,1]), gc.data[,1]),]
qq <- quantile(gc.data$gc_bias, p=c(filter.lowhigh.gc,
1-filter.lowhigh.gc), na.rm=TRUE)
@@ -156,8 +155,8 @@ normalDB.min.coverage, verbose) {
!(gc.data$gc_bias < qq[1] | gc.data$gc_bias > qq[2])
nAfter <- sum(targetsUsed)

- if (verbose && nAfter < nBefore) message("Removing ",
- nBefore-nAfter, " low/high GC targets")
-
+ if (nAfter < nBefore) {
+ flog.info("Removing %i low/high GC targets.", nBefore-nAfter)
+ }
targetsUsed
}
@@ -20,8 +20,9 @@
#' contamination. If a matched normal is available, this value is ignored,
#' because homozygosity can be confirmed in the normal.
#' @param contamination.cutoff Count SNPs in dbSNP with allelic fraction
-#' smaller than the first value, if found on most chromosomes, remove all with
-#' AF smaller than the second value.
+#' smaller than the first value or greater than 1-first value, if found on most chromosomes, mark sample
+#' as contaminated if the fraction of putative contamination SNPs exceeds
+#' the second value.
#' @param min.coverage Minimum coverage in tumor. Variants with lower coverage
#' are ignored.
#' @param min.base.quality Minimim base quality in tumor. Requires a \code{BQ}
@@ -40,7 +41,6 @@
#' SNPs. Ignored in case a matched normal is provided in the VCF.
#' @param interval.padding Include variants in the interval flanking regions of
#' the specified size in bp. Requires \code{target.granges}.
-#' @param verbose Verbose output.
#' @return A list with elements \item{vcf}{The filtered \code{CollapsedVCF}
#' object.} \item{flag}{A flag (\code{logical(1)}) if problems were
#' identified.} \item{flag_comment}{A comment describing the flagging.}
@@ -61,10 +61,10 @@
#' @importFrom stats pbeta
filterVcfBasic <- function(vcf, tumor.id.in.vcf = NULL,
use.somatic.status = TRUE, snp.blacklist = NULL, af.range = c(0.03, 0.97),
-contamination.cutoff = c(0.05,0.075), min.coverage = 15, min.base.quality = 25,
+contamination.cutoff = c(0.075,0.02), min.coverage = 15, min.base.quality = 25,
min.supporting.reads = NULL, error = 0.001, target.granges = NULL,
-remove.off.target.snvs = TRUE, model.homozygous = FALSE, interval.padding = 50,
-verbose=TRUE) {
+remove.off.target.snvs = TRUE, model.homozygous = FALSE,
+interval.padding = 50) {
flag <- NA
flag_comment <- NA

@@ -74,25 +74,73 @@ verbose=TRUE) {
if (use.somatic.status) {
n <- nrow(vcf)
vcf <- .testGermline(vcf, tumor.id.in.vcf)
- if (verbose) message(paste("Removing", n-nrow(vcf),
- "non heterozygous (in matched normal) germline SNPs."))
+ flog.info("Removing %i non heterozygous (in matched normal) germline SNPs.",
+ n-nrow(vcf))
} else {
info(vcf)$SOMATIC <- NULL
}

+ # find supporting read cutoffs based on coverage and sequencing error
+ #--------------------------------------------------------------------------
if (is.null(min.supporting.reads)) {
- min.supporting.reads <- calculatePowerDetectSomatic(
- mean(geno(vcf)$DP[,tumor.id.in.vcf], na.rm=TRUE),
- purity=1,ploidy=2, error=error, verbose=FALSE)$k
- }
+ depths <- geno(vcf)$DP[,tumor.id.in.vcf]
+ minDepth <- round(log2(mean(depths)))
+
+ depths <- sort(unique(round(log2(depths))))
+ depths <- depths[depths>=minDepth]
+ depths <- 2^depths

+ cutoffs <- sapply(depths, function(d)
+ calculatePowerDetectSomatic(d, ploidy=2, purity=1, error=error,
+ verbose=FALSE)$k)
+ depths <- c(0,depths)
+ } else {
+ depths <- 0
+ cutoffs <- min.supporting.reads
+ }
n <- nrow(vcf)

- # remove variants with insufficient reads. This includes reference alleles
- # to remove homozygous germline variants.
- vcf <- vcf[do.call(rbind,
- geno(vcf)$AD[,tumor.id.in.vcf])[,2] >= min.supporting.reads]
-
+ .sufficientReads <- function(vcf, ref, depths, cutoffs) {
+ idx <- rep(TRUE, nrow(vcf))
+
+ .filterVcfByAD <- function(vcf, min.supporting.reads, depth) {
+ # remove variants with insufficient reads. This includes reference alleles
+ # to remove homozygous germline variants.
+ do.call(rbind, geno(vcf)$AD[,tumor.id.in.vcf])[,ifelse(ref,1,2)] >=
+ min.supporting.reads |
+ geno(vcf)$DP[,tumor.id.in.vcf] < depth
+ }
+
+ for (i in seq_along(cutoffs)) {
+ idx <- idx & .filterVcfByAD(vcf, cutoffs[i], depths[i])
+ }
+ idx
+ }
+ idxNotReference <- .sufficientReads(vcf,ref=FALSE, depths, cutoffs)
+ vcf <- vcf[idxNotReference]
+ idxNotHomozygous <- .sufficientReads(vcf,ref=TRUE, depths, cutoffs)
+ #--------------------------------------------------------------------------
+
+ idx <- info(vcf)$DB & idxNotHomozygous &
+ ( unlist(geno(vcf)$FA[,tumor.id.in.vcf]) < contamination.cutoff[1] |
+ unlist(geno(vcf)$FA[,tumor.id.in.vcf]) > (1 - contamination.cutoff[1]))
+
+ fractionContaminated <- sum(idx)/sum(info(vcf)$DB)
+ if (fractionContaminated > 0) {
+ # cntContPerChr <- sapply(seqlevelsInUse(vcf), function(seq) nrow(keepSeqlevels(vcf[idx],seq)))
+ # cntAllPerChr <- sapply(seqlevelsInUse(vcf), function(seq) nrow(keepSeqlevels(vcf,seq)))
+ # flog.info("Contamination p-value across chromosomes: %.2f.",
+ # fisher.test(cntContPerChr, cntAllPerChr)$p.value)
+ }
+
+ # do we have many low allelic fraction calls that are in dbSNP on basically
+ # all chromosomes? then we found some contamination
+ if (sum(runLength(seqnames(rowRanges(vcf[idx])))>3) >= 20 &&
+ fractionContaminated >= contamination.cutoff[2]) {
+ flag <- TRUE
+ flag_comment <- "POTENTIAL SAMPLE CONTAMINATION"
+ }
+
# If we have a matched normal, we can distinguish LOH from homozygous
# in 100% pure samples. If not we need to see a sufficient number
# of alt alleles to believe a heterozygous normal genotype.
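The hunk above replaces the single mean-coverage read cutoff with one cutoff per log2-spaced depth bin. A rough sketch of how those bins and cutoffs come out, reusing the calculatePowerDetectSomatic call from the new code (the coverages below are made up):

depths <- c(80, 120, 200, 400, 900)         # hypothetical per-variant coverages (DP)
minDepth <- round(log2(mean(depths)))       # mean is 340, so minDepth is 8
bins <- sort(unique(round(log2(depths))))   # 6 7 8 9 10
bins <- 2^bins[bins >= minDepth]            # 256 512 1024
cutoffs <- sapply(bins, function(d)
    calculatePowerDetectSomatic(d, ploidy = 2, purity = 1,
        error = 0.001, verbose = FALSE)$k)  # minimum supporting reads per bin
# deeper bins then require more supporting reads, mirroring the
# .filterVcfByAD loop in the hunk above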
@@ -100,20 +148,15 @@ verbose=TRUE) {
af.range[2] <- 1
if (model.homozygous) af.range[2] <- Inf
} else {
- vcf <- vcf[do.call(rbind,
- geno(vcf)$AD[,tumor.id.in.vcf])[,1] >= min.supporting.reads]
+ vcf <- vcf[idxNotHomozygous]
}

vcf <- vcf[unlist(geno(vcf)$DP[,tumor.id.in.vcf]) >= min.coverage]
vcf <- vcf[unlist(geno(vcf)$FA[,tumor.id.in.vcf]) >= af.range[1]]
# remove homozygous germline
vcf <- vcf[!info(vcf)$DB | geno(vcf)$FA[,tumor.id.in.vcf] < af.range[2]]
- if (verbose) message("Removing ", n-nrow(vcf),
- " SNPs with AF < ", af.range[1],
- " or AF >= ", af.range[2],
- " or less than ", min.supporting.reads,
- " supporting reads or depth < ",
- min.coverage, ".")
+ flog.info("Removing %i SNPs with AF < %.3f or AF >= %.3f or less than %i supporting reads or depth < %i.",
+ n-nrow(vcf), af.range[1], af.range[2], cutoffs[1], min.coverage)

if (!is.null(snp.blacklist)) {
for (i in seq_along(snp.blacklist)) {
@@ -124,7 +167,7 @@ verbose=TRUE) {
}
n <- nrow(vcf)
if (sum( rownames(vcf) %in% snp.blacklist.data[,1]) > 1 ) {
- warning("Old SNP blacklists are deprecated. ",
+ flog.warn("Old SNP blacklists are deprecated. %s",
"Use either a BED file or a normal.panel.vcf.file.")
vcf <- vcf[!rownames(vcf) %in% snp.blacklist.data[,1],]
} else {
@@ -132,79 +175,37 @@ verbose=TRUE) {
ov <- suppressWarnings(overlapsAny(vcf, blackBed))
vcf <- vcf[!ov]
}
- if (verbose) message("Removing ", n-nrow(vcf),
- " blacklisted SNPs.")
- }
- }
- idx <- info(vcf)$DB & unlist(geno(vcf)$FA[,tumor.id.in.vcf]) <
- contamination.cutoff[1]
-
- # do we have many low allelic fraction calls that are in dbSNP on basically
- # all chromosomes? then we found some contamination
- if (sum(runLength(seqnames(rowRanges(vcf[idx])))>1) >= 20) {
- idx <- info(vcf)$DB & unlist(geno(vcf)$FA[,tumor.id.in.vcf]) <
- contamination.cutoff[2]
- vcf <- vcf[which(!idx)]
- if (verbose) message("Removing ", sum(idx, na.rm=TRUE),
- " contamination SNPs.")
- flag <- TRUE
- flag_comment <- "POTENTIAL SAMPLE CONTAMINATION"
- }
-
- # if we have a matched normal, we can filter homozygous germline SNPs
- # easily, if not, we still should remove most of them by removing everything
- # with AF >= 0.97. But sometimes samples are very noisy, for example due to
- # contamination, and homozygous germline SNPs are between 0.9 and 1. This
- # code finds samples which have a lot of dbSNPs between 0.95 and 0.97 on
- # almost all chromosomes. If found, then remove everything that's in dbSNP
- # above 0.9.
- if (!use.somatic.status && !model.homozygous) {
- idx <- info(vcf)$DB & unlist(geno(vcf)$FA[,tumor.id.in.vcf]) >
- (1 - contamination.cutoff[1])
-
- # do we have many high allelic fraction calls that are in dbSNP on
- # basically all chromosomes? then we found a very noisy sample
- if (sum(runLength(seqnames(rowRanges(vcf[idx])))>2) >= 20) {
- idx <- info(vcf)$DB & unlist(geno(vcf)$FA[,tumor.id.in.vcf]) >
- (1 - contamination.cutoff[2])
- vcf <- vcf[which(!idx)]
- if (verbose) message("Removing ", sum(idx, na.rm=TRUE),
- " noisy homozygous germline SNPs.")
- flag <- TRUE
- flag_comment <- .appendComment(flag_comment,
- "NOISY HOMOZYGOUS GERMLINE CALLS")
+ flog.info("Removing %i blacklisted SNPs.", n-nrow(vcf))
}
}

if (!is.null(geno(vcf)$BQ)) {
- n.vcf.before.filter <- nrow(vcf)
- vcf <- vcf[which(as.numeric(geno(vcf)$BQ[,tumor.id.in.vcf])>=min.base.quality)]
- if (verbose) message("Removing ", n.vcf.before.filter - nrow(vcf),
- " low quality variants with BQ<",min.base.quality,".")
+ n.vcf.before.filter <- nrow(vcf)
+ vcf <- vcf[which(as.numeric(geno(vcf)$BQ[,tumor.id.in.vcf])>=min.base.quality)]
+ flog.info("Removing %i low quality variants with BQ < %i.",
+ n.vcf.before.filter - nrow(vcf), min.base.quality)
} else if (!is.null(rowRanges(vcf)$QUAL)) {
- n.vcf.before.filter <- nrow(vcf)
- idx <- which(as.numeric(rowRanges(vcf)$QUAL)>=min.base.quality)
- if (length(idx)) vcf <- vcf[idx]
- if (verbose) message("Removing ", n.vcf.before.filter - nrow(vcf),
- " low quality variants with BQ<",min.base.quality,".")
+ n.vcf.before.filter <- nrow(vcf)
+ idx <- which(as.numeric(rowRanges(vcf)$QUAL)>=min.base.quality)
+ if (length(idx)) vcf <- vcf[idx]
+ flog.info("Removing %i low quality variants with BQ < %i.",
+ n.vcf.before.filter - nrow(vcf), min.base.quality)
}

if (!is.null(target.granges)) {
- vcf <- .annotateVcfTarget(vcf, target.granges, interval.padding, verbose)
+ vcf <- .annotateVcfTarget(vcf, target.granges, interval.padding)
if (remove.off.target.snvs) {
n.vcf.before.filter <- nrow(vcf)
# make sure all SNVs are in covered exons
vcf <- vcf[info(vcf)$OnTarget>0]
- if (verbose) message("Removing ", n.vcf.before.filter - nrow(vcf),
- " variants outside intervals.",
- " Set remove.off.target.snvs=FALSE to include.")
+ flog.info("Removing %i variants outside intervals.",
+ n.vcf.before.filter - nrow(vcf))
}
}
- ##value<< A list with elements
list(
- vcf=vcf, ##<< The filtered \code{CollapsedVCF} object.
- flag=flag, ##<< A flag (\code{logical(1)}) if problems were identified.
- flag_comment=flag_comment ##<< A comment describing the flagging.
+ vcf=vcf,
+ flag=flag,
+ flag_comment=flag_comment
)
}

@@ -222,7 +223,6 @@ verbose=TRUE, ... ){
#' @param tumor.id.in.vcf The tumor id in the VCF file, optional.
#' @param stats.file MuTect stats file.
#' @param ignore MuTect flags that mark variants for exclusion.
-#' @param verbose Verbose output.
#' @param \dots Additional arguments passed to \code{\link{filterVcfBasic}}.
#' @return A list with elements \code{vcf}, \code{flag} and
#' \code{flag_comment}. \code{vcf} contains the filtered \code{CollapsedVCF},
@@ -244,15 +244,15 @@ filterVcfMuTect <- function(vcf, tumor.id.in.vcf = NULL, stats.file = NULL,
ignore=c("clustered_read_position", "fstar_tumor_lod", "nearby_gap_events",
"poor_mapping_region_alternate_allele_mapq", "poor_mapping_region_mapq0",
"possible_contamination", "strand_artifact", "seen_in_panel_of_normals"),
-verbose=TRUE, ... ){
+... ){
if (is.null(stats.file)) return(
- filterVcfBasic(vcf, tumor.id.in.vcf, verbose=verbose, ...))
+ filterVcfBasic(vcf, tumor.id.in.vcf, ...))

stats <- read.delim(stats.file, as.is=TRUE, skip=1)

if (is.null(stats$contig) || is.null(stats$position)) {
warning("MuTect stats file lacks contig and position columns.")
- return(filterVcfBasic(vcf, tumor.id.in.vcf, verbose=verbose, ...))
+ return(filterVcfBasic(vcf, tumor.id.in.vcf, ...))
}

gr.stats <- GRanges(seqnames=stats$contig,
@@ -265,13 +265,13 @@ verbose=TRUE, ... ){
n <- nrow(vcf)
stats <- stats[subjectHits(ov),]
vcf <- vcf[queryHits(ov)]
- warning("MuTect stats file and VCF file do not align perfectly. ",
- "Will remove ", n-nrow(vcf), " unmatched variants.")
+ flog.warn("MuTect stats file and VCF file do not align perfectly. Will remove %i unmatched variants.",
+ n-nrow(vcf))
}
if (is.null(stats$failure_reasons)) {
- warning("MuTect stats file lacks failure_reasons column.",
+ flog.warn("MuTect stats file lacks failure_reasons column.",
" Keeping all variants listed in stats file.")
- return(filterVcfBasic(vcf, tumor.id.in.vcf, verbose=verbose, ...))
+ return(filterVcfBasic(vcf, tumor.id.in.vcf, ...))
}

n <- nrow(vcf)
@@ -279,9 +279,9 @@ verbose=TRUE, ... ){
ids <- sort(unique(unlist(sapply(ignore, grep, stats$failure_reasons))))
vcf <- vcf[-ids]

- if (verbose) message("Removing ", n-nrow(vcf),
- " MuTect calls due to blacklisted failure reasons.")
- filterVcfBasic(vcf, tumor.id.in.vcf, verbose=verbose, ...)
+ flog.info("Removing %i MuTect calls due to blacklisted failure reasons.",
+ n-nrow(vcf))
+ filterVcfBasic(vcf, tumor.id.in.vcf, ...)
}


@@ -305,7 +305,6 @@ verbose=TRUE, ... ){
#' value is for the case that variant is in both dbSNP and COSMIC > 2.
#' @param tumor.id.in.vcf Id of tumor in case multiple samples are stored in
#' VCF.
-#' @param verbose Verbose output.
#' @return A \code{numeric(nrow(vcf))} vector with the prior probability of
#' somatic status for each variant in the \code{CollapsedVCF}.
#' @author Markus Riester
@@ -320,7 +319,7 @@ verbose=TRUE, ... ){
#' @export setPriorVcf
setPriorVcf <- function(vcf,
prior.somatic=c(0.5, 0.0005, 0.999, 0.0001, 0.995, 0.01),
-tumor.id.in.vcf=NULL, verbose=TRUE) {
+tumor.id.in.vcf=NULL) {
if (is.null(tumor.id.in.vcf)) {
tumor.id.in.vcf <- .getTumorIdInVcf(vcf)
}
@@ -329,24 +328,24 @@ tumor.id.in.vcf=NULL, verbose=TRUE) {
prior.somatic <- ifelse(info(vcf)$SOMATIC,
prior.somatic[3],prior.somatic[4])

- if (verbose) message("Found SOMATIC annotation in VCF. ",
- "Setting somatic prior probabilities for somatic variants to ",
- tmp[3]," or to ", tmp[4], " otherwise.")
+ flog.info("Found SOMATIC annotation in VCF.")
+ flog.info("Setting somatic prior probabilities for somatic variants to %f or to %f otherwise.",
+ tmp[3], tmp[4])
} else {
tmp <- prior.somatic
prior.somatic <- ifelse(info(vcf)$DB,
prior.somatic[2], prior.somatic[1])
if (!is.null(info(vcf)$Cosmic.CNT)) {
- if (verbose) message("Found COSMIC annotation in VCF. ",
- "Setting somatic prior probabilities for hits to\n", tmp[5],
- " or to ", tmp[6], " if in both COSMIC and dbSNP.")
+ flog.info("Found COSMIC annotation in VCF.")
+ flog.info("Setting somatic prior probabilities for hits to %f or to %f if in both COSMIC and dbSNP.",
+ tmp[5], tmp[6])

prior.somatic[which(info(vcf)$Cosmic.CNT>2)] <- tmp[5]
prior.somatic[which(info(vcf)$Cosmic.CNT>2 &
info(vcf)$DB)] <- tmp[6]
} else {
- if (verbose) message("Setting somatic prior probabilities ",
- "for dbSNP hits to ", tmp[2]," or to ", tmp[1], " otherwise.")
+ flog.info("Setting somatic prior probabilities for dbSNP hits to %f or to %f otherwise.",
+ tmp[2], tmp[1])
}
}
prior.somatic
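As a quick reference for the six prior.somatic values consumed above, an annotated call with the defaults; the mapping restates what the code in this hunk does, and the call itself is only an illustration:

prior <- setPriorVcf(vcf, prior.somatic = c(
    0.5,     # [1] not in dbSNP, no SOMATIC annotation
    0.0005,  # [2] in dbSNP, no SOMATIC annotation
    0.999,   # [3] flagged SOMATIC (matched normal available)
    0.0001,  # [4] not flagged SOMATIC although the annotation exists
    0.995,   # [5] Cosmic.CNT > 2
    0.01))   # [6] Cosmic.CNT > 2 and also in dbSNP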
@@ -452,7 +451,7 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
}
return(TRUE)
}
-.readAndCheckVcf <- function(vcf.file, genome, verbose) {
+.readAndCheckVcf <- function(vcf.file, genome) {
if (class(vcf.file) == "character") {
vcf <- readVcf(vcf.file, genome)
} else if (class(vcf.file) != "CollapsedVCF") {
@@ -465,7 +464,7 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
if (sum(triAllelic)) {
n <- nrow(vcf)
vcf <- vcf[which(!triAllelic)]
- if (verbose) message("Removing ",n-nrow(vcf), " triallelic sites.")
+ flog.info("Removing %i triallelic sites.",n-nrow(vcf))
}
if (is.null(info(vcf)$DB)) {
# try to add an DB field based on rownames
@@ -525,9 +524,9 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
vcf
}

-.addCosmicCNT <- function(vcf, cosmic.vcf.file, verbose=TRUE) {
+.addCosmicCNT <- function(vcf, cosmic.vcf.file) {
if (!is.null(info(vcf)$Cosmic.CNT)) {
- if (verbose) message("VCF already COSMIC annotated. Skipping.")
+ flog.info("VCF already COSMIC annotated. Skipping.")
return(vcf)
}
cosmicSeqStyle <- seqlevelsStyle(headerTabix(
@@ -537,7 +536,7 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
if (!length(intersect(seqlevelsStyle(vcf), cosmicSeqStyle))) {
seqlevelsStyle(vcfRenamedSL) <- cosmicSeqStyle[1]
}
- if (verbose) message("Reading COSMIC VCF...")
+ flog.info("Reading COSMIC VCF...")
# look-up the variants in COSMIC
cosmic.vcf <- readVcf(cosmic.vcf.file, genome=genome(vcf)[1],
ScanVcfParam(which = rowRanges(vcfRenamedSL),
@@ -569,20 +568,18 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
vcf
}

-.annotateVcfTarget <- function(vcf, target.granges, interval.padding, verbose) {
+.annotateVcfTarget <- function(vcf, target.granges, interval.padding) {
target.granges.padding <- target.granges
start(target.granges.padding) <- start(target.granges.padding)-interval.padding
end(target.granges.padding) <- end(target.granges.padding)+interval.padding
.calcTargetedGenome <- function(granges) {
tmp <- reduce(granges)
- round(sum(width(tmp))/(1000^2),digits=2)
- }
- if (verbose) {
- message("Total size of targeted genomic region: ",
- .calcTargetedGenome(target.granges), "Mb (",
- .calcTargetedGenome(target.granges.padding),
- "Mb with ", interval.padding, "bp padding)")
+ sum(width(tmp))/(1000^2)
}
+ flog.info("Total size of targeted genomic region: %.2fMb (%.2fMb with %ibp padding).",
+ .calcTargetedGenome(target.granges),
+ .calcTargetedGenome(target.granges.padding),
+ interval.padding)

idxTarget <- overlapsAny(vcf, target.granges)
idxPadding <- overlapsAny(vcf, target.granges.padding)
@@ -596,16 +593,17 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
info(vcf)$OnTarget <- 0
info(vcf)$OnTarget[idxPadding] <- 2
info(vcf)$OnTarget[idxTarget] <- 1
- if (verbose) {
- targetsWithSNVs <- overlapsAny(target.granges.padding, vcf)
- percentTargetsWithSNVs <- round(sum(targetsWithSNVs,na.rm=TRUE)/
- length(targetsWithSNVs)*100, digits=1)
- tmp <- ""
- if (percentTargetsWithSNVs > 20) {
- tmp <- " segmentationPSCBS might produce better results."
- }
- message(percentTargetsWithSNVs,"% of targets contain heterozygous ",
- "SNVs.",tmp)
- }
+
+ # report stats in log file
+ targetsWithSNVs <- overlapsAny(target.granges.padding, vcf)
+ percentTargetsWithSNVs <- sum(targetsWithSNVs,na.rm=TRUE)/
+ length(targetsWithSNVs)*100
+ tmp <- ""
+ if (percentTargetsWithSNVs > 20) {
+ tmp <- " segmentationPSCBS might produce better results."
+ }
+ flog.info("%.1f%% of targets contain SNVs. %s",
+ percentTargetsWithSNVs, tmp)
+
vcf
}
@@ -22,7 +22,6 @@
#' @param pool.weights Either find good pooling weights by optimization or
#' weight all best normals equally.
#' @param plot.pool Allows the pooling function to create plots.
-#' @param verbose Verbose output.
#' @param \dots Additional arguments passed to \code{\link{poolCoverage}}.
#' @return Filename of the best matching normal.
#' @author Markus Riester
@@ -48,7 +47,7 @@
findBestNormal <- function(tumor.coverage.file, normalDB, pcs=1:3,
num.normals = 1, ignore.sex = FALSE, sex = NULL,
normal.coverage.files = NULL, pool = FALSE,
- pool.weights = c("voom", "equal"), plot.pool = FALSE, verbose = TRUE,
+ pool.weights = c("voom", "equal"), plot.pool = FALSE,
...) {
if (is.character(tumor.coverage.file)) {
tumor <- readCoverageGatk(tumor.coverage.file)
@@ -71,15 +70,15 @@ findBestNormal <- function(tumor.coverage.file, normalDB, pcs=1:3,
if (!ignore.sex && !is.null(normalDB$sex) &&
sum(!is.na(normalDB$sex))>0) {
if (is.null(sex)) {
- sex <- getSexFromCoverage(tumor, verbose=FALSE)
+ sex <- getSexFromCoverage(tumor)
}
- if (verbose) message("Sex of sample: ", sex)
+ flog.info("Sample sex: %s", sex)
if (!is.na(sex)) {
idx.normals <- which(normalDB$sex == sex)
}
if (length(idx.normals) < 2) {
- warning("Not enough samples of sex ", sex,
- " in database. Ignoring sex.")
+ flog.warn("Not enough samples of sex %s %s", sex,
+ "in database. Ignoring sex.")
idx.normals <- seq_along(normalDB$normal.coverage.files)
}
}
@@ -98,10 +97,8 @@ findBestNormal <- function(tumor.coverage.file, normalDB, pcs=1:3,
if (pool) {
normals <- lapply(normal.coverage.files, readCoverageGatk)
pool.weights <- match.arg(pool.weights)
- if (verbose) {
- message("Pooling ", paste(basename(normal.coverage.files),
- collapse=", "))
- }
+ flog.info("Pooling %s.", paste(basename(normal.coverage.files),
+ collapse=", "))
w <- NULL
if (pool.weights == "voom" && num.normals > 1) {
logRatio <- .voomLogRatio(tumor,
@@ -19,7 +19,6 @@
#' be considered when setting cutoffs.
#' @param remove.outliers Removes coverage outliers before calculating mean
#' chromosome coverages.
-#' @param verbose Verbose output.
#' @return Returns a \code{character(1)} with \code{M} for male, \code{F} for
#' female, or \code{NA} if unknown.
#' @author Markus Riester
@@ -32,7 +31,7 @@
#'
#' @export getSexFromCoverage
getSexFromCoverage <- function(coverage.file, min.ratio = 25, min.ratio.na = 20,
- remove.outliers = TRUE, verbose = TRUE) {
+ remove.outliers = TRUE) {
if (is.character(coverage.file)) {
x <- readCoverageGatk(coverage.file)
} else {
@@ -51,7 +50,7 @@ getSexFromCoverage <- function(coverage.file, min.ratio = 25, min.ratio.na = 20,
}

if (is.na(avg.coverage[sex.chr[1]]) || is.na(avg.coverage[sex.chr[2]]) ) {
- if (verbose) message(
+ flog.warn(
"Allosome coverage appears to be missing, cannot determine sex.")
return(NA)
}
@@ -60,18 +59,13 @@ getSexFromCoverage <- function(coverage.file, min.ratio = 25, min.ratio.na = 20,
names(avg.coverage))],na.rm=TRUE)
autosome.ratio <- avg.autosome.coverage/(avg.coverage[sex.chr[1]]+0.0001)
if (autosome.ratio > 5) {
- if (verbose) message(
- "Allosome coverage very low, cannot determine sex.")
+ flog.info("Allosome coverage very low, cannot determine sex.")
return(NA)
}
XY.ratio <- avg.coverage[sex.chr[1]]/ (avg.coverage[sex.chr[2]]+ 0.0001)
- if (verbose) {
- message("Mean coverages:",
- " chrX: ", round(avg.coverage[sex.chr[1]], digits=2),
- " chrY: ", round(avg.coverage[sex.chr[2]], digits=2),
- " chr1-22: ",round(avg.autosome.coverage, digits=2),"."
- )
- }
+ flog.info("Mean coverages: chrX: %.2f, chrY: %.2f, chr1-22: %.2f.",
+ avg.coverage[sex.chr[1]], avg.coverage[sex.chr[2]],
+ avg.autosome.coverage)
if (XY.ratio > min.ratio) return("F")
if (XY.ratio > min.ratio.na) return(NA)
return("M")
@@ -112,7 +106,6 @@ getSexFromCoverage <- function(coverage.file, min.ratio = 25, min.ratio.na = 20,
#' homozygous.
#' @param af.cutoff Remove all SNVs with allelic fraction lower than the
#' specified value.
-#' @param verbose Verbose output.
#' @return Returns a \code{character(1)} with \code{M} for male, \code{F} for
#' female, or \code{NA} if unknown.
#' @author Markus Riester
@@ -129,7 +122,7 @@ getSexFromCoverage <- function(coverage.file, min.ratio = 25, min.ratio.na = 20,
#' @importFrom stats fisher.test
getSexFromVcf <- function(vcf, tumor.id.in.vcf=NULL, min.or = 4,
min.or.na = 2.5, max.pv = 0.001, homozygous.cutoff = 0.95,
- af.cutoff = 0.2, verbose=TRUE) {
+ af.cutoff = 0.2) {
if (is.null(tumor.id.in.vcf)) {
tumor.id.in.vcf <- .getTumorIdInVcf(vcf)
}
@@ -144,41 +137,34 @@ getSexFromVcf <- function(vcf, tumor.id.in.vcf=NULL, min.or = 4,

homozygous <- geno(vcf)$FA[,tumor.id.in.vcf] > homozygous.cutoff
if ( sum(homozygous)/length(homozygous) < 0.001 ) {
- if (verbose) {
- message("No homozygous variants in VCF, provide unfiltered VCF.")
- }
+ flog.info("No homozygous variants in VCF, provide unfiltered VCF.")
return(NA)
}

if (!sum(chrX)) {
- if (verbose) {
- message("No variants on chrX in VCF.")
- }
+ flog.info("No variants on chrX in VCF.")
return(NA)
}
res <- fisher.test(homozygous, as.vector(chrX))
- if (verbose) message(sum( homozygous & as.vector(chrX)),
- " homozygous and ", sum( !homozygous & as.vector(chrX)),
- " heterozygous variants on chromosome X.")
+ flog.info("%i homozygous and %i heterozygous variants on chrX.",
+ sum( homozygous & as.vector(chrX)),
+ sum( !homozygous & as.vector(chrX)))
sex <- "F"
if (res$estimate >= min.or.na) sex <- NA
if (res$estimate >= min.or && res$p.value > max.pv) sex <- NA
if (res$p.value <= max.pv && res$estimate >= min.or) sex <- "M"
- if (verbose) {
- message("Sex from VCF: ", sex, " (Fisher's p-value: ",
- ifelse(res$p.value < 0.0001, "< 0.0001",
- round(res$p.value, digits=3)),
- " odds-ratio: ", round(res$estimate, digits=2), ")")
- }
+ flog.info("Sex from VCF: %s (Fisher's p-value: %s, odds-ratio: %.2f).",
+ sex, ifelse(res$p.value < 0.0001, "< 0.0001", round(res$p.value, digits=3)),
+ res$estimate)
return(sex)
}

.getSex <- function(sex, normal, tumor) {
if (sex != "?") return(sex)
- sex.tumor <- getSexFromCoverage(tumor, verbose=FALSE)
- sex.normal <- getSexFromCoverage(normal, verbose=FALSE)
+ sex.tumor <- getSexFromCoverage(tumor)
+ sex.normal <- getSexFromCoverage(normal)
if (!identical(sex.tumor, sex.normal)) {
- warning("Sex tumor/normal mismatch: tumor = ", sex.tumor,
+ flog.warn("Sex tumor/normal mismatch: tumor = ", sex.tumor,
" normal = ", sex.normal)
}
sex <- sex.tumor
@@ -57,6 +57,7 @@
#' @importFrom graphics abline axis boxplot contour hist image
#' legend lines par plot text mtext polygon points
#' rect strwidth symbols barplot
+#' @importFrom ggplot2 geom_boxplot geom_hline labs
plotAbs <- function(res, ids = NULL,
type = c("hist", "overview", "overview2", "BAF", "AF", "volcano", "all"),
chr = NULL, germline.only = TRUE, show.contour = FALSE, purity = NULL,
@@ -152,7 +153,7 @@ max.mapping.bias = 0.8, palette.name = "Paired", ... ) {
"Purity:", round(res$results[[i]]$purity[[1]], digits=2),
" Tumor ploidy:", round( res$results[[i]]$ploidy, digits=3),
" SNV log-likelihood:",
- round(res$results[[i]]$SNV.posterior$beta$llik, digits=2),
+ round(res$results[[i]]$SNV.posterior$beta.model$llik, digits=2),
" GoF:", r2,
" Mean coverage:",
paste(round(apply(geno(res$input$vcf)$DP,2,mean)),
@@ -315,7 +316,7 @@ max.mapping.bias = 0.8, palette.name = "Paired", ... ) {
r <- .getVariantPosteriors(res, i, max.mapping.bias)
if (is.null(r)) next

- vcf <- res$input$vcf[res$results[[i]]$SNV.posterior$beta$vcf.ids]
+ vcf <- res$input$vcf[res$results[[i]]$SNV.posterior$beta.model$vcf.ids]
# brwer.pal requires at least 3 levels

r <- .getAFPlotGroups(r, is.null(info(vcf)$SOMATIC))
@@ -418,7 +419,15 @@ max.mapping.bias = 0.8, palette.name = "Paired", ... ) {
legend("topright", legend=as.character(mycol.palette$group),
col=mycol.palette$color,
pch=mycol.palette$pch, cex=0.8)
-
+
+ estimatedContRate <-
+ res$results[[i]]$SNV.posterior$beta.model$posterior.contamination
+ if (!is.null(estimatedContRate) &&
+ estimatedContRate > min(r$AR[r$ML.SOMATIC])) {
+ abline(h=estimatedContRate, col="red")
+ text(x=mylogratio.xlim[1], y=estimatedContRate+0.02,
+ labels="Contamination", col="red", pos=4)
+ }
text(x=peak.ideal.means[
as.character(r$ML.C[r$ML.SOMATIC])][idx.labels],
y=r$ML.AR[r$ML.SOMATIC][idx.labels],
@@ -597,7 +606,7 @@ ss) {
}

.getVariantPosteriors <- function(res, i, max.mapping.bias=NULL) {
- r <- res$results[[i]]$SNV.posterior$beta.model$posterior
+ r <- res$results[[i]]$SNV.posterior$beta.model$posteriors
if (!is.null(r) && !is.null(max.mapping.bias)) {
r <- r[r$MAPPING.BIAS >= max.mapping.bias,]
}
... | ... |
@@ -607,22 +616,56 @@ ss) { |
607 | 616 |
.getAFPlotGroups <- function(r, single.mode) { |
608 | 617 |
if (single.mode) { |
609 | 618 |
groupLevels <- c("dbSNP/germline", "dbSNP/somatic", "novel/somatic", |
610 |
- "novel/germline", "COSMIC/germline", "COSMIC/somatic") |
|
619 |
+ "novel/germline", "COSMIC/germline", "COSMIC/somatic", "contamination") |
|
611 | 620 |
r$group <- groupLevels[1] |
612 | 621 |
r$group[r$prior.somatic < 0.1 & r$ML.SOMATIC] <- groupLevels[2] |
613 | 622 |
r$group[r$prior.somatic >= 0.1 & r$ML.SOMATIC] <- groupLevels[3] |
614 | 623 |
r$group[r$prior.somatic >= 0.1 & !r$ML.SOMATIC] <- groupLevels[4] |
615 | 624 |
r$group[r$prior.somatic >= 0.9 & !r$ML.SOMATIC] <- groupLevels[5] |
616 | 625 |
r$group[r$prior.somatic >= 0.9 & r$ML.SOMATIC] <- groupLevels[6] |
626 |
+ r$group[r$GERMLINE.CONTLOW > 0.5 | |
|
627 |
+ r$GERMLINE.CONTHIGH > 0.5] <- groupLevels[7] |
|
617 | 628 |
r$group <- factor(r$group, levels=groupLevels) |
618 | 629 |
return(r) |
619 | 630 |
} |
620 | 631 |
groupLevels <- c("germline", "germline/ML somatic", "somatic", |
621 |
- "somatic/ML germline") |
|
632 |
+ "somatic/ML germline", "contamination") |
|
622 | 633 |
r$group <- groupLevels[1] |
623 | 634 |
r$group[r$prior.somatic < 0.1 & r$ML.SOMATIC] <- groupLevels[2] |
624 | 635 |
r$group[r$prior.somatic >= 0.1 & r$ML.SOMATIC] <- groupLevels[3] |
625 | 636 |
r$group[r$prior.somatic >= 0.1 & !r$ML.SOMATIC] <- groupLevels[4] |
637 |
+ r$group[r$GERMLINE.CONTLOW > 0.5 | |
|
638 |
+ r$GERMLINE.CONTHIGH > 0.5] <- groupLevels[5] |
|
626 | 639 |
r$group <- factor(r$group, levels=groupLevels) |
627 | 640 |
r |
628 | 641 |
} |
642 |
+ |
|
643 |
+ |
|
644 |
+.plotContamination <- function(pp, max.mapping.bias=NULL, plot=TRUE) { |
|
645 |
+ if (is.null(max.mapping.bias)) max.mapping.bias <- 0
|
646 |
+ |
|
647 |
+ idx <- pp$GERMLINE.CONTHIGH+pp$GERMLINE.CONTLOW > 0.5 & |
|
648 |
+ pp$MAPPING.BIAS >= max.mapping.bias |
|
649 |
+ if (!length(which(idx))) return(0) |
|
650 |
+ df <- data.frame( |
|
651 |
+ chr=pp$chr[idx], |
|
652 |
+ AR=sapply(pp$AR.ADJUSTED[idx], function(x) ifelse(x>0.5, 1-x,x)), |
|
653 |
+ HIGHLOW=ifelse(pp$GERMLINE.CONTHIGH>pp$GERMLINE.CONTLOW, |
|
654 |
+ "HIGH", "LOW")[idx] |
|
655 |
+ ) |
|
656 |
+ df$chr <- factor(df$chr, levels=unique(df$chr)) |
|
657 |
+ # take the median within the HIGH and LOW groups and then average. the low count
|
658 |
+ # might be biased in case contamination rate is < AR cutoff |
|
659 |
+ estimatedRate <- weighted.mean( |
|
660 |
+ sapply(split(df$AR, df$HIGHLOW), median), |
|
661 |
+ sapply(split(df$AR, df$HIGHLOW), length) |
|
662 |
+ ) |
|
663 |
+ if (plot) { |
|
664 |
+ gp <- ggplot(df, aes_string(x="chr",y="AR",fill="HIGHLOW"))+geom_boxplot()+ |
|
665 |
+ geom_hline(yintercept=estimatedRate, color="grey", linetype="dashed")+ |
|
666 |
+ labs(fill="") |
|
667 |
+ print(gp) |
|
668 |
+ } |
|
669 |
+ estimatedRate |
|
670 |
+} |
|
671 |
+ |
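The estimate returned by .plotContamination() is the weighted mean of the
per-group medians of the folded allelic fractions (AR, with values above 0.5
mapped to 1 - AR) of SNPs flagged as likely contamination, weighted by the
number of SNPs in the HIGH and LOW groups. A minimal sketch of that calculation
on invented values:

    # toy folded allelic fractions of flagged SNPs
    df <- data.frame(AR=c(0.02, 0.03, 0.025, 0.04, 0.05),
        HIGHLOW=c("LOW", "LOW", "LOW", "HIGH", "HIGH"))
    estimatedRate <- weighted.mean(
        sapply(split(df$AR, df$HIGHLOW), median),   # median per group
        sapply(split(df$AR, df$HIGHLOW), length))   # weighted by group size
    estimatedRate  # 0.033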
... | ... |
@@ -19,8 +19,8 @@ |
19 | 19 |
#' mutations.} \item{k}{Minimum number of supporting reads.} \item{f}{Expected |
20 | 20 |
#' allelic fraction. } |
21 | 21 |
#' @author Markus Riester |
22 |
-#' @references Carter et al., Absolute quantification of somatic DNA |
|
23 |
-#' alterations in human cancer. Nature Biotechnology 2012. |
|
22 |
+#' @references Carter et al. (2012), Absolute quantification of somatic DNA |
|
23 |
+#' alterations in human cancer. Nature Biotechnology. |
|
24 | 24 |
#' @examples |
25 | 25 |
#' |
26 | 26 |
#' purity <- c(0.1,0.15,0.2,0.25,0.4,0.6,1) |
... | ... |
@@ -15,7 +15,6 @@ |
15 | 15 |
#' be used to automatically ignore unlikely solutions. |
16 | 16 |
#' @param max.ploidy Maximum ploidy to be considered. If \code{NULL}, all. Can |
17 | 17 |
#' be used to automatically ignore unlikely solutions. |
18 |
-#' @param verbose Verbose output. |
|
19 | 18 |
#' @return The return value of the corresponding \code{\link{runAbsoluteCN}} |
20 | 19 |
#' call, but with the results array manipulated according the curation CSV file |
21 | 20 |
#' and arguments of this function. |
... | ... |
@@ -35,8 +34,8 @@ |
35 | 34 |
readCurationFile <- function(file.rds, |
36 | 35 |
file.curation = gsub(".rds$", ".csv", file.rds), |
37 | 36 |
remove.failed = FALSE, report.best.only=FALSE, min.ploidy = NULL, |
38 |
-max.ploidy = NULL, verbose=FALSE) { |
|
39 |
- if (verbose) message("Reading ", file.rds, "...") |
|
37 |
+max.ploidy = NULL) { |
|
38 |
+ flog.info("Reading %s...", file.rds) |
|
40 | 39 |
res <- readRDS(file.rds) |
41 | 40 |
curation <- read.csv(file.curation, as.is=TRUE, nrows=1) |
42 | 41 |
.checkLogical <- function(field) { |
... | ... |
@@ -58,7 +58,7 @@ |
58 | 58 |
#' \code{\link{filterVcfBasic}}. |
59 | 59 |
#' @param args.filterVcf Arguments for variant filtering function. Arguments |
60 | 60 |
#' \code{vcf}, \code{tumor.id.in.vcf}, \code{min.coverage}, |
61 |
-#' \code{model.homozygous}, \code{error} and \code{verbose} are required in the |
|
61 |
+#' \code{model.homozygous} and \code{error} are required in the |
|
62 | 62 |
#' filter function and are automatically set. |
63 | 63 |
#' @param fun.setPriorVcf Function to set prior for somatic status for each |
64 | 64 |
#' variant in the VCF. Defaults to \code{\link{setPriorVcf}}. |
... | ... |
@@ -70,15 +70,15 @@ |
70 | 70 |
#' coverage files. Needs to return a \code{logical} vector whether an interval |
71 | 71 |
#' should be used for segmentation. Defaults to \code{\link{filterTargets}}. |
72 | 72 |
#' @param args.filterTargets Arguments for target filtering function. Arguments |
73 |
-#' \code{log.ratio}, \code{tumor}, \code{gc.data}, \code{seg.file}, |
|
74 |
-#' \code{normalDB} and \code{verbose} are required and automatically set |
|
73 |
+#' \code{log.ratio}, \code{tumor}, \code{gc.data}, \code{seg.file} and |
|
74 |
+#' \code{normalDB} are required and automatically set |
|
75 | 75 |
#' @param fun.segmentation Function for segmenting the copy number log-ratios. |
76 | 76 |
#' Expected return value is a \code{data.frame} representation of the |
77 | 77 |
#' segmentation. Defaults to \code{\link{segmentationCBS}}. |
78 | 78 |
#' @param args.segmentation Arguments for segmentation function. Arguments |
79 |
-#' \code{normal}, \code{tumor}, \code{log.ratio}, \code{plot.cnv}, |
|
79 |
+#' \code{normal}, \code{tumor}, \code{log.ratio}, \code{plot.cnv},
|
80 | 80 |
#' \code{min.coverage}, \code{sampleid}, \code{vcf}, \code{tumor.id.in.vcf}, |
81 |
-#' \code{centromeres} and \code{verbose} are required in the segmentation function |
|
81 |
+#' and \code{centromeres} are required in the segmentation function
|
82 | 82 |
#' and automatically set. |
83 | 83 |
#' @param fun.focal Function for identifying focal amplifications. Defaults to |
84 | 84 |
#' \code{\link{findFocal}}. |
... | ... |
@@ -133,6 +133,8 @@ |
133 | 133 |
#' \code{sd(log.ratio)*log.ratio.calibration}. |
134 | 134 |
#' @param remove.off.target.snvs Deprecated. Use the corresponding argument in |
135 | 135 |
#' \code{args.filterVcf}. |
136 |
+#' @param smooth.log.ratio Smooth \code{log.ratio} using the \code{DNAcopy} |
|
137 |
+#' package. |
|
136 | 138 |
#' @param model.homozygous Homozygous germline SNPs are uninformative and by |
137 | 139 |
#' default removed. In 100 percent pure samples such as cell lines, however, |
138 | 140 |
#' heterozygous germline SNPs appear homozygous in case of LOH. Setting this |
... | ... |
@@ -171,11 +173,19 @@ |
171 | 173 |
#' typically result in a slightly more accurate purity, especially for rather |
172 | 174 |
#' silent genomes or very low purities. Otherwise, it will just use the purity |
173 | 175 |
#' determined via the SCNA-fit. |
176 |
+#' @param log.file If not \code{NULL}, store verbose output to file. |
|
174 | 177 |
#' @param verbose Verbose output. |
175 | 178 |
#' @return A list with elements \item{candidates}{Results of the grid search.} |
176 | 179 |
#' \item{results}{All local optima, sorted by final rank.} \item{input}{The |
177 | 180 |
#' input data.} |
178 | 181 |
#' @author Markus Riester |
182 |
+#' @references Riester et al. (2016). PureCN: Copy number calling and SNV |
|
183 |
+#' classification using targeted short read sequencing. Source Code for Biology |
|
184 |
+#' and Medicine, 11, pp. 13. |
|
185 |
+#' |
|
186 |
+#' Carter et al. (2012), Absolute quantification of somatic DNA alterations in |
|
187 |
+#' human cancer. Nature Biotechnology. |
|
188 |
+#' |
|
179 | 189 |
#' @seealso \code{\link{correctCoverageBias}} \code{\link{segmentationCBS}} |
180 | 190 |
#' \code{\link{calculatePowerDetectSomatic}} |
181 | 191 |
#' @examples |
... | ... |
@@ -221,6 +231,8 @@ |
221 | 231 |
#' @importFrom utils data read.delim tail packageVersion |
222 | 232 |
#' @importFrom S4Vectors queryHits subjectHits DataFrame |
223 | 233 |
#' @importFrom data.table data.table |
234 |
+#' @importFrom futile.logger flog.info flog.warn flog.fatal |
|
235 |
+#' flog.threshold flog.appender appender.tee |
|
224 | 236 |
runAbsoluteCN <- function(normal.coverage.file = NULL, |
225 | 237 |
tumor.coverage.file = NULL, log.ratio = NULL, seg.file = NULL, |
226 | 238 |
seg.file.sdev = 0.4, vcf.file = NULL, normalDB = NULL, genome, |
... | ... |
@@ -237,29 +249,32 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
237 | 249 |
candidates = NULL, min.coverage = 15, max.coverage.vcf = 300, |
238 | 250 |
max.non.clonal = 0.2, max.homozygous.loss = c(0.05, 1e07) , non.clonal.M = 1/3, |
239 | 251 |
max.mapping.bias = 0.8, iterations = 30, log.ratio.calibration = 0.25, |
240 |
- remove.off.target.snvs = NULL, model.homozygous = FALSE, error = 0.001, |
|
241 |
- gc.gene.file = NULL, max.dropout = c(0.95, 1.1), max.logr.sdev = 0.75, |
|
252 |
+ smooth.log.ratio = TRUE, remove.off.target.snvs = NULL, |
|
253 |
+ model.homozygous = FALSE, error = 0.001, gc.gene.file = NULL, |
|
254 |
+ max.dropout = c(0.95, 1.1), max.logr.sdev = 0.75, |
|
242 | 255 |
max.segments = 300, min.gof = 0.8, plot.cnv = TRUE, cosmic.vcf.file = NULL, |
243 |
- post.optimize = FALSE, verbose = TRUE) { |
|
244 |
- debug <- FALSE |
|
245 |
- |
|
256 |
+ post.optimize = FALSE, log.file = NULL, verbose = TRUE) { |
|
257 |
+ |
|
258 |
+ if (!verbose) flog.threshold("WARN") |
|
259 |
+ if (!is.null(log.file)) flog.appender(appender.tee(log.file)) |
|
260 |
+ |
|
261 |
+ # log function arguments |
|
262 |
+ try(.logHeader(as.list(match.call())[-1]), silent=TRUE) |
|
263 |
+ |
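These logging calls use the futile.logger package: flog.threshold("WARN") raises
the minimum severity that is printed, so flog.info() messages are silenced when
verbose=FALSE, and flog.appender(appender.tee(log.file)) sends every message to
both the console and the given file. A small standalone sketch (the file name is
just an example):

    library(futile.logger)
    flog.threshold("WARN")                         # hide INFO messages
    flog.appender(appender.tee("purecn_run.log"))  # log to console and file
    flog.info("suppressed by the WARN threshold")
    flog.warn("printed and also written to purecn_run.log")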
|
246 | 264 |
# TODO: remove in PureCN 1.8 |
247 | 265 |
if (!is.null(remove.off.target.snvs)) { |
248 | 266 |
args.filterVcf$remove.off.target.snvs <- remove.off.target.snvs |
249 |
- message("remove.off.target.snvs is deprecated. ", |
|
250 |
- "Please use it in args.filterVcf instead.") |
|
267 |
+ flog.warn("remove.off.target.snvs is deprecated. Please use it in args.filterVcf instead.") |
|
251 | 268 |
} |
252 | 269 |
# TODO: remove in PureCN 1.8 |
253 | 270 |
if (length(max.homozygous.loss)==1){ |
254 | 271 |
max.homozygous.loss <- c(max.homozygous.loss, 1e07) |
255 |
- message("max.homozygous.loss now a double(2) vector. ", |
|
256 |
- "Please provide both values.") |
|
272 |
+ flog.warn("max.homozygous.loss now a double(2) vector. Please provide both values.") |
|
257 | 273 |
} |
258 | 274 |
# TODO: remove in PureCN 1.8 |
259 | 275 |
if (is.null(normalDB) && !is.null(args.filterTargets$normalDB)) { |
260 | 276 |
normalDB <- args.filterTargets$normalDB |
261 |
- message("normalDB now a runAbsoluteCN argument. ", |
|
262 |
- "Please provide it there, not in args.filterTargets.") |
|
277 |
+ flog.warn("normalDB now a runAbsoluteCN argument. Please provide it there, not in args.filterTargets.") |
|
263 | 278 |
} |
264 | 279 |
centromeres <- .getCentromerePositions(centromeres, genome) |
265 | 280 |
|
... | ... |
@@ -274,8 +289,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
274 | 289 |
|
275 | 290 |
test.num.copy <- sort(test.num.copy) |
276 | 291 |
|
277 |
- if (verbose) |
|
278 |
- message("Loading GATK coverage files...") |
|
292 |
+ flog.info("Loading GATK coverage files...") |
|
279 | 293 |
|
280 | 294 |
if (!is.null(normal.coverage.file)) { |
281 | 295 |
if (is.character(normal.coverage.file)) { |
... | ... |
@@ -298,9 +312,6 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
298 | 312 |
if (is.null(sampleid)) |
299 | 313 |
sampleid <- basename(tumor.coverage.file) |
300 | 314 |
} else { |
301 |
- if (verbose) |
|
302 |
- message("tumor.coverage.file does not appear to be a filename, assuming", |
|
303 |
- " it is valid GATK coverage data.") |
|
304 | 315 |
tumor <- tumor.coverage.file |
305 | 316 |
if (is.null(sampleid)) |
306 | 317 |
sampleid <- "Sample.1" |
... | ... |
@@ -326,14 +337,15 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
326 | 337 |
if (is.null(normal.coverage.file)) |
327 | 338 |
normal <- tumor |
328 | 339 |
log.ratio <- .createFakeLogRatios(tumor, seg.file, chr.hash) |
340 |
+ smooth.log.ratio <- FALSE |
|
329 | 341 |
if (is.null(sampleid)) |
330 |
- sampleid <- read.delim(seg.file)[1, 1] |
|
342 |
+ sampleid <- read.delim(seg.file, as.is=TRUE)[1, 1] |
|
331 | 343 |
} else { |
332 | 344 |
if (is.null(normal.coverage.file)) { |
333 | 345 |
.stopUserError("Need a normal coverage file if log.ratio and seg.file are not", |
334 | 346 |
" provided.") |
335 | 347 |
} |
336 |
- log.ratio <- calculateLogRatio(normal, tumor, verbose = verbose) |
|
348 |
+ log.ratio <- calculateLogRatio(normal, tumor) |
|
337 | 349 |
} |
338 | 350 |
} else { |
339 | 351 |
# the segmentation algorithm will remove targets with low coverage in both tumor |
... | ... |
@@ -362,14 +374,14 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
362 | 374 |
} |
363 | 375 |
|
364 | 376 |
args.filterTargets <- c(list(log.ratio = log.ratio, tumor = tumor, |
365 |
- gc.data = gc.data, seg.file = seg.file, normalDB = normalDB, |
|
366 |
- verbose = verbose), args.filterTargets) |
|
377 |
+ gc.data = gc.data, seg.file = seg.file, normalDB = normalDB), |
|
378 |
+ args.filterTargets) |
|
367 | 379 |
|
368 | 380 |
targetsUsed <- do.call(fun.filterTargets, |
369 | 381 |
.checkArgs(args.filterTargets, "filterTargets")) |
370 | 382 |
|
371 | 383 |
# chr.hash is an internal data structure, so we need to do this separately. |
372 |
- targetsUsed <- .filterTargetsChrHash(targetsUsed, tumor, chr.hash, verbose) |
|
384 |
+ targetsUsed <- .filterTargetsChrHash(targetsUsed, tumor, chr.hash) |
|
373 | 385 |
targetsUsed <- which(targetsUsed) |
374 | 386 |
if (nrow(tumor) != nrow(normal) || nrow(tumor) != length(log.ratio) || (!is.null(gc.gene.file) && |
375 | 387 |
nrow(tumor) != nrow(gc.data))) { |
... | ... |
@@ -381,28 +393,31 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
381 | 393 |
if (!is.null(gc.gene.file)) { |
382 | 394 |
gc.data <- gc.data[targetsUsed, ] |
383 | 395 |
} |
384 |
- if (verbose) |
|
385 |
- message("Using ", nrow(tumor), " targets.") |
|
396 |
+ flog.info("Using %i targets.", nrow(tumor)) |
|
397 |
+ |
|
398 |
+ if (smooth.log.ratio) { |
|
399 |
+ CNA.obj <- smooth.CNA(CNA(log.ratio, |
|
400 |
+ .strip.chr.name(normal$chr, chr.hash), |
|
401 |
+ floor((normal$probe_start + normal$probe_end)/2), |
|
402 |
+ data.type="logratio", sampleid="sample")) |
|
403 |
+ log.ratio <- CNA.obj$sample |
|
404 |
+ } |
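The new smooth.log.ratio step wraps the log-ratios in a DNAcopy CNA object and
runs smooth.CNA(), which shrinks single-target outliers before segmentation.
A self-contained sketch of the same idea on simulated data (all values are
invented):

    library(DNAcopy)
    set.seed(123)
    lr <- rnorm(100, sd=0.2)
    lr[50] <- 3    # single-target outlier
    cna <- CNA(lr, chrom=rep(1, 100),
        maploc=seq(10000, 1e6, length.out=100),
        data.type="logratio", sampleid="sample")
    lr.smoothed <- smooth.CNA(cna)$sample    # outlier pulled toward neighbors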
|
386 | 405 |
|
387 | 406 |
dropoutWarning <- FALSE |
388 | 407 |
# clean up noisy targets, but not if the segmentation was already provided. |
389 | 408 |
if (is.null(seg.file)) { |
390 | 409 |
if (!is.null(gc.gene.file)) { |
391 |
- dropoutWarning <- .checkGCBias(normal, tumor, gc.data, max.dropout, verbose) |
|
392 |
- } else if (verbose) { |
|
393 |
- message("No gc.gene.file provided. Cannot check if data was ", |
|
394 |
- "GC-normalized. Was it?") |
|
410 |
+ dropoutWarning <- .checkGCBias(normal, tumor, gc.data, max.dropout) |
|
411 |
+ } else { |
|
412 |
+ flog.info("No gc.gene.file provided. Cannot check if data was GC-normalized. Was it?") |
|
395 | 413 |
} |
396 | 414 |
} |
397 | 415 |
|
398 | 416 |
if (!is.null(gc.gene.file) && is.null(gc.data$Gene)) { |
399 |
- if (verbose) |
|
400 |
- message("No Gene column in gc.gene.file.", |
|
401 |
- " You won't get gene-level calls.") |
|
417 |
+ flog.info("No Gene column in gc.gene.file. You won't get gene-level calls.") |
|
402 | 418 |
gc.gene.file <- NULL |
403 | 419 |
} |
404 | 420 |
|
405 |
- |
|
406 | 421 |
exon.gr <- GRanges(seqnames=tumor$chr, IRanges(start=tumor$probe_start, |
407 | 422 |
end=tumor$probe_end)) |
408 | 423 |
|
... | ... |
@@ -416,9 +431,8 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
416 | 431 |
sex.vcf <- NULL |
417 | 432 |
|
418 | 433 |
if (!is.null(vcf.file)) { |
419 |
- if (verbose) |
|
420 |
- message("Loading VCF...") |
|
421 |
- vcf <- .readAndCheckVcf(vcf.file, genome=genome, verbose=verbose) |
|
434 |
+ flog.info("Loading VCF...") |
|
435 |
+ vcf <- .readAndCheckVcf(vcf.file, genome=genome) |
|
422 | 436 |
|
423 | 437 |
if (length(intersect(tumor$chr, seqlevels(vcf))) < 1) { |
424 | 438 |
.stopUserError("Different chromosome names in coverage and VCF.") |
... | ... |
@@ -429,7 +443,6 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
429 | 443 |
} |
430 | 444 |
|
431 | 445 |
if (sum(colSums(geno(vcf)$DP) > 0) == 1 && args.filterVcf$use.somatic.status) { |
432 |
- message("VCF file seems to have only one sample. ", "Using SNVs in single mode.") |
|
433 | 446 |
args.filterVcf$use.somatic.status <- FALSE |
434 | 447 |
} |
435 | 448 |
|
... | ... |
@@ -439,23 +452,21 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
439 | 452 |
normal.id.in.vcf <- .getNormalIdInVcf(vcf, tumor.id.in.vcf) |
440 | 453 |
} |
441 | 454 |
|
442 |
- if (verbose) |
|
443 |
- message("Assuming ", tumor.id.in.vcf, " is tumor in VCF file.") |
|
455 |
+ flog.info("%s is tumor in VCF file.", tumor.id.in.vcf) |
|
444 | 456 |
if (sex != "diploid") { |
445 |
- sex.vcf <- getSexFromVcf(vcf, tumor.id.in.vcf, verbose = verbose) |
|
457 |
+ sex.vcf <- getSexFromVcf(vcf, tumor.id.in.vcf) |
|
446 | 458 |
if (!is.na(sex.vcf) && sex %in% c("F", "M") && sex.vcf != sex) { |
447 |
- warning("Sex mismatch of coverage and VCF. ", |
|
459 |
+ flog.warn("Sex mismatch of coverage and VCF. %s%s", |
|
448 | 460 |
"Could be because of noisy data, contamination, ", |
449 | 461 |
"loss of chrY or a mis-alignment of coverage and VCF.") |
450 | 462 |
} |
451 | 463 |
} |
452 | 464 |
n.vcf.before.filter <- nrow(vcf) |
453 |
- if (verbose) |
|
454 |
- message("Found ", n.vcf.before.filter, " variants in VCF file.") |
|
465 |
+ flog.info("Found %i variants in VCF file.", n.vcf.before.filter) |
|
455 | 466 |
|
456 | 467 |
args.filterVcf <- c(list(vcf = vcf, tumor.id.in.vcf = tumor.id.in.vcf, |
457 | 468 |
model.homozygous = model.homozygous, error = error, |
458 |
- target.granges = exon.gr, verbose = verbose), args.filterVcf) |
|
469 |
+ target.granges = exon.gr), args.filterVcf) |
|
459 | 470 |
if (is.null(args.filterVcf$min.coverage)) { |
460 | 471 |
args.filterVcf$min.coverage <- min.coverage |
461 | 472 |
} |
... | ... |
@@ -465,34 +476,31 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
465 | 476 |
vcf <- vcf.filtering$vcf |
466 | 477 |
|
467 | 478 |
if (!is.null(cosmic.vcf.file)) { |
468 |
- vcf <- .addCosmicCNT(vcf, cosmic.vcf.file, verbose = verbose) |
|
479 |
+ vcf <- .addCosmicCNT(vcf, cosmic.vcf.file) |
|
469 | 480 |
} |
470 | 481 |
|
471 |
- args.setPriorVcf <- c(list(vcf = vcf, tumor.id.in.vcf = tumor.id.in.vcf, |
|
472 |
- verbose = verbose), args.setPriorVcf) |
|
482 |
+ args.setPriorVcf <- c(list(vcf = vcf, tumor.id.in.vcf = tumor.id.in.vcf), |
|
483 |
+ args.setPriorVcf) |
|
473 | 484 |
prior.somatic <- do.call(fun.setPriorVcf, |
474 | 485 |
.checkArgs(args.setPriorVcf, "setPriorVcf")) |
475 | 486 |
|
476 | 487 |
# get mapping bias |
477 | 488 |
args.setMappingBiasVcf$vcf <- vcf |
478 | 489 |
args.setMappingBiasVcf$tumor.id.in.vcf <- tumor.id.in.vcf |
479 |
- args.setMappingBiasVcf$verbose <- verbose |
|
480 | 490 |
mapping.bias <- do.call(fun.setMappingBiasVcf, |
481 | 491 |
.checkArgs(args.setMappingBiasVcf, "setMappingBiasVcf")) |
482 | 492 |
idxHqGermline <- prior.somatic < 0.5 & mapping.bias >= max.mapping.bias |
483 | 493 |
vcf.germline <- vcf[idxHqGermline] |
484 | 494 |
} |
485 | 495 |
|
486 |
- if (verbose) |
|
487 |
- message("Sex of sample: ", sex) |
|
488 |
- if (verbose) |
|
489 |
- message("Segmenting data...") |
|
496 |
+ flog.info("Sample sex: %s", sex) |
|
497 |
+ flog.info("Segmenting data...") |
|
490 | 498 |
|
491 | 499 |
args.segmentation <- c(list(normal = normal, tumor = tumor, log.ratio = log.ratio, |
492 | 500 |
seg = .loadSegFile(seg.file), plot.cnv = plot.cnv, min.coverage = ifelse(is.null(seg.file), |
493 | 501 |
min.coverage, -1), sampleid = sampleid, vcf = vcf.germline, tumor.id.in.vcf = tumor.id.in.vcf, |
494 | 502 |
normal.id.in.vcf = normal.id.in.vcf, max.segments = max.segments, chr.hash = chr.hash, |
495 |
- centromeres = centromeres, verbose = verbose), args.segmentation) |
|
503 |
+ centromeres = centromeres), args.segmentation) |
|
496 | 504 |
|
497 | 505 |
vcf.germline <- NULL |
498 | 506 |
seg <- do.call(fun.segmentation, |
... | ... |
@@ -516,20 +524,13 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
516 | 524 |
n.vcf.before.filter <- nrow(vcf) |
517 | 525 |
vcf <- vcf[!is.na(snv.lr)] |
518 | 526 |
mapping.bias <- mapping.bias[!is.na(snv.lr)] |
527 |
+ prior.somatic <- prior.somatic[!is.na(snv.lr)] |
|
519 | 528 |
|
520 | 529 |
# make sure all SNVs are in covered segments |
521 |
- if (verbose) |
|
522 |
- message("Removing ", n.vcf.before.filter - nrow(vcf), " variants outside segments.") |
|
530 |
+ flog.info("Removing %i variants outside segments.", n.vcf.before.filter - nrow(vcf)) |
|
523 | 531 |
} |
524 | 532 |
ov <- findOverlaps(seg.gr, vcf) |
525 |
- if (verbose) |
|
526 |
- message("Using ", nrow(vcf), " variants.") |
|
527 |
- |
|
528 |
- # get final somatic priors |
|
529 |
- args.setPriorVcf$vcf <- vcf |
|
530 |
- args.setPriorVcf$verbose <- FALSE |
|
531 |
- prior.somatic <- do.call(fun.setPriorVcf, |
|
532 |
- .checkArgs(args.setPriorVcf, "setPriorVcf")) |
|
533 |
+ flog.info("Using %i variants.", nrow(vcf)) |
|
533 | 534 |
} |
534 | 535 |
|
535 | 536 |
# get target log-ratios for all segments |
... | ... |
@@ -554,7 +555,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
554 | 555 |
if (!is.null(seg.file)) { |
555 | 556 |
sd.seg <- seg.file.sdev |
556 | 557 |
} else { |
557 |
- exon.lrs <- lapply(exon.lrs, .smoothOutliers) |
|
558 |
+ # exon.lrs <- lapply(exon.lrs, .smoothOutliers) |
|
558 | 559 |
} |
559 | 560 |
|
560 | 561 |
# renormalize, in case segmentation function changed means |
... | ... |
@@ -587,14 +588,10 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
587 | 588 |
|
588 | 589 |
if (sum(li < 0) > 0) |
589 | 590 |
.stopRuntimeError("Some segments have negative size.") |
590 |
- |
|
591 |
- if (verbose) { |
|
592 |
- message("Mean standard deviation of log-ratios: ", round(sd.seg, digits = 2)) |
|
593 |
- } |
|
591 |
+ flog.info("Mean standard deviation of log-ratios: %.2f", sd.seg) |
|
594 | 592 |
log.ratio.offset <- rep(0, nrow(seg)) |
595 | 593 |
|
596 |
- if (verbose) |
|
597 |
- message("Optimizing purity and ploidy. ", "Will take a minute or two...") |
|
594 |
+ flog.info("2D-grid search of purity and ploidy...") |
|
598 | 595 |
|
599 | 596 |
# find local maxima. use a coarser grid for purity, otherwise we will get far too |
600 | 597 |
# many solutions, which we will need to cluster later anyways. |
... | ... |
@@ -603,7 +600,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
603 | 600 |
} else { |
604 | 601 |
candidate.solutions <- .optimizeGrid(test.purity = seq(max(0.1, min(test.purity)), |
605 | 602 |
min(0.99, max(test.purity)), by = 1/30), min.ploidy, max.ploidy, test.num.copy = test.num.copy, |
606 |
- exon.lrs, seg, sd.seg, li, max.exon.ratio, max.non.clonal, verbose, debug) |
|
603 |
+ exon.lrs, seg, sd.seg, li, max.exon.ratio, max.non.clonal) |
|
607 | 604 |
|
608 | 605 |
# if we have > 20 somatic mutations, we can try estimating purity based on |
609 | 606 |
# allelic fractions and assuming diploid genomes. |
... | ... |
@@ -623,11 +620,9 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
623 | 620 |
candidate.solutions$candidates <- candidate.solutions$candidates[idx.keep, |
624 | 621 |
] |
625 | 622 |
} |
626 |
- |
|
627 |
- if (verbose) |
|
628 |
- message(paste(strwrap(paste("Local optima:", paste(round(candidate.solutions$candidates$purity, |
|
629 |
- digits = 2), round(candidate.solutions$candidates$ploidy, digits = 2), |
|
630 |
- sep = "/", collapse = ", "))), collapse = "\n")) |
|
623 |
+ flog.info(paste(strwrap(paste("Local optima:\n", paste(round(candidate.solutions$candidates$purity, |
|
624 |
+ digits = 2), round(candidate.solutions$candidates$ploidy, digits = 2), |
|
625 |
+ sep = "/", collapse = ", "))), collapse = "\n")) |
|
631 | 626 |
|
632 | 627 |
simulated.annealing <- TRUE |
633 | 628 |
|
... | ... |
@@ -640,16 +635,14 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
640 | 635 |
total.ploidy <- candidate.solutions$candidates$ploidy[cpi] |
641 | 636 |
p <- candidate.solutions$candidates$purity[cpi] |
642 | 637 |
|
643 |
- if (verbose) |
|
644 |
- message("Testing local optimum ", cpi, "/", nrow(candidate.solutions$candidates), |
|
645 |
- " at purity ", round(p, digits = 2), " and total ploidy ", round(total.ploidy, |
|
646 |
- digits = 2), "...") |
|
638 |
+ flog.info("Testing local optimum %i/%i at purity %.2f and total ploidy %.2f...", |
|
639 |
+ cpi, nrow(candidate.solutions$candidates), p, total.ploidy) |
|
647 | 640 |
|
648 | 641 |
subclonal <- rep(FALSE, nrow(seg)) |
649 | 642 |
old.llik <- -1 |
650 | 643 |
cnt.llik.equal <- 0 |
651 |
- C.posterior <- matrix(ncol = length(test.num.copy) + 1, nrow = nrow(seg)) |
|
652 |
- colnames(C.posterior) <- c(test.num.copy, "Subclonal") |
|
644 |
+ C.likelihood <- matrix(ncol = length(test.num.copy) + 1, nrow = nrow(seg)) |
|
645 |
+ colnames(C.likelihood) <- c(test.num.copy, "Subclonal") |
|
653 | 646 |
for (iter in seq_len(iterations)) { |
654 | 647 |
# test for convergence |
655 | 648 |
if (abs(old.llik - llik) < 0.0001) { |
... | ... |
@@ -710,12 +703,6 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
710 | 703 |
p, " and total ploidy ", total.ploidy, ".") |
711 | 704 |
} |
712 | 705 |
|
713 |
- if (debug) |
|
714 |
- message(paste("Iteration:", iter, " Log-likelihood: ", llik, " Purity:", |
|
715 |
- p, " Total Ploidy:", total.ploidy, " Tumor Ploidy:", sum(li * |
|
716 |
- (C))/sum(li), " Fraction sub-clonal:", subclonal.f, " Mean log-ratio offset", |
|
717 |
- mean(log.ratio.offset))) |
|
718 |
- |
|
719 | 706 |
for (i in seq_len(nrow(seg))) { |
720 | 707 |
# Gibbs sample copy number Step 1: calculate log-likelihoods of fits In the first |
721 | 708 |
# iteration, we do not have the integer copy numbers yet, so calculate ploidy |
... | ... |
@@ -743,8 +730,9 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
743 | 730 |
ifelse(C[-i] == 0, 1, 0)) + li[i] * ifelse(Ci == 0, 1, 0))/sum(li), |
744 | 731 |
double(1)) |
745 | 732 |
|
746 |
- # set maximimum homozygous loss size to 10mb. |
|
747 |
- if (li[i]>max.homozygous.loss[2] && test.num.copy[1]<1) frac.homozygous.loss[1] <- 1 |
|
733 |
+ if (li[i] > max.homozygous.loss[2] && test.num.copy[1]<1) { |
|
734 |
+ frac.homozygous.loss[1] <- 1 |
|
735 |
+ } |
|
748 | 736 |
log.prior.homozygous.loss <- log(ifelse(frac.homozygous.loss > |
749 | 737 |
max.homozygous.loss[1], 0, 1)) |
750 | 738 |
if (iter > 1) |
... | ... |
@@ -754,7 +742,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
754 | 742 |
p.rij <- c(p.rij, .calcLlikSegmentSubClonal(exon.lrs[[i]] + log.ratio.offset[i], |
755 | 743 |
max.exon.ratio)) |
756 | 744 |
|
757 |
- C.posterior[i, ] <- exp(p.rij - max(p.rij)) |
|
745 |
+ C.likelihood[i, ] <- exp(p.rij - max(p.rij)) |
|
758 | 746 |
|
759 | 747 |
if (simulated.annealing) |
760 | 748 |
p.rij <- p.rij * exp(iter/4) |
... | ... |
@@ -772,8 +760,6 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
772 | 760 |
old.C <- C[i] |
773 | 761 |
opt.C <- (2^(seg$seg.mean + log.ratio.offset) * total.ploidy)/p - |
774 | 762 |
((2 * (1 - p))/p) |
775 |
- # message(opt.C[i], ' seg: ', seg$seg.mean[i], ' offset: ', log.ratio.offset[i], |
|
776 |
- # ' purity: ', p) |
|
777 | 763 |
opt.C[opt.C < 0] <- 0 |
778 | 764 |
if (id > length(test.num.copy)) { |
779 | 765 |
# optimal non-integer copy number |
... | ... |
@@ -783,16 +769,14 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
783 | 769 |
C[i] <- test.num.copy[id] |
784 | 770 |
subclonal[i] <- FALSE |
785 | 771 |
} |
786 |
- if (old.C != C[i] && debug) |
|
787 |
- message("Old: ", old.C, " New: ", C[i], " LR: ", mean(exon.lrs[[i]])) |
|
788 | 772 |
} |
789 | 773 |
} |
790 | 774 |
if (subclonal.f < max.non.clonal && abs(total.ploidy - candidate.solutions$candidates$ploidy[cpi]) < |
791 | 775 |
1) |
792 | 776 |
break |
793 | 777 |
log.ratio.calibration <- log.ratio.calibration + 0.25 |
794 |
- if (verbose && attempt < max.attempts) { |
|
795 |
- message("Recalibrating log-ratios...") |
|
778 |
+ if (attempt < max.attempts) { |
|
779 |
+ flog.info("Recalibrating log-ratios...") |
|
796 | 780 |
} |
797 | 781 |
} |
798 | 782 |
seg.adjusted <- seg |
... | ... |
@@ -807,7 +791,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
807 | 791 |
SNV.posterior <- list(beta.model = list(llik = -Inf)) |
808 | 792 |
} |
809 | 793 |
return(list(log.likelihood = llik, purity = p, ploidy = weighted.mean(C, |
810 |
- li), total.ploidy = total.ploidy, seg = seg.adjusted, C.posterior = data.frame(C.posterior/rowSums(C.posterior), |
|
794 |
+ li), total.ploidy = total.ploidy, seg = seg.adjusted, C.likelihood = data.frame(C.likelihood/rowSums(C.likelihood), |
|
811 | 795 |
ML.C = C, ML.Subclonal = subclonal), SNV.posterior = SNV.posterior, |
812 | 796 |
fraction.subclonal = subclonal.f, fraction.homozygous.loss = sum(li[which(C < |
813 | 797 |
0.01)])/sum(li), gene.calls = NA, log.ratio.offset = log.ratio.offset, |
... | ... |
@@ -833,15 +817,15 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
833 | 817 |
tp <- p |
834 | 818 |
pp <- 1 |
835 | 819 |
} |
820 |
+ cont.rate <- prior.contamination |
|
836 | 821 |
.fitSNV <- function(tp, pp) { |
837 |
- .fitSNVp <- function(px) { |
|
838 |
- if (verbose) |
|
839 |
- message("Fitting SNVs for purity ", round(px, digits = 2), " and tumor ploidy ", |
|
840 |
- round(weighted.mean(C, li), digits = 2), ".") |
|
822 |
+ .fitSNVp <- function(px, cont.rate=prior.contamination) { |
|
823 |
+ flog.info("Fitting SNVs for purity %.2f, tumor ploidy %.2f and contamination %.2f.", |
|
824 |
+ px, weighted.mean(C, li), cont.rate) |
|
841 | 825 |
|
842 | 826 |
list(beta.model = .calcSNVLLik(vcf, tumor.id.in.vcf, ov, px, test.num.copy, |
843 |
- C.posterior, C, opt.C, snv.model = "beta", prior.somatic, mapping.bias, |
|
844 |
- snv.lr, sampleid, cont.rate = prior.contamination, prior.K = prior.K, |
|
827 |
+ C.likelihood, C, opt.C, snv.model = "beta", prior.somatic, mapping.bias, |
|
828 |
+ snv.lr, sampleid, cont.rate = cont.rate, prior.K = prior.K, |
|
845 | 829 |
max.coverage.vcf = max.coverage.vcf, non.clonal.M = non.clonal.M, |
846 | 830 |
model.homozygous = model.homozygous, error = error, max.mapping.bias = max.mapping.bias)) |
847 | 831 |
} |
... | ... |
@@ -852,9 +836,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
852 | 836 |
GoF <- .getGoF(list(SNV.posterior=res.snvllik[[1]])) |
853 | 837 |
idx <- 1 |
854 | 838 |
if (GoF < (min.gof-0.05)) { |
855 |
- if (verbose) message("Poor goodness-of-fit (", |
|
856 |
- round(GoF, digits=3), |
|
857 |
- "). Skipping post-optimization.") |
|
839 |
+ flog.info("Poor goodness-of-fit (%.3f). Skipping post-optimization.", GoF) |
|
858 | 840 |
} else { |
859 | 841 |
res.snvllik <- c(res.snvllik, lapply(tp[-1], .fitSNVp)) |
860 | 842 |
px.rij <- lapply(tp, function(px) vapply(which(!is.na(C)), function(i) .calcLlikSegment(subclonal = subclonal[i], |
... | ... |
@@ -870,8 +852,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
870 | 852 |
idx <- 1 |
871 | 853 |
} |
872 | 854 |
p <- tp[idx] |
873 |
- if (verbose) |
|
874 |
- message("Optimized purity: ", p) |
|
855 |
+ flog.info("Optimized purity: %.2f", p) |
|
875 | 856 |
SNV.posterior <- res.snvllik[[idx]] |
876 | 857 |
list(p = p, SNV.posterior = SNV.posterior, llik = px.rij.s[idx]) |
877 | 858 |
} |
... | ... |
@@ -879,21 +860,57 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
879 | 860 |
p <- fitRes$p |
880 | 861 |
SNV.posterior <- fitRes$SNV.posterior |
881 | 862 |
} |
882 |
- |
|
883 |
- list(log.likelihood = llik, purity = p, ploidy = weighted.mean(C, li), total.ploidy = total.ploidy, |
|
884 |
- seg = seg.adjusted, C.posterior = data.frame(C.posterior/rowSums(C.posterior), |
|
885 |
- ML.C = C, ML.Subclonal = subclonal), SNV.posterior = SNV.posterior, |
|
886 |
- fraction.subclonal = subclonal.f, fraction.homozygous.loss = sum(li[which(C < |
|
887 |
- 0.01)])/sum(li), gene.calls = gene.calls, log.ratio.offset = log.ratio.offset, |
|
888 |
- SA.iterations = iter, failed = FALSE) |
|
863 |
+ |
|
864 |
+ list(log.likelihood = llik, purity = p, ploidy = weighted.mean(C, li), |
|
865 |
+ total.ploidy = total.ploidy, seg = seg.adjusted, |
|
866 |
+ C.posterior = data.frame(C.likelihood/rowSums(C.likelihood), ML.C = |
|
867 |
+ C, Opt.C = opt.C, ML.Subclonal = subclonal), |
|
868 |
+ C.likelihood=C.likelihood, SNV.posterior = SNV.posterior, |
|
869 |
+ fraction.subclonal = subclonal.f, fraction.homozygous.loss = |
|
870 |
+ sum(li[which(C < 0.01)])/sum(li), gene.calls = gene.calls, |
|
871 |
+ log.ratio.offset = log.ratio.offset, SA.iterations = iter, failed = |
|
872 |
+ FALSE) |
|
889 | 873 |
} |
890 | 874 |
|
891 | 875 |
results <- lapply(seq_len(nrow(candidate.solutions$candidates)), .optimizeSolution) |
892 |
- if (verbose) |
|
893 |
- message("Remember, posterior probabilities assume a correct SCNA fit.") |
|
894 | 876 |
|
895 | 877 |
results <- .rankResults(results) |
896 | 878 |
results <- .filterDuplicatedResults(results) |
879 |
+ |
|
880 |
+ if (grepl("CONTAMINATION", vcf.filtering$flag_comment)) { |
|
881 |
+ cont.rate <- .plotContamination( |
|
882 |
+ results[[1]]$SNV.posterior$beta.model$posteriors, |
|
883 |
+ max.mapping.bias, plot=FALSE) |
|
884 |
+ if (cont.rate > prior.contamination) { |
|
885 |
+ flog.info("Initial guess of contamination rate: %.3f", cont.rate) |
|
886 |
+ } |
|
887 |
+ } |
|
888 |
+ ## optimize contamination. we just re-run the fitting |
|
889 |
+ if (grepl("CONTAMINATION", vcf.filtering$flag_comment) && |
|
890 |
+ cont.rate>prior.contamination) { |
|
891 |
+ flog.info("Optimizing contamination rate...") |
|
892 |
+ |
|
893 |
+ res.snvllik <- |
|
894 |
+ .calcSNVLLik(vcf, tumor.id.in.vcf, |
|
895 |
+ ov, results[[1]]$purity, test.num.copy, results[[1]]$C.likelihood, |
|
896 |
+ results[[1]]$C.posterior$ML.C, |
|
897 |
+ results[[1]]$C.posterior$Opt.C, |
|
898 |
+ snv.model = "beta", prior.somatic, mapping.bias, |
|
899 |
+ snv.lr, sampleid, cont.rate = cont.rate, prior.K = prior.K, |
|
900 |
+ max.coverage.vcf = max.coverage.vcf, non.clonal.M = non.clonal.M, |
|
901 |
+ model.homozygous = model.homozygous, error = error, |
|
902 |
+ max.mapping.bias = max.mapping.bias) |
|
903 |
+ results[[1]]$SNV.posterior$beta.model <- res.snvllik |
|
904 |
+ cont.rate <- .plotContamination( |
|
905 |
+ results[[1]]$SNV.posterior$beta.model$posteriors, |
|
906 |
+ max.mapping.bias, plot=FALSE) |
|
907 |
+ flog.info("Optimized contamination rate: %.3f", cont.rate) |
|
908 |
+ results[[1]]$SNV.posterior$beta.model$posterior.contamination <- cont.rate |
|
909 |
+ # add contamination rate to flag comment |
|
910 |
+ vcf.filtering$flag_comment <- gsub("POTENTIAL SAMPLE CONTAMINATION", |
|
911 |
+ paste0("POTENTIAL SAMPLE CONTAMINATION (", round(cont.rate*100, digits=1),"%)"), |
|
912 |
+ vcf.filtering$flag_comment) |
|
913 |
+ } |
|
897 | 914 |
results <- .flagResults(results, max.non.clonal = max.non.clonal, max.logr.sdev = max.logr.sdev, |
898 | 915 |
logr.sdev = sd.seg, max.segments = max.segments, min.gof = min.gof, flag = vcf.filtering$flag, |
899 | 916 |
flag_comment = vcf.filtering$flag_comment, dropout = dropoutWarning, use.somatic.status = args.filterVcf$use.somatic.status, |
... | ... |
@@ -905,10 +922,13 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
905 | 922 |
} |
906 | 923 |
|
907 | 924 |
if (length(results) < 1) { |
908 |
- warning("Could not find valid purity and ploidy solution.") |
|
925 |
+ flog.warn("Could not find valid purity and ploidy solution.") |
|
909 | 926 |
} |
927 |
+ .logFooter() |
|
910 | 928 |
list(candidates = candidate.solutions, results = results, input = list(tumor = tumor.coverage.file, |
911 | 929 |
normal = normal.coverage.file, log.ratio = data.frame(probe = normal[, 1], |
912 | 930 |
log.ratio = log.ratio), log.ratio.sdev = sd.seg, vcf = vcf, sampleid = sampleid, |
913 | 931 |
sex = sex, sex.vcf = sex.vcf, chr.hash = chr.hash, centromeres = centromeres)) |
914 | 932 |
} |
933 |
+ |
|
934 |
+ |
... | ... |
@@ -37,9 +37,16 @@ |
37 | 37 |
#' properly ordered. |
38 | 38 |
#' @param centromeres A \code{data.frame} with centromere positions in first |
39 | 39 |
#' three columns. Currently not supported in this function. |
40 |
-#' @param verbose Verbose output. |
|
41 | 40 |
#' @return \code{data.frame} containing the segmentation. |
42 | 41 |
#' @author Markus Riester |
42 |
+#' @references Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. |
|
43 |
+#' (2004). Circular binary segmentation for the analysis of array-based DNA |
|
44 |
+#' copy number data. Biostatistics 5: 557-572. |
|
45 |
+#' |
|
46 |
+#' Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary |
|
47 |
+#' segmentation algorithm for the analysis of array CGH data. Bioinformatics |
|
48 |
+#' 23: 657-63. |
|
49 |
+#' |
|
43 | 50 |
#' @seealso \code{\link{runAbsoluteCN}} |
44 | 51 |
#' @examples |
45 | 52 |
#' |
... | ... |
@@ -67,7 +74,7 @@ segmentationCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, |
67 | 74 |
min.coverage, sampleid, target.weight.file = NULL, alpha = 0.005, undo.SD = |
68 | 75 |
NULL, vcf = NULL, tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, |
69 | 76 |
max.segments = NULL, prune.hclust.h = NULL, prune.hclust.method = "ward.D", |
70 |
- chr.hash = NULL, centromeres = NULL, verbose = TRUE) { |
|
77 |
+ chr.hash = NULL, centromeres = NULL) { |
|
71 | 78 |
|
72 | 79 |
if (is.null(chr.hash)) chr.hash <- .getChrHash(tumor$chr) |
73 | 80 |
|
... | ... |
@@ -76,24 +83,21 @@ segmentationCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, |
76 | 83 |
target.weights <- read.delim(target.weight.file, as.is=TRUE) |
77 | 84 |
target.weights <- target.weights[match(as.character(tumor[,1]), |
78 | 85 |
target.weights[,1]),2] |
79 |
- if (verbose) message("Target weights found, will use weighted CBS.") |
|
86 |
+ flog.info("Target weights found, will use weighted CBS.") |
|
80 | 87 |
} |
81 |
- x <- .CNV.analyze2(normal, tumor, logR=log.ratio, plot.cnv=plot.cnv, |
|
88 |
+ x <- .CNV.analyze2(normal, tumor, log.ratio=log.ratio, plot.cnv=plot.cnv, |
|
82 | 89 |
min.coverage=min.coverage, sampleid=sampleid, alpha=alpha, |
83 | 90 |
weights=target.weights, sdundo=undo.SD, max.segments=max.segments, |
84 |
- chr.hash=chr.hash, verbose=verbose) |
|
91 |
+ chr.hash=chr.hash) |
|
85 | 92 |
if (!is.null(vcf)) { |
86 | 93 |
x <- .pruneByVCF(x, vcf, tumor.id.in.vcf, chr.hash=chr.hash) |
87 | 94 |
x <- .findCNNLOH(x, vcf, tumor.id.in.vcf, alpha=alpha, |
88 | 95 |
chr.hash=chr.hash) |
89 | 96 |
x <- .pruneByHclust(x, vcf, tumor.id.in.vcf, h=prune.hclust.h, |
90 |
- method=prune.hclust.method, chr.hash=chr.hash, verbose=verbose) |
|
97 |
+ method=prune.hclust.method, chr.hash=chr.hash) |
|
91 | 98 |
} |
92 | 99 |
idx.enough.markers <- x$cna$output$num.mark > 1 |
93 | 100 |
rownames(x$cna$output) <- NULL |
94 |
- if (verbose) { |
|
95 |
- print(x$cna$output[idx.enough.markers,]) |
|
96 |
- } |
|
97 | 101 |
x$cna$output[idx.enough.markers,] |
98 | 102 |
} |
99 | 103 |
|
... | ... |
@@ -159,7 +163,7 @@ iterations=2, chr.hash ) { |
159 | 163 |
} |
160 | 164 |
|
161 | 165 |
.pruneByHclust <- function(x, vcf, tumor.id.in.vcf, h=NULL, method="ward.D", |
162 |
- min.variants=5, chr.hash, iterations=2, verbose=TRUE) { |
|
166 |
+ min.variants=5, chr.hash, iterations=2) { |
|
163 | 167 |
for (iter in seq_len(iterations)) { |
164 | 168 |
seg <- x$cna$output |
165 | 169 |
#message("HCLUST: ", iter, " Num segment LRs: ", length(table(x$cna$output$seg.mean))) |
... | ... |
@@ -180,7 +184,7 @@ iterations=2, chr.hash ) { |
180 | 184 |
|
181 | 185 |
if (is.null(h)) { |
182 | 186 |
h <- .getPruneH(seg) |
183 |
- if (verbose) message("Setting prune.hclust.h parameter to ", h) |
|
187 |
+ flog.info("Setting prune.hclust.h parameter to %f.", h) |
|
184 | 188 |
} |
185 | 189 |
|
186 | 190 |
numVariants <- sapply(seq_len(nrow(seg)), function(i) |
... | ... |
@@ -293,9 +297,9 @@ iterations=2, chr.hash ) { |
293 | 297 |
|
294 | 298 |
# ExomeCNV version without the x11() calls |
295 | 299 |
.CNV.analyze2 <- |
296 |
-function(normal, tumor, logR=NULL, min.coverage=15, weights=NULL, sdundo=NULL, |
|
297 |
-undo.splits="sdundo", smooth=TRUE, alpha=0.01, sampleid=NULL, plot.cnv=TRUE, |
|
298 |
-max.segments=NULL, chr.hash=chr.hash, verbose=TRUE) { |
|
300 |
+function(normal, tumor, log.ratio=NULL, min.coverage=15, weights=NULL, sdundo=NULL, |
|
301 |
+undo.splits="sdundo", alpha=0.01, sampleid=NULL, plot.cnv=TRUE, |
|
302 |
+max.segments=NULL, chr.hash=chr.hash) { |
|
299 | 303 |
|
300 | 304 |
# first, do it for exons with enough coverage. MR: added less stringent |
301 | 305 |
# cutoff in case normal looks great. these could be homozygous deletions |
... | ... |
@@ -303,51 +307,46 @@ max.segments=NULL, chr.hash=chr.hash, verbose=TRUE) { |
303 | 307 |
well.covered.exon.idx <- .getWellCoveredExons(normal, tumor, |
304 | 308 |
min.coverage) |
305 | 309 |
|
306 |
- if (verbose) message("Removing ", sum(!well.covered.exon.idx), |
|
307 |
- " low coverage exons.") |
|
308 |
- if (is.null(logR)) norm.log.ratio = calculateLogRatio(normal, tumor, verbose) |
|
309 |
- else norm.log.ratio = logR |
|
310 |
+ flog.info("Removing %i low coverage exons.", sum(!well.covered.exon.idx)) |
|
310 | 311 |
|
311 | 312 |
if (is.null(sdundo)) { |
312 |
- sdundo <- .getSDundo(norm.log.ratio[well.covered.exon.idx]) |
|
313 |
+ sdundo <- .getSDundo(log.ratio[well.covered.exon.idx]) |
|
313 | 314 |
} |
314 | 315 |
|
315 |
- CNA.obj <- CNA(norm.log.ratio[well.covered.exon.idx], |
|
316 |
+ CNA.obj <- CNA(log.ratio[well.covered.exon.idx], |
|
316 | 317 |
.strip.chr.name(normal$chr[well.covered.exon.idx], chr.hash), |
317 | 318 |
floor((normal$probe_start[well.covered.exon.idx] + |
318 | 319 |
normal$probe_end[well.covered.exon.idx])/2), data.type="logratio", |
319 | 320 |
sampleid=sampleid) |
320 | 321 |
|
321 |
- smoothed.CNA.obj = if (smooth) smooth.CNA(CNA.obj) else CNA.obj |
|
322 |
- |
|
323 | 322 |
try.again <- 0 |
324 | 323 |
|
325 | 324 |
while (try.again < 2) { |
326 |
- if (verbose) message("Setting undo.SD parameter to ", sdundo) |
|
325 |
+ flog.info("Setting undo.SD parameter to %f.", sdundo) |
|
327 | 326 |
if (!is.null(weights)) { |
328 | 327 |
weights <- weights[well.covered.exon.idx] |
329 | 328 |
# MR: this shouldn't happen. In doubt, count them as median. |
330 | 329 |
weights[is.na(weights)] <- median(weights, na.rm=TRUE) |
331 |
- segment.smoothed.CNA.obj <- segment(smoothed.CNA.obj, |
|
330 |
+ segment.CNA.obj <- segment(CNA.obj, |
|
332 | 331 |
undo.splits=undo.splits, undo.SD=sdundo, |
333 |
- verbose=ifelse(verbose, 1, 0), alpha=alpha,weights=weights) |
|
332 |
+ verbose=0, alpha=alpha,weights=weights) |
|
334 | 333 |
} else { |
335 |
- segment.smoothed.CNA.obj <- segment(smoothed.CNA.obj, |
|
334 |
+ segment.CNA.obj <- segment(CNA.obj, |
|
336 | 335 |
undo.splits=undo.splits, undo.SD=sdundo, |
337 |
- verbose=ifelse(verbose, 1, 0), alpha=alpha) |
|
336 |
+ verbose=0, alpha=alpha) |
|
338 | 337 |
} |
339 |
- if (is.null(max.segments) || nrow(segment.smoothed.CNA.obj$output) |
|
338 |
+ if (is.null(max.segments) || nrow(segment.CNA.obj$output) |
|
340 | 339 |
< max.segments) break |
341 | 340 |
sdundo <- sdundo * 1.5 |
342 | 341 |
try.again <- try.again + 1 |
343 | 342 |
} |
344 | 343 |
|
345 | 344 |
if (plot.cnv) { |
346 |
- plot(segment.smoothed.CNA.obj, plot.type="s") |
|
347 |
- plot(segment.smoothed.CNA.obj, plot.type="w") |
|
345 |
+ plot(segment.CNA.obj, plot.type="s") |
|
346 |
+ plot(segment.CNA.obj, plot.type="w") |
|
348 | 347 |
} |
349 | 348 |
|
350 |
- return(list(cna=segment.smoothed.CNA.obj, logR=norm.log.ratio)) |
|
349 |
+ return(list(cna=segment.CNA.obj, logR=log.ratio)) |
|
351 | 350 |
} |
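The central DNAcopy call in .CNV.analyze2() is segment() with
undo.splits="sdundo", which removes change-points whose adjoining segment means
differ by less than undo.SD standard deviations, optionally using per-target
weights. A minimal sketch on simulated data (parameter values are illustrative
only):

    library(DNAcopy)
    set.seed(1)
    lr <- c(rnorm(50, 0, 0.2), rnorm(50, 1, 0.2))   # one true change-point
    cna <- CNA(lr, chrom=rep(1, 100), maploc=1:100,
        data.type="logratio", sampleid="S1")
    fit <- segment(cna, undo.splits="sdundo", undo.SD=1.5,
        alpha=0.005, verbose=0)
    fit$output[, c("chrom", "loc.start", "loc.end", "num.mark", "seg.mean")]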
352 | 351 |
|
353 | 352 |
.getSegSizes <- function(seg) { |
... | ... |
@@ -22,8 +22,6 @@ |
22 | 22 |
#' function. |
23 | 23 |
#' @param undo.SD \code{undo.SD} for CBS, see documentation of the |
24 | 24 |
#' \code{segment} function. If \code{NULL}, try to find a sensible default. |
25 |
-#' @param drop.outliers If \code{TRUE}, calls the |
|
26 |
-#' \code{dropSegmentationOutliers} function from PSCBS before segmentation. |
|
27 | 25 |
#' @param flavor Flavor value for PSCBS. See \code{segmentByNonPairedPSCBS}. |
28 | 26 |
#' @param tauA tauA argument for PSCBS. See \code{segmentByNonPairedPSCBS}. |
29 | 27 |
#' @param vcf Optional VCF object with germline allelic ratios. |
... | ... |
@@ -43,11 +41,20 @@ |
43 | 41 |
#' properly ordered. |
44 | 42 |
#' @param centromeres A \code{data.frame} with centromere positions in first |
45 | 43 |
#' three columns. If not \code{NULL}, add breakpoints at centromeres. |
46 |
-#' @param verbose Verbose output. |
|
47 | 44 |
#' @param \dots Additional parameters passed to the |
48 | 45 |
#' \code{segmentByNonPairedPSCBS} function. |
49 | 46 |
#' @return \code{data.frame} containing the segmentation. |
50 | 47 |
#' @author Markus Riester |
48 |
+#' @references Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. |
|
49 |
+#' (2004). Circular binary segmentation for the analysis of array-based DNA |
|
50 |
+#' copy number data. Biostatistics 5: 557-572. |
|
51 |
+#' |
|
52 |
+#' Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary |
|
53 |
+#' segmentation algorithm for the analysis of array CGH data. Bioinformatics |
|
54 |
+#' 23: 657-63. |
|
55 |
+#' |
|
56 |
+#' Olshen et al. (2011). Parent-specific copy number in paired tumor-normal |
|
57 |
+#' studies using circular binary segmentation. Bioinformatics. |
|
51 | 58 |
#' @seealso \code{\link{runAbsoluteCN}} |
52 | 59 |
#' @examples |
53 | 60 |
#' |
... | ... |
@@ -72,10 +79,10 @@ |
72 | 79 |
#' @export segmentationPSCBS |
73 | 80 |
segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, |
74 | 81 |
min.coverage, sampleid, target.weight.file = NULL, alpha = 0.005, undo.SD = |
75 |
- NULL, drop.outliers=TRUE, flavor = "tcn&dh", tauA = 0.03, vcf = NULL, |
|
82 |
+ NULL, flavor = "tcn&dh", tauA = 0.03, vcf = NULL, |
|
76 | 83 |
tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, max.segments = NULL, |
77 | 84 |
prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL, |
78 |
- centromeres = NULL, verbose = TRUE, ...) { |
|
85 |
+ centromeres = NULL, ...) { |
|
79 | 86 |
|
80 | 87 |
debug <- TRUE |
81 | 88 |
|
... | ... |
@@ -95,8 +102,7 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, |
95 | 102 |
target.weights <- read.delim(target.weight.file, as.is=TRUE) |
96 | 103 |
target.weights <- target.weights[match(as.character(tumor[,1]), |
97 | 104 |
target.weights[,1]),2] |
98 |
- if (verbose) message( |
|
99 |
- "Target weights found, but currently not supported by PSCBS. ", |
|
105 |
+ flog.info("Target weights found, but currently not supported by PSCBS. %s", |
|
100 | 106 |
"Will simply exclude targets with low weight.") |
101 | 107 |
lowWeightTargets <- target.weights < 1/3 |
102 | 108 |
well.covered.exon.idx[which(lowWeightTargets)] <- FALSE |
... | ... |
@@ -134,7 +140,7 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, |
134 | 140 |
#} else { |
135 | 141 |
if (is.null(undo.SD)) { |
136 | 142 |
undo.SD <- .getSDundo(log.ratio) |
137 |
- if (verbose) message("Setting undo.SD parameter to ", undo.SD) |
|
143 |
+ flog.info("Setting undo.SD parameter to %f.", undo.SD) |
|
138 | 144 |
} |
139 | 145 |
knownSegments <- NULL |
140 | 146 |
if (!is.null(centromeres)) { |
... | ... |
@@ -145,9 +151,6 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, |
145 | 151 |
chr.hash) |
146 | 152 |
knownSegments <- PSCBS::gapsToSegments(knownSegments) |
147 | 153 |
} |
148 |
- if (drop.outliers) { |
|
149 |
- d.f <- PSCBS::dropSegmentationOutliers(d.f) |
|
150 |
- } |
|
151 | 154 |
seg <- PSCBS::segmentByNonPairedPSCBS(d.f, tauA=tauA, |
152 | 155 |
flavor=flavor, undoTCN=undo.SD, knownSegments=knownSegments, |
153 | 156 |
min.width=3,alphaTCN=alpha, ...) |
... | ... |
@@ -157,7 +160,7 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, |
157 | 160 |
|
158 | 161 |
if (!is.null(vcf)) { |
159 | 162 |
x <- .pruneByHclust(x, vcf, tumor.id.in.vcf, h=prune.hclust.h, |
160 |
- method=prune.hclust.method, chr.hash=chr.hash, verbose=verbose) |
|
163 |
+ method=prune.hclust.method, chr.hash=chr.hash) |
|
161 | 164 |
} |
162 | 165 |
x$cna$output |
163 | 166 |
} |
... | ... |
@@ -19,7 +19,6 @@ |
19 | 19 |
#' @param smooth Impute mapping bias of variants not found in the panel by |
20 | 20 |
#' smoothing of neighboring SNPs. Requires \code{normal.panel.vcf.file}. |
21 | 21 |
#' @param smooth.n Number of neighboring variants used for smoothing. |
22 |
-#' @param verbose Verbose output. |
|
23 | 22 |
#' @return A \code{numeric(nrow(vcf))} vector with the mapping bias of for each |
24 | 23 |
#' variant in the \code{CollapsedVCF}. Mapping bias is expected as scaling |
25 | 24 |
#' factor. Adjusted allelic fraction is (observed allelic fraction)/(mapping |
... | ... |
@@ -35,8 +34,7 @@ |
35 | 34 |
#' |
36 | 35 |
#' @export setMappingBiasVcf |
37 | 36 |
setMappingBiasVcf <- function(vcf, tumor.id.in.vcf = NULL, |
38 |
-normal.panel.vcf.file = NULL, min.normals = 5, smooth = TRUE, smooth.n = 5, |
|
39 |
-verbose = TRUE) { |
|
37 |
+normal.panel.vcf.file = NULL, min.normals = 5, smooth = TRUE, smooth.n = 5) { |
|
40 | 38 |
|
41 | 39 |
if (is.null(tumor.id.in.vcf)) { |
42 | 40 |
tumor.id.in.vcf <- .getTumorIdInVcf(vcf) |
... | ... |
@@ -46,12 +44,12 @@ verbose = TRUE) { |
46 | 44 |
normal.id.in.vcf <- .getNormalIdInVcf(vcf, tumor.id.in.vcf) |
47 | 45 |
faAll <- as.numeric(geno(vcf)$FA[!info(vcf)$SOMATIC,normal.id.in.vcf]) |
48 | 46 |
mappingBias <- mean(faAll, na.rm=TRUE)*2 |
49 |
- if (verbose) message("Found SOMATIC annotation in VCF. ", |
|
50 |
- "Setting mapping bias to ", round(mappingBias, digits=3)) |
|
47 |
+ flog.info("Found SOMATIC annotation in VCF. Setting mapping bias to %.3f.", |
|
48 |
+ mappingBias) |
|
51 | 49 |
} |
52 | 50 |
if (is.null(info(vcf)$SOMATIC) && is.null(normal.panel.vcf.file)) { |
53 |
- message( |
|
54 |
- "VCF does not contain somatic status. For best results, consider\n", |
|
51 |
+ flog.info( |
|
52 |
+ "VCF does not contain somatic status. For best results, consider%s%s", |
|
55 | 53 |
"providing normal.panel.vcf.file when matched normals are not ", |
56 | 54 |
"available.") |
57 | 55 |
} |
... | ... |
@@ -64,10 +62,9 @@ verbose = TRUE) { |
64 | 62 |
if (is.null(normal.panel.vcf.file)) { |
65 | 63 |
return(tmp) |
66 | 64 |
} |
67 |
- nvcf <- .readNormalPanelVcfLarge(vcf, normal.panel.vcf.file, |
|
68 |
- verbose=verbose) |
|
65 |
+ nvcf <- .readNormalPanelVcfLarge(vcf, normal.panel.vcf.file) |
|
69 | 66 |
if (nrow(nvcf) < 1) { |
70 |
- warning("setMappingBiasVcf: no hits in ", normal.panel.vcf.file, ".") |
|
67 |
+ flog.warn("setMappingBiasVcf: no hits in %s.", normal.panel.vcf.file) |
|
71 | 68 |
return(tmp) |
72 | 69 |
} |
73 | 70 |
|
... | ... |
@@ -101,15 +98,15 @@ verbose = TRUE) { |
101 | 98 |
as.numeric(y) |
102 | 99 |
} |
103 | 100 |
|
104 |
-.readNormalPanelVcfLarge <- function(vcf, normal.panel.vcf.file, max.file.size=1, verbose) { |
|
101 |
+.readNormalPanelVcfLarge <- function(vcf, normal.panel.vcf.file, max.file.size=1) { |
|
105 | 102 |
genome <- genome(vcf)[1] |
106 | 103 |
if (file.size(normal.panel.vcf.file)/1000^3 > max.file.size || nrow(vcf)< 1000) { |
107 |
- if (verbose) message("Scanning ", normal.panel.vcf.file, "...") |
|
104 |
+ flog.info("Scanning %s...", normal.panel.vcf.file) |
|
108 | 105 |
nvcf <- readVcf(TabixFile(normal.panel.vcf.file), genome=genome, |
109 | 106 |
ScanVcfParam(which = rowRanges(vcf), info=NA, fixed=NA, |
110 | 107 |
geno="FA")) |
111 | 108 |
} else { |
112 |
- if (verbose) message("Loading ", normal.panel.vcf.file, "...") |
|
109 |
+ flog.info("Loading %s...", normal.panel.vcf.file) |
|
113 | 110 |
nvcf <- readVcf(normal.panel.vcf.file, genome=genome, |
114 | 111 |
ScanVcfParam(info=NA, fixed=NA, geno="FA")) |
115 | 112 |
nvcf <- subsetByOverlaps(nvcf, rowRanges(vcf)) |
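The two branches above choose between a tabix-indexed region query and reading
the whole normal panel VCF: when the file is large (or only few query variants
are needed), readVcf() is restricted to the positions of interest via
ScanVcfParam(which=...) and imports only the FA genotype field; otherwise the
file is read once and subset with subsetByOverlaps(). A hedged sketch of the
region-restricted variant (file name, genome and regions are placeholders):

    library(VariantAnnotation)
    library(Rsamtools)
    library(GenomicRanges)
    regions <- GRanges("chr1", IRanges(c(1e6, 2e6), width=1))
    param <- ScanVcfParam(which=regions, info=NA, fixed=NA, geno="FA")
    # requires a bgzip-compressed, tabix-indexed VCF
    nvcf <- readVcf(TabixFile("normals.vcf.gz"), genome="hg19", param)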
... | ... |
@@ -4,6 +4,7 @@ library('getopt') |
4 | 4 |
|
5 | 5 |
spec <- matrix(c( |
6 | 6 |
'help' , 'h', 0, "logical", |
7 |
+'version', 'v', 0, "logical", |
|
7 | 8 |
'force' , 'f', 0, "logical", |
8 | 9 |
'bam', 'b', 1, "character", |
9 | 10 |
'gatkcoverage', 'g', 1, "character", |
... | ... |
@@ -18,6 +19,11 @@ if ( !is.null(opt$help) ) { |
18 | 19 |
q(status=1) |
19 | 20 |
} |
20 | 21 |
|
22 |
+if (!is.null(opt$version)) { |
|
23 |
+ message(as.character(packageVersion("PureCN"))) |
|
24 |
+ q(status=1) |
|
25 |
+} |
|
26 |
+ |
|
21 | 27 |
force <- !is.null(opt$force) |
22 | 28 |
|
23 | 29 |
bam.file <- opt$bam |
... | ... |
@@ -4,10 +4,11 @@ library('getopt') |
4 | 4 |
|
5 | 5 |
spec <- matrix(c( |
6 | 6 |
'help' , 'h', 0, "logical", |
7 |
+'version', 'v', 0, "logical", |
|
7 | 8 |
'force' , 'f', 0, "logical", |
8 | 9 |
'gcgene', 'c', 1, "character", |
9 | 10 |
'method', 'm', 1, "character", |
10 |
-'coveragefiles', 'v', 1, "character", |
|
11 |
+'coveragefiles', 'b', 1, "character", |
|
11 | 12 |
'assay', 'a',1, "character", |
12 | 13 |
'outdir' , 'o', 1, "character" |
13 | 14 |
), byrow=TRUE, ncol=4) |
... | ... |
@@ -18,6 +19,12 @@ if ( !is.null(opt$help) ) { |
18 | 19 |
q(status=1) |
19 | 20 |
} |
20 | 21 |
|
22 |
+if (!is.null(opt$version)) { |
|
23 |
+ message(as.character(packageVersion("PureCN"))) |
|
24 |
+ q(status=1) |
|
25 |
+} |
|
26 |
+ |
|
27 |
+ |
|
21 | 28 |
.checkFileList <- function(file) { |
22 | 29 |
files <- read.delim(file, as.is=TRUE, header=FALSE)[,1] |
23 | 30 |
numExists <- sum(file.exists(files), na.rm=TRUE) |
... | ... |
@@ -4,10 +4,11 @@ library('getopt') |
4 | 4 |
|
5 | 5 |
spec <- matrix(c( |
6 | 6 |
'help', 'h', 0, "logical", |
7 |
+'version', 'v', 0, "logical", |
|
7 | 8 |
'force' , 'f', 0, "logical", |
8 | 9 |
'normal', 'n', 1, "character", |
9 | 10 |
'tumor', 't', 1, "character", |
10 |
-'vcf', 'v', 1, "character", |
|
11 |
+'vcf', 'b', 1, "character", |
|
11 | 12 |
'rds', 'r', 1, "character", |
12 | 13 |
'genome', 'g', 1, "character", |
13 | 14 |
'gcgene', 'c', 1, "character", |
... | ... |
@@ -31,6 +32,11 @@ if ( !is.null(opt$help) ) { |
31 | 32 |
q(status=1) |
32 | 33 |
} |
33 | 34 |
|
35 |
+if (!is.null(opt$version)) { |
|
36 |
+ message(as.character(packageVersion("PureCN"))) |
|
37 |
+ q(status=1) |
|
38 |
+} |
|
39 |
+ |
|
34 | 40 |
force <- !is.null(opt$force) |
35 | 41 |
post.optimize <- !is.null(opt$postoptimize) |
36 | 42 |
normal.coverage.file <- opt$normal |
... | ... |
@@ -54,7 +60,9 @@ file.rds <- opt$rds |
54 | 60 |
if (!is.null(file.rds) && file.exists(file.rds)) { |
55 | 61 |
if (is.null(outdir)) outdir <- dirname(file.rds) |
56 | 62 |
} else { |
57 |
- if (is.null(sampleid)) stop("Need sampleid.") |
|
63 |
+ if (is.null(sampleid)) stop("Need --sampleid.") |
|
64 |
+ if (is.null(genome)) stop("Need --genome") |
|
65 |
+ genome <- as.character(genome) |
|
58 | 66 |
file.rds <- file.path(outdir, paste0(sampleid, '_purecn.rds')) |
59 | 67 |
if (is.null(seg.file)) { |
60 | 68 |
tumor.coverage.file <- normalizePath(tumor.coverage.file, |
... | ... |
@@ -94,6 +102,7 @@ if (file.exists(file.rds) && !force) { |
94 | 102 |
} else if (is.null(normal.coverage.file) && is.null(seg.file)) { |
95 | 103 |
stop("Need either normalDB or normal.coverage.file") |
96 | 104 |
} |
105 |
+ file.log <- file.path(outdir, paste0(sampleid, '_purecn.log')) |
|
97 | 106 |
|
98 | 107 |
pdf(paste(outdir,"/", sampleid, '_purecn_segmentation.pdf', sep=''), |
99 | 108 |
width=10, height=11) |
... | ... |
@@ -107,7 +116,7 @@ if (file.exists(file.rds) && !force) { |
107 | 116 |
args.setMappingBiasVcf= |
108 | 117 |
list(normal.panel.vcf.file=normal.panel.vcf.file), |
109 | 118 |
normalDB=normalDB, model.homozygous=model.homozygous, |
110 |
- post.optimize=post.optimize) |
|
119 |
+ log.file=file.log, post.optimize=post.optimize) |
|
111 | 120 |
dev.off() |
112 | 121 |
saveRDS(ret, file=file.rds) |
113 | 122 |
} |
... | ... |
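The new log.file plumbing relies on the futile.logger framework that replaces message()/warning() throughout the package. A minimal sketch of how a tee appender sends the same records to the console and to a file; the file name and messages are examples only, not the package's actual log output:

library(futile.logger)

file.log <- file.path(tempdir(), "Sample1_purecn.log")

# Log INFO and above to both the console and the log file.
flog.threshold(INFO)
flog.appender(appender.tee(file.log))

flog.info("Loading %s...", "normal_panel.vcf.gz")
flog.warn("Only %d variants passed the filters.", 42L)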
@@ -1,9 +1,9 @@ |
1 | 1 |
test_getSexFromCoverage <- function() { |
2 | 2 |
tumor.coverage.file <- system.file("extdata", "example_tumor.txt", package="PureCN") |
3 | 3 |
coverage <- readCoverageGatk(tumor.coverage.file) |
4 |
- sex <- getSexFromCoverage(coverage, verbose=FALSE) |
|
4 |
+ sex <- getSexFromCoverage(coverage) |
|
5 | 5 |
checkTrue(is.na(sex)) |
6 |
- sex <- getSexFromCoverage(tumor.coverage.file, verbose=FALSE) |
|
6 |
+ sex <- getSexFromCoverage(tumor.coverage.file) |
|
7 | 7 |
checkTrue(is.na(sex)) |
8 | 8 |
|
9 | 9 |
chr22 <- coverage[which(coverage$chr=="chr22"),] |
... | ... |
@@ -240,7 +240,7 @@ test_runAbsoluteCN <- function() { |
240 | 240 |
|
241 | 241 |
# test with a log.ratio and no tumor file |
242 | 242 |
log.ratio <- calculateLogRatio(readCoverageGatk(normal.coverage.file), |
243 |
- readCoverageGatk(tumor.coverage.file), verbose=FALSE) |
|
243 |
+ readCoverageGatk(tumor.coverage.file)) |
|
244 | 244 |
|
245 | 245 |
ret <- runAbsoluteCN( log.ratio=log.ratio, |
246 | 246 |
gc.gene.file=gc.gene.file, |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
\alias{calculateLogRatio} |
5 | 5 |
\title{Calculate coverage log-ratio of tumor vs. normal} |
6 | 6 |
\usage{ |
7 |
-calculateLogRatio(normal, tumor, verbose = TRUE) |
|
7 |
+calculateLogRatio(normal, tumor) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{normal}{Normal coverage read in by the \code{\link{readCoverageGatk}} |
... | ... |
@@ -12,8 +12,6 @@ function.} |
12 | 12 |
|
13 | 13 |
\item{tumor}{Tumor coverage read in by the \code{\link{readCoverageGatk}} |
14 | 14 |
function.} |
15 |
- |
|
16 |
-\item{verbose}{Verbose output.} |
|
17 | 15 |
} |
18 | 16 |
\value{ |
19 | 17 |
\code{numeric(nrow(tumor))}, tumor vs. normal copy number log-ratios |
... | ... |
@@ -33,7 +31,7 @@ tumor.coverage.file <- system.file("extdata", "example_tumor.txt", |
33 | 31 |
package="PureCN") |
34 | 32 |
normal <- readCoverageGatk(normal.coverage.file) |
35 | 33 |
tumor <- readCoverageGatk(tumor.coverage.file) |
36 |
-log.ratio <- calculateLogRatio(normal, tumor, verbose=FALSE) |
|
34 |
+log.ratio <- calculateLogRatio(normal, tumor) |
|
37 | 35 |
|
38 | 36 |
} |
39 | 37 |
\author{ |
... | ... |
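Conceptually, the value returned by calculateLogRatio is the per-target log2 ratio of tumor to normal coverage after the differing total sequencing depths of the two samples are taken into account. A naive standalone sketch of that idea, not the package's implementation (which operates on the objects returned by readCoverageGatk):

# Naive per-target log-ratio; library size cancels out because each sample is
# normalized to its total coverage before the ratio is taken.
naiveLogRatio <- function(tumor.avg, normal.avg) {
    tumor.rel  <- tumor.avg / sum(tumor.avg)
    normal.rel <- normal.avg / sum(normal.avg)
    log2(tumor.rel / normal.rel)
}

normal.avg <- c(100, 80, 120, 95)  # average coverage per target (normal)
tumor.avg  <- c(210, 75, 115, 90)  # first target amplified in the tumor
round(naiveLogRatio(tumor.avg, normal.avg), 2)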
@@ -70,7 +70,7 @@ legend("bottomright", legend=paste("Purity", purity), fill=seq_along(purity)) |
70 | 70 |
Markus Riester |
71 | 71 |
} |
72 | 72 |
\references{ |
73 |
-Carter et al., Absolute quantification of somatic DNA |
|
74 |
-alterations in human cancer. Nature Biotechnology 2012. |
|
73 |
+Carter et al. (2012), Absolute quantification of somatic DNA |
|
74 |
+alterations in human cancer. Nature Biotechnology. |
|
75 | 75 |
} |
76 | 76 |
|
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
\title{Calculate target weights} |
6 | 6 |
\usage{ |
7 | 7 |
createTargetWeights(tumor.coverage.files, normal.coverage.files, |
8 |
- target.weight.file, verbose = TRUE) |
|
8 |
+ target.weight.file) |
|
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 | 11 |
\item{tumor.coverage.files}{A small number (1-3) of GATK tumor or normal |
... | ... |
@@ -16,8 +16,6 @@ coverage samples.} |
16 | 16 |
with files in \code{tumor.coverage.files}.} |
17 | 17 |
|
18 | 18 |
\item{target.weight.file}{Output filename.} |
19 |
- |
|
20 |
-\item{verbose}{Verbose output.} |
|
21 | 19 |
} |
22 | 20 |
\value{ |
23 | 21 |
A \code{data.frame} with target weights. |
... | ... |
@@ -5,8 +5,7 @@ |
5 | 5 |
\title{Remove low quality targets} |
6 | 6 |
\usage{ |
7 | 7 |
filterTargets(log.ratio, tumor, gc.data, seg.file, filter.lowhigh.gc = 0.001, |
8 |
- min.targeted.base = 5, normalDB = NULL, normalDB.min.coverage = 0.2, |
|
9 |
- verbose) |
|
8 |
+ min.targeted.base = 5, normalDB = NULL, normalDB.min.coverage = 0.2) |
|
10 | 9 |
} |
11 | 10 |
\arguments{ |
12 | 11 |
\item{log.ratio}{Copy number log-ratios, one for each target or interval in |
... | ... |
@@ -33,8 +32,6 @@ likely very different from the true GC content of the probes.} |
33 | 32 |
|
34 | 33 |
\item{normalDB.min.coverage}{Exclude targets with coverage lower than 20 |
35 | 34 |
percent of the chromosome median in the pool of normals.} |
36 |
- |
|
37 |
-\item{verbose}{Verbose output.} |
|
38 | 35 |
} |
39 | 36 |
\value{ |
40 | 37 |
\code{logical(length(log.ratio))} specifying which targets should be |
... | ... |
@@ -6,10 +6,10 @@ |
6 | 6 |
\usage{ |
7 | 7 |
filterVcfBasic(vcf, tumor.id.in.vcf = NULL, use.somatic.status = TRUE, |
8 | 8 |
snp.blacklist = NULL, af.range = c(0.03, 0.97), |
9 |
- contamination.cutoff = c(0.05, 0.075), min.coverage = 15, |
|
9 |
+ contamination.cutoff = c(0.075, 0.02), min.coverage = 15, |
|
10 | 10 |
min.base.quality = 25, min.supporting.reads = NULL, error = 0.001, |
11 | 11 |
target.granges = NULL, remove.off.target.snvs = TRUE, |
12 |
- model.homozygous = FALSE, interval.padding = 50, verbose = TRUE) |
|
12 |
+ model.homozygous = FALSE, interval.padding = 50) |
|
13 | 13 |
} |
14 | 14 |
\arguments{ |
15 | 15 |
\item{vcf}{\code{CollapsedVCF} object, read in with the \code{readVcf} |
... | ... |
@@ -34,8 +34,9 @@ contamination. If a matched normal is available, this value is ignored, |
34 | 34 |
because homozygosity can be confirmed in the normal.} |
35 | 35 |
|
36 | 36 |
\item{contamination.cutoff}{Count SNPs in dbSNP with allelic fraction |
37 |
-smaller than the first value, if found on most chromosomes, remove all with |
|
38 |
-AF smaller than the second value.} |
|
37 |
+smaller than the first value or greater than 1 minus the first value. If such SNPs are found on |
|
38 |
+most chromosomes, the sample is flagged as contaminated when the fraction of putative |
|
39 |
+contamination SNPs exceeds the second value.} |
|
39 | 40 |
|
40 | 41 |
\item{min.coverage}{Minimum coverage in tumor. Variants with lower coverage |
41 | 42 |
are ignored.} |
... | ... |
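To make the reworded contamination.cutoff description concrete, a small self-contained sketch of the idea follows. It is illustrative only; the actual filterVcfBasic additionally requires the suspicious SNPs to be spread over most chromosomes:

# Defaults mirror the documented c(0.075, 0.02).
isContaminated <- function(af, contamination.cutoff = c(0.075, 0.02)) {
    # af: allelic fractions of dbSNP positions in the tumor sample
    putative <- af < contamination.cutoff[1] | af > 1 - contamination.cutoff[1]
    mean(putative) > contamination.cutoff[2]
}

set.seed(123)
af.clean <- rbeta(500, 20, 20)                         # balanced heterozygous SNPs
af.cont  <- c(rbeta(470, 20, 20), runif(30, 0, 0.05))  # 6% skewed towards 0
isContaminated(af.clean)  # FALSE
isContaminated(af.cont)   # TRUE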
@@ -62,8 +63,6 @@ SNPs. Ignored in case a matched normal is provided in the VCF.} |
62 | 63 |
|
63 | 64 |
\item{interval.padding}{Include variants in the interval flanking regions of |
64 | 65 |
the specified size in bp. Requires \code{target.granges}.} |
65 |
- |
|
66 |
-\item{verbose}{Verbose output.} |
|
67 | 66 |
} |
68 | 67 |
\value{ |
69 | 68 |
A list with elements \item{vcf}{The filtered \code{CollapsedVCF} |
... | ... |
@@ -8,7 +8,7 @@ filterVcfMuTect(vcf, tumor.id.in.vcf = NULL, stats.file = NULL, |
8 | 8 |
ignore = c("clustered_read_position", "fstar_tumor_lod", |
9 | 9 |
"nearby_gap_events", "poor_mapping_region_alternate_allele_mapq", |
10 | 10 |
"poor_mapping_region_mapq0", "possible_contamination", "strand_artifact", |
11 |
- "seen_in_panel_of_normals"), verbose = TRUE, ...) |
|
11 |
+ "seen_in_panel_of_normals"), ...) |
|
12 | 12 |
} |
13 | 13 |
\arguments{ |
14 | 14 |
\item{vcf}{\code{CollapsedVCF} object, read in with the \code{readVcf} |
... | ... |
@@ -20,8 +20,6 @@ function from the VariantAnnotation package.} |
20 | 20 |
|
21 | 21 |
\item{ignore}{MuTect flags that mark variants for exclusion.} |
22 | 22 |
|
23 |
-\item{verbose}{Verbose output.} |
|
24 |
- |
|
25 | 23 |
\item{\dots}{Additional arguments passed to \code{\link{filterVcfBasic}}.} |
26 | 24 |
} |
27 | 25 |
\value{ |
... | ... |
@@ -6,8 +6,7 @@ |
6 | 6 |
\usage{ |
7 | 7 |
findBestNormal(tumor.coverage.file, normalDB, pcs = 1:3, num.normals = 1, |
8 | 8 |
ignore.sex = FALSE, sex = NULL, normal.coverage.files = NULL, |
9 |
- pool = FALSE, pool.weights = c("voom", "equal"), plot.pool = FALSE, |
|
10 |
- verbose = TRUE, ...) |
|
9 |
+ pool = FALSE, pool.weights = c("voom", "equal"), plot.pool = FALSE, ...) |
|
11 | 10 |
} |
12 | 11 |
\arguments{ |
13 | 12 |
\item{tumor.coverage.file}{GATK coverage file of a tumor sample.} |
... | ... |
@@ -39,8 +38,6 @@ weight all best normals equally.} |
39 | 38 |
|
40 | 39 |
\item{plot.pool}{Allows the pooling function to create plots.} |
41 | 40 |
|
42 |
-\item{verbose}{Verbose output.} |
|
43 |
- |
|
44 | 41 |
\item{\dots}{Additional arguments passed to \code{\link{poolCoverage}}.} |
45 | 42 |
} |
46 | 43 |
\value{ |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
\title{Get sample sex from coverage} |
6 | 6 |
\usage{ |
7 | 7 |
getSexFromCoverage(coverage.file, min.ratio = 25, min.ratio.na = 20, |
8 |
- remove.outliers = TRUE, verbose = TRUE) |
|
8 |
+ remove.outliers = TRUE) |
|
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 | 11 |
\item{coverage.file}{GATK coverage file or data read with |
... | ... |
@@ -23,8 +23,6 @@ be considered when setting cutoffs.} |
23 | 23 |
|
24 | 24 |
\item{remove.outliers}{Removes coverage outliers before calculating mean |
25 | 25 |
chromosome coverages.} |
26 |
- |
|
27 |
-\item{verbose}{Verbose output.} |
|
28 | 26 |
} |
29 | 27 |
\value{ |
30 | 28 |
Returns a \code{character(1)} with \code{M} for male, \code{F} for |
... | ... |
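The idea behind getSexFromCoverage is a chrX/chrY mean coverage ratio: female samples have essentially no Y coverage, so the ratio is large, whereas males carry one X and one Y and keep the ratio small. A conceptual sketch only, using the documented default cutoffs; the package's exact decision rule, outlier removal and handling of missing chromosomes may differ:

sexFromXYRatio <- function(mean.x, mean.y, min.ratio = 25, min.ratio.na = 20) {
    ratio <- mean.x / mean.y
    if (ratio > min.ratio) return("F")     # almost no Y coverage
    if (ratio < min.ratio.na) return("M")  # substantial Y coverage
    NA                                     # ambiguous, do not call
}

sexFromXYRatio(mean.x = 120, mean.y = 2)   # "F"
sexFromXYRatio(mean.x = 120, mean.y = 60)  # "M"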
@@ -5,8 +5,7 @@ |
5 | 5 |
\title{Get sample sex from a VCF file} |
6 | 6 |
\usage{ |
7 | 7 |
getSexFromVcf(vcf, tumor.id.in.vcf = NULL, min.or = 4, min.or.na = 2.5, |
8 |
- max.pv = 0.001, homozygous.cutoff = 0.95, af.cutoff = 0.2, |
|
9 |
- verbose = TRUE) |
|
8 |
+ max.pv = 0.001, homozygous.cutoff = 0.95, af.cutoff = 0.2) |
|
10 | 9 |
} |
11 | 10 |
\arguments{ |
12 | 11 |
\item{vcf}{CollapsedVCF object, read in with the \code{readVcf} function |
... | ... |
@@ -29,8 +28,6 @@ homozygous.} |
29 | 28 |
|
30 | 29 |
\item{af.cutoff}{Remove all SNVs with allelic fraction lower than the |
31 | 30 |
specified value.} |
32 |
- |
|
33 |
-\item{verbose}{Verbose output.} |
|
34 | 31 |
} |
35 | 32 |
\value{ |
36 | 33 |
Returns a \code{character(1)} with \code{M} for male, \code{F} for |
... | ... |
@@ -6,7 +6,7 @@ |
6 | 6 |
\usage{ |
7 | 7 |
readCurationFile(file.rds, file.curation = gsub(".rds$", ".csv", file.rds), |
8 | 8 |
remove.failed = FALSE, report.best.only = FALSE, min.ploidy = NULL, |
9 |
- max.ploidy = NULL, verbose = FALSE) |
|
9 |
+ max.ploidy = NULL) |
|
10 | 10 |
} |
11 | 11 |
\arguments{ |
12 | 12 |
\item{file.rds}{Output of the \code{\link{runAbsoluteCN}} function, |
... | ... |
@@ -25,8 +25,6 @@ be used to automatically ignore unlikely solutions.} |
25 | 25 |
|
26 | 26 |
\item{max.ploidy}{Maximum ploidy to be considered. If \code{NULL}, all. Can |
27 | 27 |
be used to automatically ignore unlikely solutions.} |
28 |
- |
|
29 |
-\item{verbose}{Verbose output.} |
|
30 | 28 |
} |
31 | 29 |
\value{ |
32 | 30 |
The return value of the corresponding \code{\link{runAbsoluteCN}} |
... | ... |
@@ -20,10 +20,11 @@ runAbsoluteCN(normal.coverage.file = NULL, tumor.coverage.file = NULL, |
20 | 20 |
max.coverage.vcf = 300, max.non.clonal = 0.2, |
21 | 21 |
max.homozygous.loss = c(0.05, 1e+07), non.clonal.M = 1/3, |
22 | 22 |
max.mapping.bias = 0.8, iterations = 30, log.ratio.calibration = 0.25, |
23 |
- remove.off.target.snvs = NULL, model.homozygous = FALSE, error = 0.001, |
|
24 |
- gc.gene.file = NULL, max.dropout = c(0.95, 1.1), max.logr.sdev = 0.75, |
|
25 |
- max.segments = 300, min.gof = 0.8, plot.cnv = TRUE, |
|
26 |
- cosmic.vcf.file = NULL, post.optimize = FALSE, verbose = TRUE) |
|
23 |
+ smooth.log.ratio = TRUE, remove.off.target.snvs = NULL, |
|
24 |
+ model.homozygous = FALSE, error = 0.001, gc.gene.file = NULL, |
|
25 |
+ max.dropout = c(0.95, 1.1), max.logr.sdev = 0.75, max.segments = 300, |
|
26 |
+ min.gof = 0.8, plot.cnv = TRUE, cosmic.vcf.file = NULL, |
|
27 |
+ post.optimize = FALSE, log.file = NULL, verbose = TRUE) |
|
27 | 28 |
} |
28 | 29 |
\arguments{ |
29 | 30 |
\item{normal.coverage.file}{GATK coverage file of normal control (optional |
... | ... |
@@ -85,7 +86,7 @@ to \code{\link{filterVcfMuTect}}, which in turn also calls |
85 | 86 |
|
86 | 87 |
\item{args.filterVcf}{Arguments for variant filtering function. Arguments |
87 | 88 |
\code{vcf}, \code{tumor.id.in.vcf}, \code{min.coverage}, |
88 |
-\code{model.homozygous}, \code{error} and \code{verbose} are required in the |
|
89 |
+\code{model.homozygous} and \code{error} are required in the |
|
89 | 90 |
filter function and are automatically set.} |
90 | 91 |
|
91 | 92 |
\item{fun.setPriorVcf}{Function to set prior for somatic status for each |
... | ... |
@@ -103,17 +104,17 @@ coverage files. Needs to return a \code{logical} vector whether an interval |
103 | 104 |
should be used for segmentation. Defaults to \code{\link{filterTargets}}.} |
104 | 105 |
|
105 | 106 |
\item{args.filterTargets}{Arguments for target filtering function. Arguments |
106 |
-\code{log.ratio}, \code{tumor}, \code{gc.data}, \code{seg.file}, |
|
107 |
-\code{normalDB} and \code{verbose} are required and automatically set} |
|
107 |
+\code{log.ratio}, \code{tumor}, \code{gc.data}, \code{seg.file} and |
|
108 |
+\code{normalDB} are required and automatically set.} |
|
108 | 109 |
|
109 | 110 |
\item{fun.segmentation}{Function for segmenting the copy number log-ratios. |
110 | 111 |
Expected return value is a \code{data.frame} representation of the |
111 | 112 |
segmentation. Defaults to \code{\link{segmentationCBS}}.} |
112 | 113 |
|
113 | 114 |
\item{args.segmentation}{Arguments for segmentation function. Arguments |
114 |
-\code{normal}, \code{tumor}, \code{log.ratio}, \code{plot.cnv}, |
|
115 |
+\code{normal}, \code{tumor}, \code{log.ratio}, \code{plot.cnv} and |
|
115 | 116 |
\code{min.coverage}, \code{sampleid}, \code{vcf}, \code{tumor.id.in.vcf}, |
116 |
-\code{centromeres} and \code{verbose} are required in the segmentation function |
|
117 |
+\code{centromeres} are required in the segmentation function |
|
117 | 118 |
and automatically set.} |
118 | 119 |
|
119 | 120 |
\item{fun.focal}{Function for identifying focal amplifications. Defaults to |
... | ... |
@@ -187,6 +188,9 @@ that should converge quickly. Allowed range is 10 to 250.} |
187 | 188 |
\item{log.ratio.calibration}{Re-calibrate log-ratios in the window |
188 | 189 |
\code{sd(log.ratio)*log.ratio.calibration}.} |
189 | 190 |
|
191 |
+\item{smooth.log.ratio}{Smooth \code{log.ratio} using the \code{DNAcopy} |
|
192 |
+package.} |
|
193 |
+ |
|
190 | 194 |
\item{remove.off.target.snvs}{Deprecated. Use the corresponding argument in |
191 | 195 |
\code{args.filterVcf}.} |
192 | 196 |
|
... | ... |
@@ -238,6 +242,8 @@ typically result in a slightly more accurate purity, especially for rather |
238 | 242 |
silent genomes or very low purities. Otherwise, it will just use the purity |
239 | 243 |
determined via the SCNA-fit.} |
240 | 244 |
|
245 |
+\item{log.file}{If not \code{NULL}, store verbose output to file.} |
|
246 |
+ |
|
241 | 247 |
\item{verbose}{Verbose output.} |
242 | 248 |
} |
243 | 249 |
\value{ |
... | ... |
@@ -292,6 +298,14 @@ res <- runAbsoluteCN(seg.file=seg.file, fun.segmentation=funSeg, max.ploidy = 4, |
292 | 298 |
\author{ |
293 | 299 |
Markus Riester |
294 | 300 |
} |
301 |
+\references{ |
|
302 |
+Riester et al. (2016). PureCN: Copy number calling and SNV |
|
303 |
+classification using targeted short read sequencing. Source Code for Biology |
|
304 |
+and Medicine, 11:13. |
|
305 |
+ |
|
306 |
+Carter et al. (2012), Absolute quantification of somatic DNA alterations in |
|
307 |
+human cancer. Nature Biotechnology. |
|
308 |
+} |
|
295 | 309 |
\seealso{ |
296 | 310 |
\code{\link{correctCoverageBias}} \code{\link{segmentationCBS}} |
297 | 311 |
\code{\link{calculatePowerDetectSomatic}} |
... | ... |
@@ -8,7 +8,7 @@ segmentationCBS(normal, tumor, log.ratio, seg, plot.cnv, min.coverage, sampleid, |
8 | 8 |
target.weight.file = NULL, alpha = 0.005, undo.SD = NULL, vcf = NULL, |
9 | 9 |
tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, max.segments = NULL, |
10 | 10 |
prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL, |
11 |
- centromeres = NULL, verbose = TRUE) |
|
11 |
+ centromeres = NULL) |
|
12 | 12 |
} |
13 | 13 |
\arguments{ |
14 | 14 |
\item{normal}{GATK coverage data for normal sample.} |
... | ... |
@@ -60,8 +60,6 @@ properly ordered.} |
60 | 60 |
|
61 | 61 |
\item{centromeres}{A \code{data.frame} with centromere positions in first |
62 | 62 |
three columns. Currently not supported in this function.} |
63 |
- |
|
64 |
-\item{verbose}{Verbose output.} |
|
65 | 63 |
} |
66 | 64 |
\value{ |
67 | 65 |
\code{data.frame} containing the segmentation. |
... | ... |
@@ -95,6 +93,15 @@ ret <-runAbsoluteCN(normal.coverage.file=normal.coverage.file, |
95 | 93 |
\author{ |
96 | 94 |
Markus Riester |
97 | 95 |
} |
96 |
+\references{ |
|
97 |
+Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. |
|
98 |
+(2004). Circular binary segmentation for the analysis of array-based DNA |
|
99 |
+copy number data. Biostatistics 5: 557-572. |
|
100 |
+ |
|
101 |
+Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary |
|
102 |
+segmentation algorithm for the analysis of array CGH data. Bioinformatics |
|
103 |
+23: 657-63. |
|
104 |
+} |
|
98 | 105 |
\seealso{ |
99 | 106 |
\code{\link{runAbsoluteCN}} |
100 | 107 |
} |
... | ... |
@@ -6,10 +6,10 @@ |
6 | 6 |
\usage{ |
7 | 7 |
segmentationPSCBS(normal, tumor, log.ratio, seg, plot.cnv, min.coverage, |
8 | 8 |
sampleid, target.weight.file = NULL, alpha = 0.005, undo.SD = NULL, |
9 |
- drop.outliers = TRUE, flavor = "tcn&dh", tauA = 0.03, vcf = NULL, |
|
10 |
- tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, max.segments = NULL, |
|
11 |
- prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL, |
|
12 |
- centromeres = NULL, verbose = TRUE, ...) |
|
9 |
+ flavor = "tcn&dh", tauA = 0.03, vcf = NULL, tumor.id.in.vcf = 1, |
|
10 |
+ normal.id.in.vcf = NULL, max.segments = NULL, prune.hclust.h = NULL, |
|
11 |
+ prune.hclust.method = "ward.D", chr.hash = NULL, centromeres = NULL, |
|
12 |
+ ...) |
|
13 | 13 |
} |
14 | 14 |
\arguments{ |
15 | 15 |
\item{normal}{GATK coverage data for normal sample.} |
... | ... |
@@ -38,9 +38,6 @@ function.} |
38 | 38 |
\item{undo.SD}{\code{undo.SD} for CBS, see documentation of the |
39 | 39 |
\code{segment} function. If \code{NULL}, try to find a sensible default.} |
40 | 40 |
|
41 |
-\item{drop.outliers}{If \code{TRUE}, calls the |
|
42 |
-\code{dropSegmentationOutliers} function from PSCBS before segmentation.} |
|
43 |
- |
|
44 | 41 |
\item{flavor}{Flavor value for PSCBS. See \code{segmentByNonPairedPSCBS}.} |
45 | 42 |
|
46 | 43 |
\item{tauA}{tauA argument for PSCBS. See \code{segmentByNonPairedPSCBS}.} |
... | ... |
@@ -70,8 +67,6 @@ properly ordered.} |
70 | 67 |
\item{centromeres}{A \code{data.frame} with centromere positions in first |
71 | 68 |
three columns. If not \code{NULL}, add breakpoints at centromeres.} |
72 | 69 |
|
73 |
-\item{verbose}{Verbose output.} |
|
74 |
- |
|
75 | 70 |
\item{\dots}{Additional parameters passed to the |
76 | 71 |
\code{segmentByNonPairedPSCBS} function.} |
77 | 72 |
} |
... | ... |
@@ -108,6 +103,18 @@ gc.gene.file <- system.file("extdata", "example_gc.gene.file.txt", |
108 | 103 |
\author{ |
109 | 104 |
Markus Riester |
110 | 105 |
} |
106 |
+\references{ |
|
107 |
+Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. |
|
108 |
+(2004). Circular binary segmentation for the analysis of array-based DNA |
|
109 |
+copy number data. Biostatistics 5: 557-572. |
|
110 |
+ |
|
111 |
+Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary |
|
112 |
+segmentation algorithm for the analysis of array CGH data. Bioinformatics |
|
113 |
+23: 657-63. |
|
114 |
+ |
|
115 |
+Olshen et al. (2011). Parent-specific copy number in paired tumor-normal |
|
116 |
+studies using circular binary segmentation. Bioinformatics. |
|
117 |
+} |
|
111 | 118 |
\seealso{ |
112 | 119 |
\code{\link{runAbsoluteCN}} |
113 | 120 |
} |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
\title{Set Mapping Bias VCF} |
6 | 6 |
\usage{ |
7 | 7 |
setMappingBiasVcf(vcf, tumor.id.in.vcf = NULL, normal.panel.vcf.file = NULL, |
8 |
- min.normals = 5, smooth = TRUE, smooth.n = 5, verbose = TRUE) |
|
8 |
+ min.normals = 5, smooth = TRUE, smooth.n = 5) |
|
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 | 11 |
\item{vcf}{\code{CollapsedVCF} object, read in with the \code{readVcf} |
... | ... |
@@ -26,8 +26,6 @@ calculating position-specific mapping bias. Requires |
26 | 26 |
smoothing of neighboring SNPs. Requires \code{normal.panel.vcf.file}.} |
27 | 27 |
|
28 | 28 |
\item{smooth.n}{Number of neighboring variants used for smoothing.} |
29 |
- |
|
30 |
-\item{verbose}{Verbose output.} |
|
31 | 29 |
} |
32 | 30 |
\value{ |
33 | 31 |
A \code{numeric(nrow(vcf))} vector with the mapping bias for each |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
\title{Set Somatic Prior VCF} |
6 | 6 |
\usage{ |
7 | 7 |
setPriorVcf(vcf, prior.somatic = c(0.5, 5e-04, 0.999, 1e-04, 0.995, 0.01), |
8 |
- tumor.id.in.vcf = NULL, verbose = TRUE) |
|
8 |
+ tumor.id.in.vcf = NULL) |
|
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 | 11 |
\item{vcf}{\code{CollapsedVCF} object, read in with the \code{readVcf} |
... | ... |
@@ -24,8 +24,6 @@ value is for the case that variant is in both dbSNP and COSMIC > 2.} |
24 | 24 |
|
25 | 25 |
\item{tumor.id.in.vcf}{Id of tumor in case multiple samples are stored in |
26 | 26 |
VCF.} |
27 |
- |
|
28 |
-\item{verbose}{Verbose output.} |
|
29 | 27 |
} |
30 | 28 |
\value{ |
31 | 29 |
A \code{numeric(nrow(vcf))} vector with the prior probability of |
... | ... |
@@ -326,10 +326,10 @@ present in 5 or more samples. |
326 | 326 |
We next use coverage data of normal samples to estimate the expected variance |
327 | 327 |
in coverage per target: |
328 | 328 |
|
329 |
-<<targetweightfile1>>= |
|
329 |
+<<targetweightfile1, message=FALSE>>= |
|
330 | 330 |
target.weight.file <- "target_weights.txt" |
331 | 331 |
createTargetWeights(tumor.coverage.file, normal.coverage.files, |
332 |
- target.weight.file, verbose=FALSE) |
|
332 |
+ target.weight.file) |
|
333 | 333 |
@ |
334 | 334 |
|
335 | 335 |
This function calculates target-level copy number log-ratios using all normal |
... | ... |
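One way to turn the per-target variance estimated from the normals into weights is to down-weight noisy targets, for example with weights proportional to the inverse of the per-target standard deviation of normal-vs-normal log-ratios. The sketch below only illustrates that idea; the exact scheme used by createTargetWeights may differ.

# lr.normals: matrix of log2 coverage ratios, targets in rows, normals in columns.
targetWeights <- function(lr.normals) {
    target.sd <- apply(lr.normals, 1, sd)
    w <- 1 / (target.sd + 0.1)   # noisy targets receive small weights
    w / mean(w)                  # scale so the average weight is 1
}

set.seed(42)
lr <- cbind(rnorm(5, sd=0.1), rnorm(5, sd=0.1), rnorm(5, sd=0.1))
lr[5, ] <- rnorm(3, sd=1)        # one deliberately noisy target
round(targetWeights(lr), 2)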
@@ -858,11 +858,11 @@ target-level copy number log-ratios, and \Biocpkg{PureCN} should be used for |
858 | 858 |
segmentation and purity/ploidy inference only, it is possible to provide these |
859 | 859 |
log-ratios: |
860 | 860 |
|
861 |
-<<customlogratio>>= |
|
861 |
+<<customlogratio, message=FALSE>>= |
|
862 | 862 |
# We still use the log-ratio exactly as normalized by PureCN for this |
863 | 863 |
# example |
864 | 864 |
log.ratio <- calculateLogRatio(readCoverageGatk(normal.coverage.file), |
865 |
- readCoverageGatk(tumor.coverage.file), verbose=FALSE) |
|
865 |
+ readCoverageGatk(tumor.coverage.file)) |
|
866 | 866 |
|
867 | 867 |
retLogRatio <- runAbsoluteCN(log.ratio=log.ratio, |
868 | 868 |
gc.gene.file=gc.gene.file, vcf.file=vcf.file, |
... | ... |
@@ -964,6 +964,7 @@ Rscript Coverage.R --outdir ~/tmp/ --gatkcoverage example_tumor.txt \ |
964 | 964 |
Argument name & Corresponding PureCN argument & PureCN function \\ |
965 | 965 |
\midrule |
966 | 966 |
--help -h & & \\ |
967 |
+--version -v & & \\ |
|
967 | 968 |
--force -f & & \\ |
968 | 969 |
--outdir -o & & \\ |
969 | 970 |
--bam -b & bam.file & \Rfunction{calculateBamCoverageByInterval} \\ |
... | ... |
@@ -1010,11 +1011,12 @@ Rscript PureCN.R --rds Sample1_purecn.rds |
1010 | 1011 |
Argument name & Corresponding PureCN argument & PureCN function \\ |
1011 | 1012 |
\midrule |
1012 | 1013 |
--help -h & & \\ |
1014 |
+--version -v & & \\ |
|
1013 | 1015 |
--force -f & & \\ |
1014 | 1016 |
--outdir -o & & \\ |
1015 | 1017 |
--normal -n & normal.coverage.file & \Rfunction{runAbsoluteCN} \\ |
1016 | 1018 |
--tumor -t & tumor.coverage.file & \Rfunction{runAbsoluteCN} \\ |
1019 |
+--vcf -b & vcf.file & \Rfunction{runAbsoluteCN} \\ |
|
1017 | 1020 |
--rds -r & file.rds & \Rfunction{readCurationFile} \\ |
1018 | 1021 |
--genome -g & genome & \Rfunction{runAbsoluteCN} \\ |
1019 | 1022 |
--gcgene -c & gc.gene.file & \Rfunction{runAbsoluteCN} \\ |