... | ... |
@@ -122,6 +122,7 @@ importFrom(graphics,strwidth) |
122 | 122 |
importFrom(graphics,symbols) |
123 | 123 |
importFrom(graphics,text) |
124 | 124 |
importFrom(gridExtra,grid.arrange) |
125 |
+importFrom(methods,is) |
|
125 | 126 |
importFrom(rhdf5,H5Fopen) |
126 | 127 |
importFrom(rtracklayer,import) |
127 | 128 |
importFrom(stats,C) |
... | ... |
@@ -329,7 +329,7 @@ c(test.num.copy, round(opt.C))[i], prior.K, mapping.bias.ok, seg.id, min.variant |
329 | 329 |
|
330 | 330 |
tmp <- sapply(prior.purity, .checkFraction, "prior.purity") |
331 | 331 |
|
332 |
- if (!is.null(sampleid) && (class(sampleid) != "character" || |
|
332 |
+ if (!is.null(sampleid) && (!is(sampleid, "character") || |
|
333 | 333 |
length(sampleid) != 1)) { |
334 | 334 |
.stopUserError("sampleid not a character string.") |
335 | 335 |
} |
... | ... |
@@ -838,7 +838,7 @@ c(test.num.copy, round(opt.C))[i], prior.K, mapping.bias.ok, seg.id, min.variant |
838 | 838 |
required.colnames2 <- c("ID", "chromosome", "start", "end", "num_probes", |
839 | 839 |
"mean") |
840 | 840 |
if (ncol(seg) > length(required.colnames)) { |
841 |
- seg <- seg[,1:length(required.colnames)] |
|
841 |
+ seg <- seg[,seq_along(required.colnames)] |
|
842 | 842 |
} |
843 | 843 |
if (identical(colnames(seg), required.colnames2)) { |
844 | 844 |
colnames(seg) <- required.colnames |
... | ... |
@@ -899,7 +899,7 @@ c(test.num.copy, round(opt.C))[i], prior.K, mapping.bias.ok, seg.id, min.variant |
899 | 899 |
flog.info("Provided log2-ratio looks too noisy, using segmentation only.") |
900 | 900 |
} |
901 | 901 |
} |
902 |
- if (class(seg.file)=="character") { |
|
902 |
+ if (is(seg.file, "character")) { |
|
903 | 903 |
seg <- .loadSegFile(seg.file, sampleid, model.homozygous, verbose=FALSE) |
904 | 904 |
} else { |
905 | 905 |
seg <-.checkSeg(seg.file, sampleid, model.homozygous, verbose=FALSE) |
... | ... |
@@ -32,11 +32,13 @@ annotateTargets <- function(x, txdb, org) { |
32 | 32 |
txdb <- .checkSeqlevelStyle(x, txdb, "txdb", "interval file") |
33 | 33 |
id <- transcriptsByOverlaps(txdb, ranges = x[idx], columns = "GENEID") |
34 | 34 |
id$SYMBOL <- suppressWarnings( |
35 |
- select(org, sapply(id$GENEID, function(x)x[1]), "SYMBOL")[, 2]) |
|
35 |
+ select(org, vapply(id$GENEID, function(x) x[1], character(1)), |
|
36 |
+ "SYMBOL")[, 2]) |
|
36 | 37 |
|
37 | 38 |
idExons <- exonsByOverlaps(txdb, ranges = x[idx], columns = "GENEID") |
38 | 39 |
idExons$SYMBOL <- suppressWarnings( |
39 |
- select(org, sapply(idExons$GENEID, function(x)x[1]), "SYMBOL")[, 2]) |
|
40 |
+ select(org, vapply(idExons$GENEID, function(x) x[1], character(1)), |
|
41 |
+ "SYMBOL")[, 2]) |
|
40 | 42 |
|
41 | 43 |
ov <- findOverlaps(x[idx], id) |
42 | 44 |
ovExons <- findOverlaps(x[idx], idExons) |
... | ... |
@@ -58,16 +58,16 @@ calculateBamCoverageByInterval <- function(bam.file, interval.file, |
58 | 58 |
x <- xDupFiltered |
59 | 59 |
if (keep.duplicates) x <- xAll |
60 | 60 |
|
61 |
- intervalGr$coverage <- sapply(seq_along(x), function(i) |
|
61 |
+ intervalGr$coverage <- vapply(seq_along(x), function(i) |
|
62 | 62 |
sum(coverage(IRanges(x[[i]][["pos"]], width = x[[i]][["qwidth"]]), |
63 |
- shift = -start(intervalGr)[i], width = width(intervalGr)[i]))) |
|
63 |
+ shift = -start(intervalGr)[i], width = width(intervalGr)[i])), integer(1)) |
|
64 | 64 |
|
65 | 65 |
intervalGr$average.coverage <- intervalGr$coverage / width(intervalGr) |
66 | 66 |
|
67 |
- intervalGr$counts <- as.numeric(sapply(x, function(y) length(y$pos))) |
|
67 |
+ intervalGr$counts <- as.numeric(vapply(x, function(y) length(y$pos), integer(1))) |
|
68 | 68 |
intervalGr$duplication.rate <- 1 - |
69 |
- sapply(xDupFiltered, function(y) length(y$pos)) / |
|
70 |
- sapply(xAll, function(y) length(y$pos)) |
|
69 |
+ vapply(xDupFiltered, function(y) length(y$pos), integer(1)) / |
|
70 |
+ vapply(xAll, function(y) length(y$pos), integer(1)) |
|
71 | 71 |
|
72 | 72 |
if (!is.null(output.file)) { |
73 | 73 |
.writeCoverage(intervalGr, output.file) |
... | ... |
@@ -29,7 +29,7 @@ |
29 | 29 |
callAlterations <- function(res, id = 1, cutoffs = c(0.5, 6, 7), |
30 | 30 |
log.ratio.cutoffs = c(-0.9, 0.9), failed = NULL, all.genes = FALSE) { |
31 | 31 |
|
32 |
- if (class(res$results[[id]]$gene.calls) != "data.frame") { |
|
32 |
+ if (!is(res$results[[id]]$gene.calls, "data.frame")) { |
|
33 | 33 |
.stopUserError("This function requires gene-level calls.\n", |
34 | 34 |
"Please add a column 'Gene' containing gene symbols to the ", |
35 | 35 |
"interval.file.") |
... | ... |
@@ -76,7 +76,7 @@ callLOH <- function(res, id = 1, arm.cutoff = 0.9) { |
76 | 76 |
|
77 | 77 |
.getCentromeres <- function(res) { |
78 | 78 |
# TODO remove this support for old data.frame centromeres in PureCN 1.12 |
79 |
- if (class(res$input$centromeres) == "GRanges" || |
|
79 |
+ if (is(res$input$centromeres, "GRanges") || |
|
80 | 80 |
is.null(res$input$centromeres)) { |
81 | 81 |
return(res$input$centromeres) |
82 | 82 |
} |
... | ... |
@@ -72,11 +72,11 @@ callMutationBurden <- function(res, id = 1, remove.flagged = TRUE, |
72 | 72 |
|
73 | 73 |
# calculate the callable genomic region for # mutations/MB calculation |
74 | 74 |
if (!is.null(callable)) { |
75 |
- if (class(callable) != "GRanges") { |
|
75 |
+ if (!is(callable, "GRanges")) { |
|
76 | 76 |
.stopUserError("callable not a GRanges object.") |
77 | 77 |
} |
78 | 78 |
if (!is.null(exclude)) { |
79 |
- if (class(exclude) != "GRanges") { |
|
79 |
+ if (!is(exclude, "GRanges")) { |
|
80 | 80 |
.stopUserError("exclude not a GRanges object.") |
81 | 81 |
} |
82 | 82 |
callable <- setdiff(callable, exclude) |
... | ... |
@@ -102,7 +102,7 @@ callMutationBurden <- function(res, id = 1, remove.flagged = TRUE, |
102 | 102 |
# filter mutations, for example if the user wants to |
103 | 103 |
# calculate missense burden |
104 | 104 |
if (!is.null(fun.countMutation)) { |
105 |
- if (class(fun.countMutation) != "function") { |
|
105 |
+ if (!is(fun.countMutation, "function")) { |
|
106 | 106 |
.stopUserError("fun.countMutation not a function.") |
107 | 107 |
} |
108 | 108 |
vcf <- res$input$vcf |
... | ... |
@@ -98,7 +98,7 @@ filterTargets <- function(...) { |
98 | 98 |
} |
99 | 99 |
|
100 | 100 |
.checkNormalDB <- function(tumor, normalDB) { |
101 |
- if (!class(normalDB) == "list") { |
|
101 |
+ if (!is(normalDB, "list")) { |
|
102 | 102 |
.stopUserError("normalDB not a valid normalDB object. ", |
103 | 103 |
"Use createNormalDatabase to create one.") |
104 | 104 |
} |
... | ... |
@@ -313,9 +313,9 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) { |
313 | 313 |
.readAndCheckVcf <- function(vcf.file, genome, DB.info.flag = "DB", |
314 | 314 |
POPAF.info.field = "POP_AF", |
315 | 315 |
min.pop.af = 0.001, check.DB = TRUE) { |
316 |
- if (class(vcf.file) == "character") { |
|
316 |
+ if (is(vcf.file, "character")) { |
|
317 | 317 |
vcf <- readVcf(vcf.file, genome) |
318 |
- } else if (class(vcf.file) != "CollapsedVCF") { |
|
318 |
+ } else if (!is(vcf.file, "CollapsedVCF")) { |
|
319 | 319 |
.stopUserError("vcf.file neither a filename nor a CollapsedVCF ", |
320 | 320 |
"object.") |
321 | 321 |
} else { |
... | ... |
@@ -67,7 +67,7 @@ verbose=TRUE) { |
67 | 67 |
if (m == 0) return(1) |
68 | 68 |
dbinom(m, size=coverage, prob=error/3) |
69 | 69 |
} |
70 |
- k <- min(which(sapply(0:coverage,.pk) < fpr)) - 1 |
|
70 |
+ k <- min(which(vapply(seq(0, coverage),.pk, double(1)) < fpr)) - 1 |
|
71 | 71 |
if (verbose) message("Minimum ", k, " supporting reads.") |
72 | 72 |
|
73 | 73 |
# find allelic fraction to test |
... | ... |
@@ -44,7 +44,7 @@ predictSomatic <- function(res, id = 1, return.vcf = FALSE, |
44 | 44 |
} |
45 | 45 |
|
46 | 46 |
.addSymbols <- function(result) { |
47 |
- if (class(result$gene.calls) == "data.frame") { |
|
47 |
+ if (is(result$gene.calls, "data.frame")) { |
|
48 | 48 |
g.gr <- GRanges(result$gene.calls) |
49 | 49 |
p.gr <- GRanges(result$SNV.posterior$posteriors) |
50 | 50 |
ov <- findOverlaps(p.gr, g.gr) |
... | ... |
@@ -66,6 +66,7 @@ |
66 | 66 |
#' @importFrom GenomicRanges tileGenome |
67 | 67 |
#' @importFrom S4Vectors mcols |
68 | 68 |
#' @importFrom rtracklayer import |
69 |
+#' @importFrom methods is |
|
69 | 70 |
#' @importFrom stats aggregate |
70 | 71 |
preprocessIntervals <- function(interval.file, reference.file, |
71 | 72 |
output.file = NULL, off.target = FALSE, |
... | ... |
@@ -81,7 +82,7 @@ preprocessIntervals <- function(interval.file, reference.file, |
81 | 82 |
off.target.seqlevels=c("targeted", "all"), |
82 | 83 |
small.targets=c("resize", "drop")) { |
83 | 84 |
|
84 |
- if (class(interval.file)=="GRanges") { |
|
85 |
+ if (is(interval.file, "GRanges")) { |
|
85 | 86 |
interval.gr <- .checkIntervals(interval.file) |
86 | 87 |
} else { |
87 | 88 |
interval.gr <- readCoverageFile(interval.file) |
... | ... |
@@ -239,7 +240,7 @@ calculateGCContentByInterval <- function() { |
239 | 240 |
|
240 | 241 |
.checkColScore <- function(y, label) { |
241 | 242 |
colScore <- if (is.null(y$score)) 1 else "score" |
242 |
- if (class(mcols(y)[, colScore]) != "numeric") { |
|
243 |
+ if (!is(mcols(y)[, colScore], "numeric")) { |
|
243 | 244 |
flog.warn("Score column in %s file is not numeric.", label) |
244 | 245 |
class(mcols(y)[, colScore]) <- "numeric" |
245 | 246 |
} |
... | ... |
@@ -332,10 +333,10 @@ calculateGCContentByInterval <- function() { |
332 | 333 |
.checkSeqlevelStyle <- function(ref, x, name1, name2="reference") { |
333 | 334 |
refSeqlevelStyle <- try(seqlevelsStyle(ref), silent=TRUE) |
334 | 335 |
# if unknown, we cannot check and correct |
335 |
- if (class(refSeqlevelStyle) == "try-error") return(x) |
|
336 |
+ if (is(refSeqlevelStyle, "try-error")) return(x) |
|
336 | 337 |
xSeqlevelStyle <- try(seqlevelsStyle(x), silent=TRUE) |
337 | 338 |
|
338 |
- if (class(xSeqlevelStyle) == "try-error") { |
|
339 |
+ if (is(xSeqlevelStyle, "try-error")) { |
|
339 | 340 |
.stopUserError("Chromosome naming style of ", name1, |
340 | 341 |
" file unknown, should be ", refSeqlevelStyle, ".") |
341 | 342 |
} |
... | ... |
@@ -105,10 +105,12 @@ processMultipleSamples <- function(tumor.coverage.files, sampleids, normalDB, |
105 | 105 |
lrsw <- copynumber::winsorize(lrs, arms = arms, verbose = FALSE) |
106 | 106 |
if (is.null(w)) { |
107 | 107 |
w <- 1 |
108 |
- dupr <- sapply(tumors, function(x) median(x[x$on.target]$duplication.rate, na.rm = TRUE)) |
|
108 |
+ dupr <- vapply(tumors, function(x) |
|
109 |
+ median(x[x$on.target]$duplication.rate, na.rm = TRUE), |
|
110 |
+ double(1)) |
|
109 | 111 |
if (!sum(is.na(dupr)) && min(dupr, na.rm = TRUE) > 0) { |
110 |
- w <- (1/dupr) |
|
111 |
- w <- w/max(w) |
|
112 |
+ w <- (1 / dupr) |
|
113 |
+ w <- w / max(w) |
|
112 | 114 |
|
113 | 115 |
flog.info("Setting weights by duplication rate. Lowest weight for %s (%.2f), heighest for %s.", |
114 | 116 |
sampleids[which.min(w)], min(w), sampleids[which.max(w)]) |
... | ... |
@@ -626,13 +626,14 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
626 | 626 |
|
627 | 627 |
# estimate stand. dev. for target logR within targets. this will be used as proxy |
628 | 628 |
# for sample error. |
629 |
- targetsPerSegment <- sapply(exon.lrs, length) |
|
629 |
+ targetsPerSegment <- vapply(exon.lrs, length, double(1)) |
|
630 | 630 |
|
631 | 631 |
if (!sum(targetsPerSegment > 50, na.rm = TRUE)) { |
632 | 632 |
.stopRuntimeError("Only tiny segments.") |
633 | 633 |
} |
634 | 634 |
|
635 |
- sd.seg <- max(median(sapply(exon.lrs, sd), na.rm = TRUE), min.logr.sdev) |
|
635 |
+ sd.seg <- max(median(vapply(exon.lrs, sd, double(1)), na.rm = TRUE), |
|
636 |
+ min.logr.sdev) |
|
636 | 637 |
|
637 | 638 |
# if user provided seg file, then we do not have access to the log-ratios and |
638 | 639 |
# need to use the user provided noise estimate also, don't do outlier smoothing |
... | ... |
@@ -760,7 +761,8 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
760 | 761 |
function(i) .calcLlikSegment(subclonal = subclonal[i], lr = exon.lrs[[i]] + |
761 | 762 |
log.ratio.offset[i], sd.seg = sd.seg, p = px, Ci = C[i], total.ploidy = total.ploidy, |
762 | 763 |
max.exon.ratio = max.exon.ratio), double(1))) |
763 |
- px.rij.s <- sapply(px.rij, sum, na.rm = TRUE) + log(prior.purity.local) |
|
764 |
+ px.rij.s <- vapply(px.rij, sum, na.rm = TRUE, double(1)) + |
|
765 |
+ log(prior.purity.local) |
|
764 | 766 |
|
765 | 767 |
if (simulated.annealing) |
766 | 768 |
px.rij.s <- px.rij.s * exp(iter/4) |
... | ... |
@@ -959,7 +961,8 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
959 | 961 |
Ci = sol$ML.C[i], total.ploidy = px * (sum(sol$seg$size * sol$ML.C))/sum(sol$seg$size) + (1 - |
960 | 962 |
px) * 2, max.exon.ratio = max.exon.ratio), double(1))) |
961 | 963 |
|
962 |
- px.rij.s <- sapply(px.rij, sum, na.rm = TRUE) + log(pp) + vapply(res.snvllik, |
|
964 |
+ px.rij.s <- vapply(px.rij, sum, na.rm = TRUE, double(1)) + |
|
965 |
+ log(pp) + vapply(res.snvllik, |
|
963 | 966 |
function(x) x$llik, double(1)) |
964 | 967 |
idx <- which.max(px.rij.s) |
965 | 968 |
} |
... | ... |
@@ -998,14 +1001,14 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
998 | 1001 |
nBefore <- length(results) |
999 | 1002 |
results <- .filterDuplicatedResults(results, purity.cutoff = 0.05) |
1000 | 1003 |
# bring back in original order for progress output |
1001 |
- results <- results[order(sapply(results, function(sol) sol$candidate.id))] |
|
1004 |
+ results <- results[order(vapply(results, function(sol) sol$candidate.id, integer(1)))] |
|
1002 | 1005 |
|
1003 | 1006 |
if (length(results) < nBefore) { |
1004 | 1007 |
flog.info("Skipping %i solutions that converged to the same optima.", |
1005 | 1008 |
nBefore - length(results)) |
1006 | 1009 |
} |
1007 |
- idxFailed <- sapply(results, function(sol) |
|
1008 |
- sol$fraction.subclonal > max.non.clonal) |
|
1010 |
+ idxFailed <- vapply(results, function(sol) |
|
1011 |
+ sol$fraction.subclonal > max.non.clonal, logical(1)) |
|
1009 | 1012 |
if (sum(is.na(idxFailed))) .stopRuntimeError("NAs in fraction.subclonal.") |
1010 | 1013 |
if (sum(idxFailed)) { |
1011 | 1014 |
flog.info("Skipping %i solutions exceeding max.non.clonal (%.2f).", |
... | ... |
@@ -1095,8 +1098,8 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, |
1095 | 1098 |
test.num.copy = test.num.copy, |
1096 | 1099 |
sex = sex, sex.vcf = sex.vcf, chr.hash = chr.hash, centromeres = centromeres, |
1097 | 1100 |
args=list( |
1098 |
- filterVcf = args.filterVcf[sapply(args.filterVcf, object.size) < 1000], |
|
1099 |
- filterIntervals = args.filterIntervals[sapply(args.filterIntervals, object.size) < 1000]) |
|
1101 |
+ filterVcf = args.filterVcf[vapply(args.filterVcf, object.size, double(1)) < 1000], |
|
1102 |
+ filterIntervals = args.filterIntervals[vapply(args.filterIntervals, object.size, double(1)) < 1000]) |
|
1100 | 1103 |
) |
1101 | 1104 |
) |
1102 | 1105 |
} |
... | ... |
@@ -203,7 +203,7 @@ segmentationCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, |
203 | 203 |
sum(queryHits(ov) == i)) |
204 | 204 |
dx <- cbind(seg$seg.mean,xx) |
205 | 205 |
hc <- hclust(dist(dx), method=method) |
206 |
- seg.hc <- data.frame(id=1:nrow(dx), dx, num=numVariants, |
|
206 |
+ seg.hc <- data.frame(id=seq(nrow(dx)), dx, num=numVariants, |
|
207 | 207 |
cluster=cutree(hc,h=h))[hc$order,] |
208 | 208 |
|
209 | 209 |
# cluster only segments with at least n variants |
... | ... |
@@ -244,7 +244,7 @@ segmentationCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, |
244 | 244 |
.pruneByVCF <- function(x, vcf, tumor.id.in.vcf, min.size=5, max.pval=0.00001, |
245 | 245 |
iterations=3, chr.hash, debug=FALSE) { |
246 | 246 |
seg <- try(segments.p(x$cna), silent=TRUE) |
247 |
- if (class(seg) == "try-error") return(x) |
|
247 |
+ if (is(seg, "try-error")) return(x) |
|
248 | 248 |
for (iter in seq_len(iterations)) { |
249 | 249 |
seg.gr <- GRanges(seqnames=.add.chr.name(seg$chrom, chr.hash), |
250 | 250 |
IRanges(start=seg$loc.start, end=seg$loc.end)) |
... | ... |
@@ -179,5 +179,5 @@ normal.panel.vcf.file = NULL, min.normals = 2, smooth = TRUE, smooth.n = 5) { |
179 | 179 |
x[1, ] <- x[1, ] + shape1 |
180 | 180 |
x[2, ] <- x[2, ] + shape2 |
181 | 181 |
# get the alt allelic fraction for all SNPs |
182 |
- apply(x, 2, function(y) y[2] / sum(y[1:2])) |
|
182 |
+ apply(x, 2, function(y) y[2] / sum(head(y,2))) |
|
183 | 183 |
} |
... | ... |
@@ -42,7 +42,7 @@ in.file <- normalizePath(opt$infile, mustWork=TRUE) |
42 | 42 |
suppressPackageStartupMessages(library(rtracklayer)) |
43 | 43 |
|
44 | 44 |
intervals <- try(import(in.file), silent=TRUE) |
45 |
-if (class(intervals) == "try-error") intervals <- in.file |
|
45 |
+if (is(intervals, "try-error")) intervals <- in.file |
|
46 | 46 |
|
47 | 47 |
knownGenome <- list( |
48 | 48 |
hg18="TxDb.Hsapiens.UCSC.hg18.knownGene", |
... | ... |
@@ -89,7 +89,7 @@ reference.file <- normalizePath(opt$fasta, mustWork = TRUE) |
89 | 89 |
suppressPackageStartupMessages(library(rtracklayer)) |
90 | 90 |
|
91 | 91 |
intervals <- try(import(in.file), silent = TRUE) |
92 |
-if (class(intervals) == "try-error") { |
|
92 |
+if (is(intervals, "try-error")) { |
|
93 | 93 |
intervals <- in.file |
94 | 94 |
} else { |
95 | 95 |
if (sum(c("MT", "chrM", "chMT") %in% seqlevels(intervals))) { |