Browse code

Code cleanups to make BiocCheck happy-ish.

lima1 authored on 09/12/2018 21:47:10
Showing 19 changed files

... ...
@@ -39,6 +39,7 @@ Imports:
39 39
     futile.logger,
40 40
     VGAM,
41 41
     tools,
42
+    methods,
42 43
     rhdf5,
43 44
     Matrix
44 45
 Suggests:
... ...
@@ -122,6 +122,7 @@ importFrom(graphics,strwidth)
122 122
 importFrom(graphics,symbols)
123 123
 importFrom(graphics,text)
124 124
 importFrom(gridExtra,grid.arrange)
125
+importFrom(methods,is)
125 126
 importFrom(rhdf5,H5Fopen)
126 127
 importFrom(rtracklayer,import)
127 128
 importFrom(stats,C)
... ...
@@ -329,7 +329,7 @@ c(test.num.copy, round(opt.C))[i], prior.K, mapping.bias.ok, seg.id, min.variant
329 329
 
330 330
     tmp <- sapply(prior.purity, .checkFraction, "prior.purity")
331 331
 
332
-    if (!is.null(sampleid) && (class(sampleid) != "character" ||
332
+    if (!is.null(sampleid) && (!is(sampleid, "character") ||
333 333
         length(sampleid) != 1)) {
334 334
         .stopUserError("sampleid not a character string.")
335 335
     }
... ...
@@ -838,7 +838,7 @@ c(test.num.copy, round(opt.C))[i], prior.K, mapping.bias.ok, seg.id, min.variant
838 838
     required.colnames2 <- c("ID", "chromosome", "start", "end", "num_probes", 
839 839
         "mean")
840 840
     if (ncol(seg) > length(required.colnames)) {
841
-        seg <- seg[,1:length(required.colnames)]
841
+        seg <- seg[,seq_along(required.colnames)]
842 842
     }    
843 843
     if (identical(colnames(seg), required.colnames2)) {
844 844
         colnames(seg) <- required.colnames
... ...
@@ -899,7 +899,7 @@ c(test.num.copy, round(opt.C))[i], prior.K, mapping.bias.ok, seg.id, min.variant
899 899
             flog.info("Provided log2-ratio looks too noisy, using segmentation only.")
900 900
         }
901 901
     }
902
-    if (class(seg.file)=="character") {
902
+    if (is(seg.file, "character")) {
903 903
         seg <- .loadSegFile(seg.file, sampleid, model.homozygous, verbose=FALSE)    
904 904
     } else {
905 905
         seg <-.checkSeg(seg.file, sampleid, model.homozygous, verbose=FALSE)
... ...
@@ -32,11 +32,13 @@ annotateTargets <- function(x, txdb, org) {
32 32
     txdb <- .checkSeqlevelStyle(x, txdb, "txdb", "interval file")
33 33
     id <- transcriptsByOverlaps(txdb, ranges = x[idx], columns = "GENEID")
34 34
     id$SYMBOL <- suppressWarnings(
35
-        select(org, sapply(id$GENEID, function(x)x[1]), "SYMBOL")[, 2])
35
+        select(org, vapply(id$GENEID, function(x) x[1], character(1)), 
36
+               "SYMBOL")[, 2])
36 37
 
37 38
     idExons <- exonsByOverlaps(txdb, ranges = x[idx], columns = "GENEID")
38 39
     idExons$SYMBOL <- suppressWarnings(
39
-        select(org, sapply(idExons$GENEID, function(x)x[1]), "SYMBOL")[, 2])
40
+        select(org, vapply(idExons$GENEID, function(x) x[1], character(1)),
41
+               "SYMBOL")[, 2])
40 42
 
41 43
     ov <- findOverlaps(x[idx], id)
42 44
     ovExons <- findOverlaps(x[idx], idExons)
... ...
@@ -58,16 +58,16 @@ calculateBamCoverageByInterval <- function(bam.file, interval.file,
58 58
     x <- xDupFiltered
59 59
     if (keep.duplicates) x <- xAll
60 60
 
61
-    intervalGr$coverage <- sapply(seq_along(x), function(i)
61
+    intervalGr$coverage <- vapply(seq_along(x), function(i)
62 62
         sum(coverage(IRanges(x[[i]][["pos"]], width = x[[i]][["qwidth"]]),
63
-            shift = -start(intervalGr)[i], width = width(intervalGr)[i])))
63
+            shift = -start(intervalGr)[i], width = width(intervalGr)[i])), integer(1))
64 64
 
65 65
     intervalGr$average.coverage <- intervalGr$coverage / width(intervalGr)
66 66
 
67
-    intervalGr$counts <- as.numeric(sapply(x, function(y) length(y$pos)))
67
+    intervalGr$counts <- as.numeric(vapply(x, function(y) length(y$pos), integer(1)))
68 68
     intervalGr$duplication.rate <- 1 -
69
-        sapply(xDupFiltered, function(y) length(y$pos)) /
70
-        sapply(xAll, function(y) length(y$pos))
69
+        vapply(xDupFiltered, function(y) length(y$pos), integer(1)) /
70
+        vapply(xAll, function(y) length(y$pos), integer(1))
71 71
 
72 72
     if (!is.null(output.file)) {
73 73
         .writeCoverage(intervalGr, output.file)
... ...
@@ -29,7 +29,7 @@
29 29
 callAlterations <- function(res, id = 1, cutoffs = c(0.5, 6, 7),
30 30
 log.ratio.cutoffs = c(-0.9, 0.9), failed = NULL, all.genes = FALSE) {
31 31
 
32
-    if (class(res$results[[id]]$gene.calls) != "data.frame") {
32
+    if (!is(res$results[[id]]$gene.calls, "data.frame")) {
33 33
         .stopUserError("This function requires gene-level calls.\n",
34 34
             "Please add a column 'Gene' containing gene symbols to the ",
35 35
             "interval.file.")
... ...
@@ -76,7 +76,7 @@ callLOH <- function(res, id = 1, arm.cutoff = 0.9) {
76 76
 
77 77
 .getCentromeres <- function(res) {
78 78
     # TODO remove this support for old data.frame centromeres in PureCN 1.12
79
-    if (class(res$input$centromeres) == "GRanges" || 
79
+    if (is(res$input$centromeres, "GRanges") || 
80 80
             is.null(res$input$centromeres)) {
81 81
         return(res$input$centromeres)
82 82
     }    
... ...
@@ -72,11 +72,11 @@ callMutationBurden <- function(res, id = 1, remove.flagged = TRUE,
72 72
     
73 73
     # calculate the callable genomic region for # mutations/MB calculation
74 74
     if (!is.null(callable)) {
75
-        if (class(callable) != "GRanges") {
75
+        if (!is(callable, "GRanges")) {
76 76
             .stopUserError("callable not a GRanges object.")
77 77
         } 
78 78
         if (!is.null(exclude)) {
79
-            if (class(exclude) != "GRanges") {
79
+            if (!is(exclude, "GRanges")) {
80 80
                 .stopUserError("exclude not a GRanges object.")
81 81
             } 
82 82
             callable <- setdiff(callable, exclude)
... ...
@@ -102,7 +102,7 @@ callMutationBurden <- function(res, id = 1, remove.flagged = TRUE,
102 102
     # filter mutations, for example if the user wants to 
103 103
     # calculate missense burden
104 104
     if (!is.null(fun.countMutation)) {
105
-        if (class(fun.countMutation) != "function") {
105
+        if (!is(fun.countMutation, "function")) {
106 106
             .stopUserError("fun.countMutation not a function.")        
107 107
         }
108 108
         vcf <-  res$input$vcf
... ...
@@ -98,7 +98,7 @@ filterTargets <- function(...) {
98 98
 }  
99 99
 
100 100
 .checkNormalDB <- function(tumor, normalDB) {
101
-    if (!class(normalDB) == "list") {
101
+    if (!is(normalDB, "list")) {
102 102
         .stopUserError("normalDB not a valid normalDB object. ",
103 103
             "Use createNormalDatabase to create one.")
104 104
     }    
... ...
@@ -313,9 +313,9 @@ function(vcf, tumor.id.in.vcf, allowed=0.05) {
313 313
 .readAndCheckVcf <- function(vcf.file, genome, DB.info.flag = "DB", 
314 314
                              POPAF.info.field = "POP_AF", 
315 315
                              min.pop.af = 0.001, check.DB = TRUE) {
316
-    if (class(vcf.file) == "character") {
316
+    if (is(vcf.file, "character")) {
317 317
         vcf <- readVcf(vcf.file, genome)
318
-    } else if (class(vcf.file) != "CollapsedVCF") {
318
+    } else if (!is(vcf.file, "CollapsedVCF")) {
319 319
         .stopUserError("vcf.file neither a filename nor a CollapsedVCF ", 
320 320
             "object.") 
321 321
     } else {
... ...
@@ -67,7 +67,7 @@ verbose=TRUE) {
67 67
         if (m == 0) return(1)
68 68
         dbinom(m, size=coverage, prob=error/3)    
69 69
      }
70
-     k <- min(which(sapply(0:coverage,.pk) < fpr)) - 1
70
+     k <- min(which(vapply(seq(0, coverage),.pk, double(1)) < fpr)) - 1
71 71
      if (verbose) message("Minimum ", k, " supporting reads.")
72 72
 
73 73
      # find allelic fraction to test
... ...
@@ -44,7 +44,7 @@ predictSomatic <- function(res, id = 1, return.vcf = FALSE,
44 44
 }
45 45
 
46 46
 .addSymbols <- function(result) {
47
-    if (class(result$gene.calls) == "data.frame") {
47
+    if (is(result$gene.calls, "data.frame")) {
48 48
         g.gr <- GRanges(result$gene.calls)
49 49
         p.gr <- GRanges(result$SNV.posterior$posteriors)
50 50
         ov <- findOverlaps(p.gr, g.gr)
... ...
@@ -66,6 +66,7 @@
66 66
 #' @importFrom GenomicRanges tileGenome
67 67
 #' @importFrom S4Vectors mcols
68 68
 #' @importFrom rtracklayer import
69
+#' @importFrom methods is
69 70
 #' @importFrom stats aggregate
70 71
 preprocessIntervals <- function(interval.file, reference.file,
71 72
                                 output.file = NULL, off.target = FALSE,
... ...
@@ -81,7 +82,7 @@ preprocessIntervals <- function(interval.file, reference.file,
81 82
                                 off.target.seqlevels=c("targeted", "all"),
82 83
                                 small.targets=c("resize", "drop")) {
83 84
 
84
-    if (class(interval.file)=="GRanges") {
85
+    if (is(interval.file, "GRanges")) {
85 86
         interval.gr <- .checkIntervals(interval.file)
86 87
     } else {
87 88
         interval.gr <- readCoverageFile(interval.file)
... ...
@@ -239,7 +240,7 @@ calculateGCContentByInterval <- function() {
239 240
 
240 241
 .checkColScore <- function(y, label) {
241 242
     colScore <- if (is.null(y$score)) 1 else "score"
242
-    if (class(mcols(y)[, colScore]) != "numeric") {
243
+    if (!is(mcols(y)[, colScore], "numeric")) {
243 244
         flog.warn("Score column in %s file is not numeric.", label)
244 245
         class(mcols(y)[, colScore]) <- "numeric"
245 246
     }
... ...
@@ -332,10 +333,10 @@ calculateGCContentByInterval <- function() {
332 333
 .checkSeqlevelStyle <- function(ref, x, name1, name2="reference") {
333 334
     refSeqlevelStyle <- try(seqlevelsStyle(ref), silent=TRUE)
334 335
     # if unknown, we cannot check and correct
335
-    if (class(refSeqlevelStyle) == "try-error") return(x)
336
+    if (is(refSeqlevelStyle, "try-error")) return(x)
336 337
     xSeqlevelStyle <- try(seqlevelsStyle(x), silent=TRUE)
337 338
 
338
-    if (class(xSeqlevelStyle) == "try-error") {
339
+    if (is(xSeqlevelStyle, "try-error")) {
339 340
         .stopUserError("Chromosome naming style of ", name1, 
340 341
             " file unknown, should be ", refSeqlevelStyle, ".") 
341 342
     }    
... ...
@@ -105,10 +105,12 @@ processMultipleSamples <- function(tumor.coverage.files, sampleids, normalDB,
105 105
     lrsw <- copynumber::winsorize(lrs, arms = arms, verbose = FALSE)
106 106
     if (is.null(w)) {
107 107
         w <- 1
108
-        dupr <- sapply(tumors, function(x) median(x[x$on.target]$duplication.rate, na.rm = TRUE))
108
+        dupr <- vapply(tumors, function(x) 
109
+                       median(x[x$on.target]$duplication.rate, na.rm = TRUE),
110
+                       double(1))
109 111
         if (!sum(is.na(dupr)) && min(dupr, na.rm = TRUE) > 0) { 
110
-            w <- (1/dupr)
111
-            w <- w/max(w)
112
+            w <- (1 / dupr)
113
+            w <- w / max(w)
112 114
 
113 115
             flog.info("Setting weights by duplication rate. Lowest weight for %s (%.2f), heighest for %s.",
114 116
                 sampleids[which.min(w)], min(w), sampleids[which.max(w)])
... ...
@@ -626,13 +626,14 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
626 626
     
627 627
     # estimate stand. dev. for target logR within targets. this will be used as proxy
628 628
     # for sample error.
629
-    targetsPerSegment <- sapply(exon.lrs, length)
629
+    targetsPerSegment <- vapply(exon.lrs, length, double(1))
630 630
     
631 631
     if (!sum(targetsPerSegment > 50, na.rm = TRUE)) {
632 632
         .stopRuntimeError("Only tiny segments.")
633 633
     }
634 634
     
635
-    sd.seg <- max(median(sapply(exon.lrs, sd), na.rm = TRUE), min.logr.sdev)
635
+    sd.seg <- max(median(vapply(exon.lrs, sd, double(1)), na.rm = TRUE),
636
+                  min.logr.sdev)
636 637
    
637 638
     # if user provided seg file, then we do not have access to the log-ratios and
638 639
     # need to use the user provided noise estimate also, don't do outlier smoothing
... ...
@@ -760,7 +761,8 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
760 761
                     function(i) .calcLlikSegment(subclonal = subclonal[i], lr = exon.lrs[[i]] + 
761 762
                       log.ratio.offset[i], sd.seg = sd.seg, p = px, Ci = C[i], total.ploidy = total.ploidy, 
762 763
                       max.exon.ratio = max.exon.ratio), double(1)))
763
-                  px.rij.s <- sapply(px.rij, sum, na.rm = TRUE) + log(prior.purity.local)
764
+                  px.rij.s <- vapply(px.rij, sum, na.rm = TRUE, double(1)) + 
765
+                      log(prior.purity.local)
764 766
                   
765 767
                   if (simulated.annealing) 
766 768
                     px.rij.s <- px.rij.s * exp(iter/4)
... ...
@@ -959,7 +961,8 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
959 961
                         Ci = sol$ML.C[i], total.ploidy = px * (sum(sol$seg$size * sol$ML.C))/sum(sol$seg$size) + (1 - 
960 962
                           px) * 2, max.exon.ratio = max.exon.ratio), double(1)))
961 963
                       
962
-                      px.rij.s <- sapply(px.rij, sum, na.rm = TRUE) + log(pp) + vapply(res.snvllik, 
964
+                      px.rij.s <- vapply(px.rij, sum, na.rm = TRUE, double(1)) + 
965
+                          log(pp) + vapply(res.snvllik, 
963 966
                         function(x) x$llik, double(1))
964 967
                       idx <- which.max(px.rij.s)
965 968
                   }
... ...
@@ -998,14 +1001,14 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
998 1001
     nBefore <- length(results)
999 1002
     results <- .filterDuplicatedResults(results, purity.cutoff = 0.05)
1000 1003
     # bring back in original order for progress output
1001
-    results <- results[order(sapply(results, function(sol) sol$candidate.id))]
1004
+    results <- results[order(vapply(results, function(sol) sol$candidate.id, integer(1)))]
1002 1005
 
1003 1006
     if (length(results) < nBefore) { 
1004 1007
         flog.info("Skipping %i solutions that converged to the same optima.",
1005 1008
                   nBefore - length(results))
1006 1009
     }
1007
-    idxFailed <- sapply(results, function(sol) 
1008
-                        sol$fraction.subclonal > max.non.clonal)
1010
+    idxFailed <- vapply(results, function(sol) 
1011
+                        sol$fraction.subclonal > max.non.clonal, logical(1))
1009 1012
     if (sum(is.na(idxFailed))) .stopRuntimeError("NAs in fraction.subclonal.")
1010 1013
     if (sum(idxFailed)) {
1011 1014
         flog.info("Skipping %i solutions exceeding max.non.clonal (%.2f).",
... ...
@@ -1095,8 +1098,8 @@ runAbsoluteCN <- function(normal.coverage.file = NULL,
1095 1098
             test.num.copy = test.num.copy,
1096 1099
             sex = sex, sex.vcf = sex.vcf, chr.hash = chr.hash, centromeres = centromeres,
1097 1100
             args=list(
1098
-                filterVcf = args.filterVcf[sapply(args.filterVcf, object.size) < 1000],
1099
-                filterIntervals = args.filterIntervals[sapply(args.filterIntervals, object.size) < 1000])
1101
+                filterVcf = args.filterVcf[vapply(args.filterVcf, object.size, double(1)) < 1000],
1102
+                filterIntervals = args.filterIntervals[vapply(args.filterIntervals, object.size, double(1)) < 1000])
1100 1103
         )
1101 1104
     )
1102 1105
 }
... ...
@@ -203,7 +203,7 @@ segmentationCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,
203 203
             sum(queryHits(ov) == i))
204 204
         dx <- cbind(seg$seg.mean,xx)
205 205
         hc <- hclust(dist(dx), method=method)
206
-        seg.hc <- data.frame(id=1:nrow(dx), dx, num=numVariants, 
206
+        seg.hc <- data.frame(id=seq(nrow(dx)), dx, num=numVariants, 
207 207
             cluster=cutree(hc,h=h))[hc$order,]
208 208
 
209 209
         # cluster only segments with at least n variants    
... ...
@@ -244,7 +244,7 @@ segmentationCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,
244 244
 .pruneByVCF <- function(x, vcf, tumor.id.in.vcf, min.size=5, max.pval=0.00001,
245 245
     iterations=3, chr.hash, debug=FALSE) {
246 246
     seg <- try(segments.p(x$cna), silent=TRUE)
247
-    if (class(seg) == "try-error") return(x)
247
+    if (is(seg, "try-error")) return(x)
248 248
     for (iter in seq_len(iterations)) {
249 249
         seg.gr <- GRanges(seqnames=.add.chr.name(seg$chrom, chr.hash), 
250 250
             IRanges(start=seg$loc.start, end=seg$loc.end))
... ...
@@ -179,5 +179,5 @@ normal.panel.vcf.file = NULL, min.normals = 2, smooth = TRUE, smooth.n = 5) {
179 179
     x[1, ] <- x[1, ] + shape1
180 180
     x[2, ] <- x[2, ] + shape2
181 181
     # get the alt allelic fraction for all SNPs
182
-    apply(x, 2, function(y) y[2] / sum(y[1:2]))
182
+    apply(x, 2, function(y) y[2] / sum(head(y,2)))
183 183
 }
... ...
@@ -42,7 +42,7 @@ in.file <- normalizePath(opt$infile, mustWork=TRUE)
42 42
 suppressPackageStartupMessages(library(rtracklayer))
43 43
 
44 44
 intervals <- try(import(in.file), silent=TRUE)
45
-if (class(intervals) == "try-error") intervals <- in.file
45
+if (is(intervals, "try-error")) intervals <- in.file
46 46
 
47 47
 knownGenome <- list(
48 48
     hg18="TxDb.Hsapiens.UCSC.hg18.knownGene",
... ...
@@ -89,7 +89,7 @@ reference.file <- normalizePath(opt$fasta, mustWork = TRUE)
89 89
 suppressPackageStartupMessages(library(rtracklayer))
90 90
 
91 91
 intervals <- try(import(in.file), silent = TRUE)
92
-if (class(intervals) == "try-error") { 
92
+if (is(intervals, "try-error")) { 
93 93
     intervals <- in.file
94 94
 } else {
95 95
     if (sum(c("MT", "chrM", "chMT") %in% seqlevels(intervals))) {