Browse code

fix bug

Chandler Zuo authored on 25/09/2019 21:42:40
Showing 4 changed files

... ...
@@ -13,8 +13,10 @@ import(BSgenome)
13 13
 import(BiocFileCache)
14 14
 import(Rcpp)
15 15
 import(data.table)
16
+import(ggplot2)
16 17
 import(grid)
17 18
 import(rappdirs)
19
+import(testit)
18 20
 importFrom(BiocParallel,MulticoreParam)
19 21
 importFrom(BiocParallel,SnowParam)
20 22
 importFrom(BiocParallel,bpmapply)
... ...
@@ -840,6 +840,8 @@ MatchSubsequence <- function(snp.tbl, motif.scores, motif.lib, snpids = NULL, mo
840 840
 #' ComputePValues(motif_library, snpInfo, motif_scores$motif.scores, ncores = 2, testing.mc=TRUE)
841 841
 #' @import Rcpp
842 842
 #' @import data.table
843
+#' @import testit
844
+#' @import ggplot2
843 845
 #' @importFrom BiocParallel bpmapply MulticoreParam SnowParam
844 846
 #' @useDynLib atSNP
845 847
 #' @export
... ...
@@ -853,9 +855,9 @@ ComputePValues <- function(motif.lib, snp.info, motif.scores, ncores = 1, testin
853 855
   
854 856
   if(Sys.info()[["sysname"]] == "Windows"){
855 857
     snow <- SnowParam(workers = ncores, type = "SOCK")
856
-    results<-bpmapply(function(x) results_motif_par(i=x, par.prior=prior, par.transition=transition, par.motif.lib=motif.lib, par.motif.scores=motif.scores, par.testing.mc=testing.mc, par.figdir=figdir), seq_along(motif.lib), BPPARAM = snow,SIMPLIFY = FALSE)
858
+    results<-bpmapply(function(x) results_motif_par(motif.id=x, par.prior=prior, par.transition=transition, par.motif.lib=motif.lib, par.motif.scores=motif.scores, par.testing.mc=testing.mc, par.figdir=figdir), seq_along(motif.lib), BPPARAM = snow,SIMPLIFY = FALSE)
857 859
   }else{
858
-    results<-bpmapply(function(x) results_motif_par(i=x, par.prior=prior, par.transition=transition, par.motif.lib=motif.lib, par.motif.scores=motif.scores, par.testing.mc=testing.mc, par.figdir=figdir), seq_along(motif.lib), BPPARAM = MulticoreParam(workers = ncores),
860
+    results<-bpmapply(function(x) results_motif_par(motif.id=x, par.prior=prior, par.transition=transition, par.motif.lib=motif.lib, par.motif.scores=motif.scores, par.testing.mc=testing.mc, par.figdir=figdir), seq_along(motif.lib), BPPARAM = MulticoreParam(workers = ncores),
859 861
                       SIMPLIFY = FALSE)
860 862
   }
861 863
 
... ...
@@ -117,11 +117,11 @@ match_subseq_par<-function(i, par.k, par.ncores, par.snp.tbl, par.snpids, par.mo
117 117
                                IUPAC, ref_match_seq, snp_match_seq, ref_seq_snp_match, snp_seq_ref_match, snpbase)])
118 118
 }
119 119
 
120
-results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.motif.scores, par.testing.mc, par.figdir) {
121
-  rowids <- which(par.motif.scores$motif == names(par.motif.lib)[i])
120
+results_motif_par<-function(motif.id, par.prior, par.transition, par.motif.lib, par.motif.scores, par.testing.mc, par.figdir) {
121
+  rowids <- which(par.motif.scores$motif == names(par.motif.lib)[motif.id])
122 122
   scores <- cbind(par.motif.scores$log_lik_ref[rowids],
123 123
                   par.motif.scores$log_lik_snp[rowids])
124
-  pwm <- par.motif.lib[[i]]
124
+  pwm <- par.motif.lib[[motif.id]]
125 125
   pwm[pwm < 1e-10] <- 1e-10
126 126
   wei.mat <- pwm
127 127
   for(i in seq(nrow(wei.mat))) {
... ...
@@ -133,9 +133,10 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot
133 133
   if(nrow(scores) > 5000) {
134 134
     p <- 5 / nrow(scores)
135 135
   } else {
136
-    p <- 1 / nrow(scores)
136
+    p <- 1 / max(2, nrow(scores))
137 137
   }
138 138
   
139
+  # Use percentiles of the score distribution to construct groups
139 140
   m <- 20
140 141
   b <- (1 / p) ^ ( 1 / sum(seq(m)))
141 142
   allp <- rep(1, m + 1)
... ...
@@ -146,11 +147,18 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot
146 147
   }
147 148
   allp <- allp[-(m + 1)]
148 149
   
149
-  score.p <- unique(quantile(c(scores), 1 - allp))
150
+  score.p <- quantile(c(scores), 1 - allp)
151
+  dedup.idx <- c(1, 1 + which(diff(score.p) != 0))
152
+  score.p <- score.p[dedup.idx]
153
+  allp <- allp[dedup.idx]
154
+  
155
+  # When there are few distinct score values, directly use those values
156
+  # instead of percentiles
150 157
   if(length(score.p) > length(unique(c(scores)))) {
151 158
     score.p <- rev(unique(sort(c(scores))))
152 159
     allp <- seq_along(score.p) / length(c(scores))
153 160
   }
161
+  assert(length(allp) == length(score.p))
154 162
   
155 163
   pval_a <- pval_cond <- matrix(1, nrow = nrow(scores), ncol = 4)
156 164
   for(l in seq_along(allp)) {
... ...
@@ -181,12 +189,7 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot
181 189
       if(l <= length(allp)) {
182 190
         n_sample <- as.integer((1 - allp[l]) / allp[l] * 100)
183 191
       }
184
-      if(n_sample > 1e5) {
185
-        n_sample <- 1e5
186
-      }
187
-      if(n_sample < 5000) {
188
-        n_sample <- 2000
189
-      }
192
+      n_sample <- max(2000, min(n_sample, 1e5))
190 193
       pval_a.new <- .Call("test_p_value", pwm, par.prior, par.transition, scores[compute.id], theta, n_sample, package = "atSNP")
191 194
     } else {
192 195
       pval_a.new <- .Call("test_p_value", pwm, par.prior, par.transition, scores[compute.id], theta, 100, package = "atSNP")
... ...
@@ -258,20 +261,10 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot
258 261
     ## set the importance sample size
259 262
     if(par.testing.mc==FALSE) {
260 263
       n_sample <- 2000
261
-      p <- mean(score_diff >= score.p[l])
262
-      if(p == 0) {
263
-        p <- 1e-4
264
-      }
265
-      if(l <= length(allp)) {
266
-        n_sample <- as.integer((1 - p) / p * 100)
267
-      }
268
-      if(n_sample > 1e5) {
269
-        n_sample <- 1e5
270
-      }
271
-      if(n_sample < 2000) {
272
-        n_sample <- 2000
273
-      }
274
-      
264
+      p <- max(1e-4, mean(score_diff >= score.p[l]))
265
+      n_sample <- as.integer((1 - p) / p * 100)
266
+      n_sample <- max(2000, min(n_sample, 1e5))
267
+
275 268
       pval_diff.new <- .Call("test_p_value_change", pwm,
276 269
                              wei.mat, pwm + 0.25,  par.prior,
277 270
                              par.transition, score_diff[compute.id],
... ...
@@ -298,7 +291,7 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot
298 291
   pval_diff[pval_diff[, 1] > 1, 1] <- 1
299 292
   pval_rank[, 1] <- sort(pval_rank[,1])[rank(-rank_ratio)]
300 293
   pval_rank[pval_rank[, 1] > 1, 1] <- 1
301
-  message("Finished testing motif No. ", i)
294
+  message("Finished testing motif No. ", motif.id)
302 295
   if(!is.null(par.figdir)) {
303 296
     if(!file.exists(par.figdir)) {
304 297
       dir.create(par.figdir)
... ...
@@ -318,7 +311,7 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot
318 311
     plotdat.diff <- unique(plotdat.diff)
319 312
     localenv <- environment()
320 313
     options(warn = -1)
321
-    pdf(file.path(par.figdir, paste("motif", i, ".pdf", sep = "")), width = 10, height = 10)
314
+    pdf(file.path(par.figdir, paste("motif", motif.id, ".pdf", sep = "")), width = 10, height = 10)
322 315
     id <- which(rank(plotdat$p.value[plotdat$Allele == "ref"]) <= 500)
323 316
     print(ggplot(aes(x = score, y = p.value), data = plotdat[plotdat$Allele == "ref", ], environment = localenv) + geom_point() + scale_y_log10(breaks = 10 ^ seq(-8, 0)) + geom_errorbar(aes(ymax = p.value + sqrt(var), ymin = p.value - sqrt(var))) + ggtitle(paste(names(par.motif.lib)[i], "ref")))
324 317
     id <- which(rank(plotdat$p.value[plotdat$Allele == "snp"]) <= 500)
... ...
@@ -326,8 +319,7 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot
326 319
     print(ggplot(aes(x = score, y = p.value), data = plotdat.diff, environment = localenv) + geom_point() + scale_y_log10(breaks = 10 ^ seq(-8, 0)) + geom_errorbar(aes(ymax = p.value + sqrt(var), ymin = p.value - sqrt(var))) + ggtitle(paste(names(par.motif.lib)[i], " Change")))
327 320
     dev.off()
328 321
   }
329
-                      
330
-  
322
+
331 323
   return(list(rowids = rowids,
332 324
               pval_a = pval_a,
333 325
               pval_cond = pval_cond,
... ...
@@ -61,7 +61,6 @@ test_that("Error: quantile function computing are not equivalent.", {
61 61
   for(p in c(0.01, 0.1, 0.5, 0.9, 0.99)) {
62 62
     delta <- .Call("test_find_percentile", c(scores), p, package = "atSNP")
63 63
     delta.r <- -sort(-c(scores))[as.integer(p * length(scores)) + 1]
64
-    delta==delta.r
65 64
     expect_equal(delta, delta.r)
66 65
   }
67 66
 })