... | ... |
@@ -13,8 +13,10 @@ import(BSgenome) |
13 | 13 |
import(BiocFileCache) |
14 | 14 |
import(Rcpp) |
15 | 15 |
import(data.table) |
16 |
+import(ggplot2) |
|
16 | 17 |
import(grid) |
17 | 18 |
import(rappdirs) |
19 |
+import(testit) |
|
18 | 20 |
importFrom(BiocParallel,MulticoreParam) |
19 | 21 |
importFrom(BiocParallel,SnowParam) |
20 | 22 |
importFrom(BiocParallel,bpmapply) |
... | ... |
@@ -840,6 +840,8 @@ MatchSubsequence <- function(snp.tbl, motif.scores, motif.lib, snpids = NULL, mo |
840 | 840 |
#' ComputePValues(motif_library, snpInfo, motif_scores$motif.scores, ncores = 2, testing.mc=TRUE) |
841 | 841 |
#' @import Rcpp |
842 | 842 |
#' @import data.table |
843 |
+#' @import testit |
|
844 |
+#' @import ggplot2 |
|
843 | 845 |
#' @importFrom BiocParallel bpmapply MulticoreParam SnowParam |
844 | 846 |
#' @useDynLib atSNP |
845 | 847 |
#' @export |
... | ... |
@@ -853,9 +855,9 @@ ComputePValues <- function(motif.lib, snp.info, motif.scores, ncores = 1, testin |
853 | 855 |
|
854 | 856 |
if(Sys.info()[["sysname"]] == "Windows"){ |
855 | 857 |
snow <- SnowParam(workers = ncores, type = "SOCK") |
856 |
- results<-bpmapply(function(x) results_motif_par(i=x, par.prior=prior, par.transition=transition, par.motif.lib=motif.lib, par.motif.scores=motif.scores, par.testing.mc=testing.mc, par.figdir=figdir), seq_along(motif.lib), BPPARAM = snow,SIMPLIFY = FALSE) |
|
858 |
+ results<-bpmapply(function(x) results_motif_par(motif.id=x, par.prior=prior, par.transition=transition, par.motif.lib=motif.lib, par.motif.scores=motif.scores, par.testing.mc=testing.mc, par.figdir=figdir), seq_along(motif.lib), BPPARAM = snow,SIMPLIFY = FALSE) |
|
857 | 859 |
}else{ |
858 |
- results<-bpmapply(function(x) results_motif_par(i=x, par.prior=prior, par.transition=transition, par.motif.lib=motif.lib, par.motif.scores=motif.scores, par.testing.mc=testing.mc, par.figdir=figdir), seq_along(motif.lib), BPPARAM = MulticoreParam(workers = ncores), |
|
860 |
+ results<-bpmapply(function(x) results_motif_par(motif.id=x, par.prior=prior, par.transition=transition, par.motif.lib=motif.lib, par.motif.scores=motif.scores, par.testing.mc=testing.mc, par.figdir=figdir), seq_along(motif.lib), BPPARAM = MulticoreParam(workers = ncores), |
|
859 | 861 |
SIMPLIFY = FALSE) |
860 | 862 |
} |
861 | 863 |
|
... | ... |
@@ -117,11 +117,11 @@ match_subseq_par<-function(i, par.k, par.ncores, par.snp.tbl, par.snpids, par.mo |
117 | 117 |
IUPAC, ref_match_seq, snp_match_seq, ref_seq_snp_match, snp_seq_ref_match, snpbase)]) |
118 | 118 |
} |
119 | 119 |
|
120 |
-results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.motif.scores, par.testing.mc, par.figdir) { |
|
121 |
- rowids <- which(par.motif.scores$motif == names(par.motif.lib)[i]) |
|
120 |
+results_motif_par<-function(motif.id, par.prior, par.transition, par.motif.lib, par.motif.scores, par.testing.mc, par.figdir) { |
|
121 |
+ rowids <- which(par.motif.scores$motif == names(par.motif.lib)[motif.id]) |
|
122 | 122 |
scores <- cbind(par.motif.scores$log_lik_ref[rowids], |
123 | 123 |
par.motif.scores$log_lik_snp[rowids]) |
124 |
- pwm <- par.motif.lib[[i]] |
|
124 |
+ pwm <- par.motif.lib[[motif.id]] |
|
125 | 125 |
pwm[pwm < 1e-10] <- 1e-10 |
126 | 126 |
wei.mat <- pwm |
127 | 127 |
for(i in seq(nrow(wei.mat))) { |
... | ... |
@@ -133,9 +133,10 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot |
133 | 133 |
if(nrow(scores) > 5000) { |
134 | 134 |
p <- 5 / nrow(scores) |
135 | 135 |
} else { |
136 |
- p <- 1 / nrow(scores) |
|
136 |
+ p <- 1 / max(2, nrow(scores)) |
|
137 | 137 |
} |
138 | 138 |
|
139 |
+ # Use percentiles of the score distribution to construct groups |
|
139 | 140 |
m <- 20 |
140 | 141 |
b <- (1 / p) ^ ( 1 / sum(seq(m))) |
141 | 142 |
allp <- rep(1, m + 1) |
... | ... |
@@ -146,11 +147,18 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot |
146 | 147 |
} |
147 | 148 |
allp <- allp[-(m + 1)] |
148 | 149 |
|
149 |
- score.p <- unique(quantile(c(scores), 1 - allp)) |
|
150 |
+ score.p <- quantile(c(scores), 1 - allp) |
|
151 |
+ dedup.idx <- c(1, 1 + which(diff(score.p) != 0)) |
|
152 |
+ score.p <- score.p[dedup.idx] |
|
153 |
+ allp <- allp[dedup.idx] |
|
154 |
+ |
|
155 |
+ # When there are few distinct score values, directly use those values |
|
156 |
+ # instead of percentiles |
|
150 | 157 |
if(length(score.p) > length(unique(c(scores)))) { |
151 | 158 |
score.p <- rev(unique(sort(c(scores)))) |
152 | 159 |
allp <- seq_along(score.p) / length(c(scores)) |
153 | 160 |
} |
161 |
+ assert(length(allp) == length(score.p)) |
|
154 | 162 |
|
155 | 163 |
pval_a <- pval_cond <- matrix(1, nrow = nrow(scores), ncol = 4) |
156 | 164 |
for(l in seq_along(allp)) { |
... | ... |
@@ -181,12 +189,7 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot |
181 | 189 |
if(l <= length(allp)) { |
182 | 190 |
n_sample <- as.integer((1 - allp[l]) / allp[l] * 100) |
183 | 191 |
} |
184 |
- if(n_sample > 1e5) { |
|
185 |
- n_sample <- 1e5 |
|
186 |
- } |
|
187 |
- if(n_sample < 5000) { |
|
188 |
- n_sample <- 2000 |
|
189 |
- } |
|
192 |
+ n_sample <- max(2000, min(n_sample, 1e5)) |
|
190 | 193 |
pval_a.new <- .Call("test_p_value", pwm, par.prior, par.transition, scores[compute.id], theta, n_sample, package = "atSNP") |
191 | 194 |
} else { |
192 | 195 |
pval_a.new <- .Call("test_p_value", pwm, par.prior, par.transition, scores[compute.id], theta, 100, package = "atSNP") |
... | ... |
@@ -258,20 +261,10 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot |
258 | 261 |
## set the importance sample size |
259 | 262 |
if(par.testing.mc==FALSE) { |
260 | 263 |
n_sample <- 2000 |
261 |
- p <- mean(score_diff >= score.p[l]) |
|
262 |
- if(p == 0) { |
|
263 |
- p <- 1e-4 |
|
264 |
- } |
|
265 |
- if(l <= length(allp)) { |
|
266 |
- n_sample <- as.integer((1 - p) / p * 100) |
|
267 |
- } |
|
268 |
- if(n_sample > 1e5) { |
|
269 |
- n_sample <- 1e5 |
|
270 |
- } |
|
271 |
- if(n_sample < 2000) { |
|
272 |
- n_sample <- 2000 |
|
273 |
- } |
|
274 |
- |
|
264 |
+ p <- max(1e-4, mean(score_diff >= score.p[l])) |
|
265 |
+ n_sample <- as.integer((1 - p) / p * 100) |
|
266 |
+ n_sample <- max(2000, min(n_sample, 1e5)) |
|
267 |
+ |
|
275 | 268 |
pval_diff.new <- .Call("test_p_value_change", pwm, |
276 | 269 |
wei.mat, pwm + 0.25, par.prior, |
277 | 270 |
par.transition, score_diff[compute.id], |
... | ... |
@@ -298,7 +291,7 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot |
298 | 291 |
pval_diff[pval_diff[, 1] > 1, 1] <- 1 |
299 | 292 |
pval_rank[, 1] <- sort(pval_rank[,1])[rank(-rank_ratio)] |
300 | 293 |
pval_rank[pval_rank[, 1] > 1, 1] <- 1 |
301 |
- message("Finished testing motif No. ", i) |
|
294 |
+ message("Finished testing motif No. ", motif.id) |
|
302 | 295 |
if(!is.null(par.figdir)) { |
303 | 296 |
if(!file.exists(par.figdir)) { |
304 | 297 |
dir.create(par.figdir) |
... | ... |
@@ -318,7 +311,7 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot |
318 | 311 |
plotdat.diff <- unique(plotdat.diff) |
319 | 312 |
localenv <- environment() |
320 | 313 |
options(warn = -1) |
321 |
- pdf(file.path(par.figdir, paste("motif", i, ".pdf", sep = "")), width = 10, height = 10) |
|
314 |
+ pdf(file.path(par.figdir, paste("motif", motif.id, ".pdf", sep = "")), width = 10, height = 10) |
|
322 | 315 |
id <- which(rank(plotdat$p.value[plotdat$Allele == "ref"]) <= 500) |
323 | 316 |
print(ggplot(aes(x = score, y = p.value), data = plotdat[plotdat$Allele == "ref", ], environment = localenv) + geom_point() + scale_y_log10(breaks = 10 ^ seq(-8, 0)) + geom_errorbar(aes(ymax = p.value + sqrt(var), ymin = p.value - sqrt(var))) + ggtitle(paste(names(par.motif.lib)[i], "ref"))) |
324 | 317 |
id <- which(rank(plotdat$p.value[plotdat$Allele == "snp"]) <= 500) |
... | ... |
@@ -326,8 +319,7 @@ results_motif_par<-function(i, par.prior, par.transition, par.motif.lib, par.mot |
326 | 319 |
print(ggplot(aes(x = score, y = p.value), data = plotdat.diff, environment = localenv) + geom_point() + scale_y_log10(breaks = 10 ^ seq(-8, 0)) + geom_errorbar(aes(ymax = p.value + sqrt(var), ymin = p.value - sqrt(var))) + ggtitle(paste(names(par.motif.lib)[i], " Change"))) |
327 | 320 |
dev.off() |
328 | 321 |
} |
329 |
- |
|
330 |
- |
|
322 |
+ |
|
331 | 323 |
return(list(rowids = rowids, |
332 | 324 |
pval_a = pval_a, |
333 | 325 |
pval_cond = pval_cond, |
... | ... |
@@ -61,7 +61,6 @@ test_that("Error: quantile function computing are not equivalent.", { |
61 | 61 |
for(p in c(0.01, 0.1, 0.5, 0.9, 0.99)) { |
62 | 62 |
delta <- .Call("test_find_percentile", c(scores), p, package = "atSNP") |
63 | 63 |
delta.r <- -sort(-c(scores))[as.integer(p * length(scores)) + 1] |
64 |
- delta==delta.r |
|
65 | 64 |
expect_equal(delta, delta.r) |
66 | 65 |
} |
67 | 66 |
}) |