7 | 11 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,253 @@ |
1 |
+library(MotifAnalysis) |
|
2 |
+library(testthat) |
|
3 |
+data(example) |
|
4 |
+ |
|
5 |
+trans_mat <- matrix(rep(snpInfo$prior, each = 4), nrow = 4) |
|
6 |
+test_pwm <- motif_library$matrix[[1]] |
|
7 |
+scores <- as.matrix(motif_scores[motif == names(motif_library$matrix)[1], list(log_lik_ref, log_lik_snp)]) |
|
8 |
+ |
|
9 |
+test_score <- test_pwm |
|
10 |
+for(i in seq(nrow(test_score))) { |
|
11 |
+ for(j in seq(ncol(test_score))) { |
|
12 |
+ test_score[i, j] <- exp(mean(log((test_pwm[i, -j]) / test_pwm[i, j]))) |
|
13 |
+ } |
|
14 |
+} |
|
15 |
+ |
|
16 |
+## these are functions for this test only |
|
17 |
+drawonesample <- function(theta) { |
|
18 |
+ delta <- snpInfo$prior * t(test_score) ^ theta |
|
19 |
+ prob_start <- rev(apply(delta, 2, sum)) |
|
20 |
+ sample <- sample(1:4, 19, replace = TRUE, prob = snpInfo$prior) |
|
21 |
+ id <- sample(seq(10), 1, prob = prob_start) |
|
22 |
+ sample[10] <- sample(1:4, 1, prob = delta[, 11 - id]) |
|
23 |
+ sample <- c(sample, |
|
24 |
+ id, |
|
25 |
+ test_score[11 - id, sample[10]] |
|
26 |
+ ) |
|
27 |
+ return(sample) |
|
28 |
+} |
|
29 |
+jointprob <- function(x) prod(test_pwm[cbind(seq(10), x)]) |
|
30 |
+maxjointprob <- function(x) { |
|
31 |
+ maxp <- -Inf |
|
32 |
+ p <- -Inf |
|
33 |
+ for(i in 1:10) { |
|
34 |
+ p <- jointprob(x[i:(i+9)]) |
|
35 |
+ if(p > maxp) |
|
36 |
+ maxp <- p |
|
37 |
+ } |
|
38 |
+ for(i in 1:10) { |
|
39 |
+ p <- jointprob(5 - x[(i+9):i]) |
|
40 |
+ if(p > maxp) |
|
41 |
+ maxp <- p |
|
42 |
+ } |
|
43 |
+ return(maxp) |
|
44 |
+} |
|
45 |
+get_freq <- function(sample) { |
|
46 |
+ emp_freq <- matrix(0, nrow = 19, ncol = 4) |
|
47 |
+ for(i in seq(19)) { |
|
48 |
+ for(j in seq(4)) { |
|
49 |
+ emp_freq[i, j] <- sum(sample[i, ] == j - 1) |
|
50 |
+ } |
|
51 |
+ } |
|
52 |
+ emp_freq <- emp_freq / apply(emp_freq, 1, sum) |
|
53 |
+ return(emp_freq) |
|
54 |
+} |
|
55 |
+ |
|
56 |
+test_that("Error: quantile function computing are not equivalent.", { |
|
57 |
+ for(p in c(1, 10, 50, 90, 99) / 100) { |
|
58 |
+ delta <- .Call("test_find_percentile_diff", scores, p, package = "MotifAnalysis") |
|
59 |
+ delta.r <- as.double(sort(apply(scores, 1, function(x) -abs(diff(x))))[as.integer(p * nrow(scores)) + 1]) |
|
60 |
+ expect_equal(delta, delta.r) |
|
61 |
+ } |
|
62 |
+}) |
|
63 |
+ |
|
64 |
+test_that("Error: the scores for samples are not equivalent.", { |
|
65 |
+ |
|
66 |
+ p <- 0.1 |
|
67 |
+ delta <- .Call("test_find_percentile_diff", scores, p, package = "MotifAnalysis") |
|
68 |
+ theta <- .Call("test_find_theta_diff", test_score, snpInfo$prior, snpInfo$transition, delta, package = "MotifAnalysis") |
|
69 |
+ |
|
70 |
+ ## Use R code to generate a random sample |
|
71 |
+ for(i in seq(10)) { |
|
72 |
+ sample <- drawonesample(theta) |
|
73 |
+ sample_score <- .Call("test_compute_sample_score_diff", test_pwm, test_score, sample[seq(19)] - 1, sample[20] - 1, package = "MotifAnalysis") |
|
74 |
+ expect_equal(sample[21], exp(sample_score[1])) |
|
75 |
+ |
|
76 |
+ sample1 <- sample2 <- sample3 <- sample |
|
77 |
+ sample1[10] <- seq(4)[-sample[10]][1] |
|
78 |
+ sample2[10] <- seq(4)[-sample[10]][2] |
|
79 |
+ sample3[10] <- seq(4)[-sample[10]][3] |
|
80 |
+ sample_score_r <- log(maxjointprob(sample[seq(19)])) - |
|
81 |
+ log(c(maxjointprob(sample1[seq(19)]), |
|
82 |
+ maxjointprob(sample2[seq(19)]), |
|
83 |
+ maxjointprob(sample3[seq(19)]))) |
|
84 |
+ expect_equal(-sample_score_r, sample_score[-1]) |
|
85 |
+ } |
|
86 |
+ |
|
87 |
+ ## Use C code to generate a random sample |
|
88 |
+ for(i in seq(10)) { |
|
89 |
+ delta <- matrix(0, nrow = 40, ncol = 10) |
|
90 |
+ for(i in seq(10)) { |
|
91 |
+ delta[4 * (i - 1) + seq(4), 10] <- snpInfo$prior * test_score[11 - i, ] ^ theta |
|
92 |
+ } |
|
93 |
+ for(i in 1:10) { |
|
94 |
+ delta[4 * (i- 1) + seq(4), seq(9)] <- sum(snpInfo$prior * delta[4 * (i - 1) + seq(4), 10]) |
|
95 |
+ } |
|
96 |
+ sample <- .Call("test_importance_sample", delta, snpInfo$prior, trans_mat, test_pwm, theta, package = "MotifAnalysis") |
|
97 |
+ start_pos <- sample[20] + 1 |
|
98 |
+ adj_score <- test_score[11 - start_pos, sample[10] + 1] |
|
99 |
+ sample_score <- .Call("test_compute_sample_score_diff", test_pwm, test_score, sample[seq(19)], sample[20], package = "MotifAnalysis") |
|
100 |
+ expect_equal(adj_score, exp(sample_score[1])) |
|
101 |
+ } |
|
102 |
+ |
|
103 |
+}) |
|
104 |
+ |
|
105 |
+test_that("Error: compute the normalizing constant.", { |
|
106 |
+ |
|
107 |
+ ## parameters |
|
108 |
+ p <- 0.99 |
|
109 |
+ delta <- .Call("test_find_percentile_diff", scores, p, package = "MotifAnalysis") |
|
110 |
+ theta <- .Call("test_find_theta_diff", test_score, snpInfo$prior, snpInfo$transition, delta, package = "MotifAnalysis") |
|
111 |
+ |
|
112 |
+ ## |
|
113 |
+ const <- .Call("test_func_delta_diff", test_score, snpInfo$prior, trans_mat, theta, package = "MotifAnalysis") |
|
114 |
+ const.r <- sum(snpInfo$prior * t(test_score) ^ theta) / 10 |
|
115 |
+ expect_equal(const, const.r) |
|
116 |
+ |
|
117 |
+}) |
|
118 |
+ |
|
119 |
+test_that("Error: sample distributions are not expected.", { |
|
120 |
+ |
|
121 |
+ ## parameters |
|
122 |
+ p <- 0.1 |
|
123 |
+ delta <- .Call("test_find_percentile_diff", scores, p, package = "MotifAnalysis") |
|
124 |
+ theta <- .Call("test_find_theta_diff", test_score, snpInfo$prior, snpInfo$transition, delta, package = "MotifAnalysis") |
|
125 |
+ delta <- matrix(0, nrow = 40, ncol = 10) |
|
126 |
+ for(i in seq(10)) { |
|
127 |
+ delta[4 * (i - 1) + seq(4), 10] <- test_score[11 - i, ] ^ theta |
|
128 |
+ } |
|
129 |
+ for(i in 1:10) { |
|
130 |
+ delta[4 * (i- 1) + seq(4), seq(9)] <- sum(snpInfo$prior * delta[4 * (i - 1) + seq(4), 10]) |
|
131 |
+ } |
|
132 |
+ |
|
133 |
+ target_freq <- matrix(snpInfo$prior, nrow = 4, ncol = 19) |
|
134 |
+ mat <- snpInfo$prior * t(test_score) ^ theta |
|
135 |
+ wei <- apply(mat, 2, sum) |
|
136 |
+ mat <- mat / rep(apply(mat, 2, sum), each = 4) |
|
137 |
+ mat <- mat * rep(wei, each = 4) |
|
138 |
+ target_freq[, 10] <- apply(mat, 1, mean) |
|
139 |
+ target_freq <- t(target_freq) |
|
140 |
+ target_freq <- target_freq / apply(target_freq, 1, sum) |
|
141 |
+ |
|
142 |
+ registerDoMC(4) |
|
143 |
+ |
|
144 |
+ results <- foreach(i = seq(20)) %dopar% { |
|
145 |
+ |
|
146 |
+ ## generate 1000 samples |
|
147 |
+ sample1 <- sapply(seq(1000), function(x) |
|
148 |
+ .Call("test_importance_sample_diff", |
|
149 |
+ delta, snpInfo$prior, trans_mat, test_score, theta, package = "MotifAnalysis")) |
|
150 |
+ emp_freq1 <- get_freq(sample1) |
|
151 |
+ |
|
152 |
+ sample2 <- sapply(rep(theta, 1000), drawonesample) |
|
153 |
+ emp_freq2 <- get_freq(sample2 - 1) |
|
154 |
+ |
|
155 |
+ print(rbind(emp_freq1[10, ], emp_freq2[10, ], target_freq[10, ])) |
|
156 |
+ max(abs(emp_freq1 - target_freq)) > max(abs(emp_freq2 - target_freq)) |
|
157 |
+ |
|
158 |
+ } |
|
159 |
+ sum(unlist(results)) |
|
160 |
+ |
|
161 |
+ print(pbinom(sum(unlist(results)), size = 20, prob = 0.5)) |
|
162 |
+ |
|
163 |
+}) |
|
164 |
+ |
|
165 |
+## Visual checks |
|
166 |
+if(FALSE) { |
|
167 |
+ |
|
168 |
+ plot(log(y <- sapply(seq(200) / 100 - 1, function(x) |
|
169 |
+ .Call("test_func_delta_diff", test_score, snpInfo$prior, snpInfo$transition, x, package = "MotifAnalysis")))) |
|
170 |
+ |
|
171 |
+## test the theta |
|
172 |
+ |
|
173 |
+ p_values_9 <- .Call("test_p_value_diff", test_pwm, test_score, snpInfo$prior, snpInfo$transition, scores, 0.1, package = "MotifAnalysis") |
|
174 |
+ p_values_8 <- .Call("test_p_value_diff", test_pwm, test_score, snpInfo$prior, snpInfo$transition, scores, 0.2, package = "MotifAnalysis") |
|
175 |
+ |
|
176 |
+ score_diff <- apply(scores, 1, function(x) abs(diff(x))) |
|
177 |
+ |
|
178 |
+ par(mfrow = c(1, 2)) |
|
179 |
+ plot(log(p_values_9) ~ score_diff) |
|
180 |
+ plot(log(p_values_8) ~ score_diff) |
|
181 |
+ |
|
182 |
+ p_values_9 <- .Call("test_p_value_diff", test_pwm, test_score, snpInfo$prior, trans_mat, scores, 0.1, package = "MotifAnalysis") |
|
183 |
+ p_values_8 <- .Call("test_p_value_diff", test_pwm, test_score, snpInfo$prior, trans_mat, scores, 0.2, package = "MotifAnalysis") |
|
184 |
+ |
|
185 |
+ pval_test <- function(x) { |
|
186 |
+ delta <- .Call("test_find_percentile_diff", scores, x, package = "MotifAnalysis") |
|
187 |
+ theta <- .Call("test_find_theta_diff", test_score, snpInfo$prior, trans_mat, delta, package = "MotifAnalysis") |
|
188 |
+ const <- sum(snpInfo$prior * t(test_score) ^ theta) / 10 |
|
189 |
+ message("Constant value: ", const) |
|
190 |
+ sample <- sapply(rep(theta, 1000), drawonesample) |
|
191 |
+ pr <- apply(sample[1:19, ], 2, maxjointprob) |
|
192 |
+ sample1 <- sample2 <- sample3 <- sample |
|
193 |
+ sample1[10, ] <- sapply(sample1[10, ], function(x) seq(4)[-x][1]) |
|
194 |
+ sample2[10, ] <- sapply(sample2[10, ], function(x) seq(4)[-x][2]) |
|
195 |
+ sample3[10, ] <- sapply(sample3[10, ], function(x) seq(4)[-x][3]) |
|
196 |
+ pr1 <- apply(sample1[1:19, ], 2, maxjointprob) |
|
197 |
+ pr2 <- apply(sample2[1:19, ], 2, maxjointprob) |
|
198 |
+ pr3 <- apply(sample3[1:19, ], 2, maxjointprob) |
|
199 |
+ |
|
200 |
+ log_diff <- log(pr) - log(c(pr1, pr2, pr3)) |
|
201 |
+ |
|
202 |
+ wei <- rep(const / sample[21, ] ^ theta, 3) |
|
203 |
+ message("Mean weight: ", mean(wei)) |
|
204 |
+ message("Mean diff score: ", mean(log_diff)) |
|
205 |
+ message("Target score: ", mean(log(sample[21, ]))) |
|
206 |
+ pval <- sapply(score_diff, function(x) sum(rep(wei, each = 3)[log_diff >= x]) / length(log_diff)) |
|
207 |
+ return(pval) |
|
208 |
+ } |
|
209 |
+ |
|
210 |
+ pval_test1 <- function(x) { |
|
211 |
+ delta <- .Call("test_find_percentile_diff", scores, x, package = "MotifAnalysis") |
|
212 |
+ theta <- .Call("test_find_theta_diff", test_score, snpInfo$prior, trans_mat, delta, package = "MotifAnalysis") |
|
213 |
+ const <- sum(snpInfo$prior * t(test_score) ^ theta) / 10 |
|
214 |
+ message("Constant value: ", const) |
|
215 |
+ log_diff <- rep(0, 3000) |
|
216 |
+ wei <- rep(0, 1000) |
|
217 |
+ for(i in seq(1000)) { |
|
218 |
+ sample <- drawonesample(theta) |
|
219 |
+ sample_score <- .Call("test_compute_sample_score_diff", test_pwm, test_score, sample[seq(19)] - 1, sample[20] - 1, package = "MotifAnalysis") |
|
220 |
+ log_diff[seq(3) + 3 * (i - 1)] <- sample_score[-1] |
|
221 |
+ wei[i] <- const / exp(sample_score[1] * theta) |
|
222 |
+ } |
|
223 |
+ message("Mean weight: ", mean(wei)) |
|
224 |
+ message("Mean diff score: ", mean(log_diff)) |
|
225 |
+ |
|
226 |
+ pval <- sapply(score_diff, function(x) sum(rep(wei, each = 3)[log_diff >= x]) / length(log_diff)) |
|
227 |
+ return(pval) |
|
228 |
+ } |
|
229 |
+ |
|
230 |
+ |
|
231 |
+ pval_8 <- pval_test(0.2) |
|
232 |
+ pval1_8 <- pval_test1(0.2) |
|
233 |
+ |
|
234 |
+ min(pval_8) |
|
235 |
+ min(pval1_8) |
|
236 |
+ |
|
237 |
+ p_values_8 <- |
|
238 |
+ |
|
239 |
+ pval1_9 <- pval_test1(0.1) |
|
240 |
+ |
|
241 |
+ plot(log(pval_8), log(p_values_8)) |
|
242 |
+ abline(0,1) |
|
243 |
+ |
|
244 |
+ plot(log(pval1_8), log(p_values_8)) |
|
245 |
+ abline(0,1) |
|
246 |
+ |
|
247 |
+ plot(log(pval_9), log(p_values_9)) |
|
248 |
+ abline(0,1) |
|
249 |
+ |
|
250 |
+ plot(log(pval1_9), log(p_values_9)) |
|
251 |
+ abline(0,1) |
|
252 |
+ |
|
253 |
+} |
... | ... |
@@ -211,23 +211,27 @@ if(FALSE) { |
211 | 211 |
delta <- .Call("test_find_percentile_diff", scores, x, package = "MotifAnalysis") |
212 | 212 |
theta <- .Call("test_find_theta_diff", test_score, snpInfo$prior, trans_mat, delta, package = "MotifAnalysis") |
213 | 213 |
const <- sum(snpInfo$prior * t(test_score) ^ theta) / 10 |
214 |
- log_diff <- rep(0, 30000) |
|
215 |
- wei <- rep(0, 10000) |
|
216 |
- for(i in seq(10000)) { |
|
214 |
+ message("Constant value: ", const) |
|
215 |
+ log_diff <- rep(0, 3000) |
|
216 |
+ wei <- rep(0, 1000) |
|
217 |
+ for(i in seq(1000)) { |
|
217 | 218 |
sample <- drawonesample(theta) |
218 | 219 |
sample_score <- .Call("test_compute_sample_score_diff", test_pwm, test_score, sample[seq(19)] - 1, sample[20] - 1, package = "MotifAnalysis") |
219 | 220 |
log_diff[seq(3) + 3 * (i - 1)] <- sample_score[-1] |
220 | 221 |
wei[i] <- const / exp(sample_score[1] * theta) |
221 | 222 |
} |
223 |
+ message("Mean weight: ", mean(wei)) |
|
224 |
+ message("Mean diff score: ", mean(log_diff)) |
|
225 |
+ message("Target score: ", mean(log(sample[21, ]))) |
|
226 |
+ |
|
222 | 227 |
pval <- sapply(score_diff, function(x) sum(rep(wei, each = 3)[log_diff >= x]) / length(log_diff)) |
223 | 228 |
return(pval) |
224 | 229 |
} |
225 | 230 |
|
226 | 231 |
|
227 | 232 |
pval_8 <- pval_test(0.2) |
228 |
- pval_9 <- pval_test(0.1) |
|
229 |
- |
|
230 | 233 |
pval1_8 <- pval_test1(0.2) |
234 |
+ |
|
231 | 235 |
pval1_9 <- pval_test1(0.1) |
232 | 236 |
|
233 | 237 |
plot(log(pval_8), log(p_values_8)) |