... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: atena |
2 | 2 |
Type: Package |
3 | 3 |
Title: Analysis of Transposable Elements |
4 |
-Version: 1.3.0 |
|
4 |
+Version: 1.3.1 |
|
5 | 5 |
Authors@R: c( |
6 | 6 |
person("Beatriz", "Calvo-Serra", email = "[email protected]", role = c("aut","cre")), |
7 | 7 |
person("Robert", "Castelo", email = "[email protected]", role = c("aut"))) |
... | ... |
@@ -202,7 +202,20 @@ setClass("ERVmapParam", contains="AtenaParam", |
202 | 202 |
#' iterations of the EM SQUAREM algorithm (Du and Varadhan, 2020). Default |
203 | 203 |
#' is 100 and this value is passed to the \code{maxiter} parameter of the |
204 | 204 |
#' \code{\link[SQUAREM]{squarem}()} function. |
205 |
-#' |
|
205 |
+#' |
|
206 |
+#' @slot reassign_mode (Default 'exclude') Character vector indicating |
|
207 |
+#' reassignment mode after EM step. |
|
208 |
+#' Available methods are 'exclude' (reads with more than one best |
|
209 |
+#' assignment are excluded from the final counts), 'choose' (when reads have |
|
210 |
+#' more than one best assignment, one of them is randomly chosen), 'average' |
|
211 |
+#' (the read count is divided evenly among the best assignments) and 'conf' |
|
212 |
+#' (only assignments that exceed a certain threshold -defined by |
|
213 |
+#' \code{conf_prob} parameter- are accepted, then the read count is |
|
214 |
+#' proportionally divided among the assignments above \code{conf_prob}). |
|
215 |
+#' |
|
216 |
+#' @slot conf_prob (Default 0.9) Minimum probability for high confidence |
|
217 |
+#' assignment. |
|
218 |
+#' |
|
206 | 219 |
#' @references |
207 | 220 |
#' Bendall et al. Telescope: characterization of the retrotranscriptome by |
208 | 221 |
#' accurate estimation of transposable element expression. |
... | ... |
@@ -221,7 +234,9 @@ setClass("TelescopeParam", contains="AtenaParam", |
221 | 234 |
pi_prior="integer", |
222 | 235 |
theta_prior="integer", |
223 | 236 |
em_epsilon="numeric", |
224 |
- maxIter="integer")) |
|
237 |
+ maxIter="integer", |
|
238 |
+ reassign_mode="character", |
|
239 |
+ conf_prob="numeric")) |
|
225 | 240 |
|
226 | 241 |
#' TEtranscripts parameter class |
227 | 242 |
#' |
... | ... |
@@ -76,6 +76,19 @@ |
76 | 76 |
#' iterations of the EM SQUAREM algorithm (Du and Varadhan, 2020). Default |
77 | 77 |
#' is 100 and this value is passed to the \code{maxiter} parameter of the |
78 | 78 |
#' \code{\link[SQUAREM]{squarem}()} function. |
79 |
+#' |
|
80 |
+#' @param reassign_mode (Default 'exclude') Character vector indicating |
|
81 |
+#' reassignment mode after EM step. |
|
82 |
+#' Available methods are 'exclude' (reads with more than one best |
|
83 |
+#' assignment are excluded from the final counts), 'choose' (when reads have |
|
84 |
+#' more than one best assignment, one of them is randomly chosen), 'average' |
|
85 |
+#' (the read count is divided evenly among the best assignments) and 'conf' |
|
86 |
+#' (only assignments that exceed a certain threshold -defined by |
|
87 |
+#' \code{conf_prob} parameter- are accepted, then the read count is |
|
88 |
+#' proportionally divided among the assignments above \code{conf_prob}). |
|
89 |
+#' |
|
90 |
+#' @param conf_prob (Default 0.9) Minimum probability for high confidence |
|
91 |
+#' assignment. |
|
79 | 92 |
#' |
80 | 93 |
#' |
81 | 94 |
#' @details |
... | ... |
@@ -121,7 +134,9 @@ TelescopeParam <- function(bfl, teFeatures, aggregateby=character(0), |
121 | 134 |
pi_prior=0L, |
122 | 135 |
theta_prior=0L, |
123 | 136 |
em_epsilon=1e-7, |
124 |
- maxIter=100L) { |
|
137 |
+ maxIter=100L, |
|
138 |
+ reassign_mode="exclude", |
|
139 |
+ conf_prob=0.9) { |
|
125 | 140 |
bfl <- .checkBamFileListArgs(bfl, singleEnd, fragments) |
126 | 141 |
|
127 | 142 |
features <- .processFeatures(teFeatures, deparse(substitute(teFeatures)), |
... | ... |
@@ -133,7 +148,8 @@ TelescopeParam <- function(bfl, teFeatures, aggregateby=character(0), |
133 | 148 |
strandMode=as.integer(strandMode), fragments=fragments, |
134 | 149 |
minOverlFract=minOverlFract, pi_prior=pi_prior, |
135 | 150 |
theta_prior=theta_prior, em_epsilon=em_epsilon, |
136 |
- maxIter=as.integer(maxIter)) |
|
151 |
+ maxIter=as.integer(maxIter), reassign_mode=reassign_mode, |
|
152 |
+ conf_prob=conf_prob) |
|
137 | 153 |
} |
138 | 154 |
|
139 | 155 |
#' @param object A \linkS4class{TelescopeParam} object. |
... | ... |
@@ -372,28 +388,61 @@ setMethod("qtex", "TelescopeParam", |
372 | 388 |
PiTS[PiTS < 0] <- 0 ## Pi estimates are sometimes negatively close to zero |
373 | 389 |
# --- end EM-step --- |
374 | 390 |
|
375 |
- # Implementation for reassign_mode=exclude |
|
376 | 391 |
Theta <- get("Theta", envir=Thetaenv) |
377 | 392 |
X <- .tsEstep(QmatTS, Theta, maskmulti, PiTS) |
393 |
+ cntvec <- .reassign(X, tspar@reassign_mode, tspar@conf_prob, cntvec, tx_idx) |
|
394 |
+ |
|
395 |
+ names(cntvec) <- c(names(tspar@features), "no_feature") |
|
396 |
+ nofeat <- cntvec["no_feature"] |
|
397 |
+ cntvec <- .tssummarizeCounts(cntvec[-length(cntvec)], iste, tspar) |
|
398 |
+ cntvec <- c(cntvec, nofeat) |
|
399 |
+ cntvec |
|
400 |
+} |
|
401 |
+ |
|
402 |
+## private function .reassign() |
|
403 |
+## Implements 4 different reassigning methods from Telescope (exclude, choose, |
|
404 |
+## average and conf) |
|
405 |
+#' @importFrom sparseMatrixStats rowSums2 colSums2 |
|
406 |
+.reassign <- function(X, reassign_mode, conf_prob, cntvec, tx_idx) { |
|
407 |
+ |
|
408 |
+ if (reassign_mode %in% c("exclude", "choose", "average")) { |
|
378 | 409 |
maxbyrow <- rowMaxs(X) |
379 |
- # Version 1 |
|
410 |
+ # Older versions |
|
380 | 411 |
# Xind <- X == maxbyrow |
381 |
- # Version 2 |
|
382 | 412 |
# Xind <- (X / maxbyrow) == 1 |
383 |
- |
|
384 | 413 |
Xind <- as(X, "lgCMatrix") |
385 | 414 |
Xind@x <- (X@x /maxbyrow[X@i+1]) == 1 |
386 | 415 |
nmaxbyrow <- rowSums2(Xind) |
387 |
- |
|
388 |
- # Do not differenciate between TE and genes with istex because in Telescope |
|
389 |
- # genes are also included in the EMstep. |
|
390 | 416 |
cntvec[tx_idx] <- colSums2(Xind[nmaxbyrow == 1, ]) |
417 |
+ |
|
418 |
+ if (reassign_mode == "choose") { |
|
419 |
+ Xind2_s <- summary(Xind[nmaxbyrow > 1, ]) |
|
420 |
+ Xind2_s <- Xind2_s[Xind2_s$x == TRUE,] |
|
421 |
+ Xind2_s <- Xind2_s[order(Xind2_s$i),] |
|
422 |
+ bestov <- as.vector(table(Xind2_s$i)) |
|
423 |
+ selected_ov <- vapply(bestov, FUN = function(x) sample(x = x, size = 1), |
|
424 |
+ FUN.VALUE = integer(1L)) |
|
425 |
+ ovcol <- table(Xind2_s[c(0,cumsum(bestov)[-length(bestov)]) + selected_ov,"j"]) |
|
426 |
+ whcol <- as.integer(names(ovcol)) |
|
427 |
+ cntvec[tx_idx][whcol] <- cntvec[tx_idx][whcol] + as.integer(ovcol) |
|
428 |
+ } |
|
391 | 429 |
|
392 |
- names(cntvec) <- c(names(tspar@features), "no_feature") |
|
393 |
- nofeat <- cntvec["no_feature"] |
|
394 |
- cntvec <- .tssummarizeCounts(cntvec[-length(cntvec)], iste, tspar) |
|
395 |
- cntvec <- c(cntvec, nofeat) |
|
396 |
- cntvec |
|
430 |
+ if (reassign_mode == "average") { |
|
431 |
+ Xind2 <- Xind[nmaxbyrow > 1, ] |
|
432 |
+ cntvec[tx_idx] <- cntvec[tx_idx] + colSums2(Xind2/rowSums2(Xind2)) |
|
433 |
+ } |
|
434 |
+ |
|
435 |
+ } else if (reassign_mode == "conf") { |
|
436 |
+ whbelow <- which(X < conf_prob & X > 0, arr.ind = TRUE) |
|
437 |
+ X[whbelow] <- 0 |
|
438 |
+ X_conf <- X[rowSums2(X) > 0,] |
|
439 |
+ cntvec[tx_idx] <- colSums2(X_conf/rowSums2(X_conf)) |
|
440 |
+ |
|
441 |
+ } else { |
|
442 |
+ stop("'reassign_mode' should be one of 'exclude', 'choose', 'average' or 'conf'") |
|
443 |
+ } |
|
444 |
+ |
|
445 |
+ cntvec |
|
397 | 446 |
} |
398 | 447 |
|
399 | 448 |
|
... | ... |
@@ -20,7 +20,9 @@ TelescopeParam( |
20 | 20 |
pi_prior = 0L, |
21 | 21 |
theta_prior = 0L, |
22 | 22 |
em_epsilon = 1e-07, |
23 |
- maxIter = 100L |
|
23 |
+ maxIter = 100L, |
|
24 |
+ reassign_mode = "exclude", |
|
25 |
+ conf_prob = 0.9 |
|
24 | 26 |
) |
25 | 27 |
|
26 | 28 |
\S4method{show}{TelescopeParam}(object) |
... | ... |
@@ -101,6 +103,19 @@ iterations of the EM SQUAREM algorithm (Du and Varadhan, 2020). Default |
101 | 103 |
is 100 and this value is passed to the \code{maxiter} parameter of the |
102 | 104 |
\code{\link[SQUAREM]{squarem}()} function.} |
103 | 105 |
|
106 |
+\item{reassign_mode}{(Default 'exclude') Character vector indicating |
|
107 |
+reassignment mode after EM step. |
|
108 |
+Available methods are 'exclude' (reads with more than one best |
|
109 |
+assignment are excluded from the final counts), 'choose' (when reads have |
|
110 |
+more than one best assignment, one of them is randomly chosen), 'average' |
|
111 |
+(the read count is divided evenly among the best assignments) and 'conf' |
|
112 |
+(only assignments that exceed a certain threshold -defined by |
|
113 |
+\code{conf_prob} parameter- are accepted, then the read count is |
|
114 |
+proportionally divided among the assignments above \code{conf_prob}).} |
|
115 |
+ |
|
116 |
+\item{conf_prob}{(Default 0.9) Minimum probability for high confidence |
|
117 |
+assignment.} |
|
118 |
+ |
|
104 | 119 |
\item{object}{A \linkS4class{TelescopeParam} object.} |
105 | 120 |
} |
106 | 121 |
\value{ |
... | ... |
@@ -168,6 +183,19 @@ Algorithm Epsilon cutoff.} |
168 | 183 |
iterations of the EM SQUAREM algorithm (Du and Varadhan, 2020). Default |
169 | 184 |
is 100 and this value is passed to the \code{maxiter} parameter of the |
170 | 185 |
\code{\link[SQUAREM]{squarem}()} function.} |
186 |
+ |
|
187 |
+\item{\code{reassign_mode}}{(Default 'exclude') Character vector indicating |
|
188 |
+reassignment mode after EM step. |
|
189 |
+Available methods are 'exclude' (reads with more than one best |
|
190 |
+assignment are excluded from the final counts), 'choose' (when reads have |
|
191 |
+more than one best assignment, one of them is randomly chosen), 'average' |
|
192 |
+(the read count is divided evenly among the best assignments) and 'conf' |
|
193 |
+(only assignments that exceed a certain threshold -defined by |
|
194 |
+\code{conf_prob} parameter- are accepted, then the read count is |
|
195 |
+proportionally divided among the assignments above \code{conf_prob}).} |
|
196 |
+ |
|
197 |
+\item{\code{conf_prob}}{(Default 0.9) Minimum probability for high confidence |
|
198 |
+assignment.} |
|
171 | 199 |
}} |
172 | 200 |
|
173 | 201 |
\examples{ |