... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: crisprDesign |
2 | 2 |
Title: Comprehensive design of CRISPR gRNAs for nucleases and base editors |
3 |
-Version: 0.99.88 |
|
3 |
+Version: 0.99.89 |
|
4 | 4 |
Authors@R: c( |
5 | 5 |
person("Jean-Philippe", "Fortin", email = "[email protected]", role = c("aut", "cre")), |
6 | 6 |
person("Luke", "Hoberecht", email = "[email protected]", role = c("aut")) |
... | ... |
@@ -71,7 +71,7 @@ addOpsBarcodes <- function(guideSet, |
71 | 71 |
#' diagonal distances be set to 0 to ignore self distances? |
72 | 72 |
#' TRUE by default. |
73 | 73 |
#' @param splitByChunks Should distances be calculated in a chunk-wise |
74 |
-#' manner? TRUE by default. Highly recommended when the set of query |
|
74 |
+#' manner? FALSE by default. Setting it to TRUE is recommended when the set of query |
|
75 | 75 |
#' barcodes is large to reduce memory footprint. |
76 | 76 |
#' @param n_chunks Integer specifying the number of chunks to be used |
77 | 77 |
#' when \code{splitByChunks=TRUE}. If NULL (default), number of chunks |
... | ... |
@@ -118,7 +118,7 @@ getBarcodeDistanceMatrix <- function(queryBarcodes, |
118 | 118 |
if (is.null(min_dist_edit) & binnarize){ |
119 | 119 |
stop("min_dist_edit must be specified when binnarize=TRUE.") |
120 | 120 |
} |
121 |
- if (!splitByChunks){ |
|
121 |
+ if (!splitByChunks || length(queryBarcodes) <= 200){ # chunk only when requested AND there are more than 200 query barcodes |
|
122 | 122 |
out <- .getChunkDistanceMatrix(queryBarcodes=queryBarcodes, |
123 | 123 |
targetBarcodes=targetBarcodes, |
124 | 124 |
min_dist_edit=min_dist_edit, |
... | ... |
@@ -202,7 +202,10 @@ getBarcodeDistanceMatrix <- function(queryBarcodes, |
202 | 202 |
#' have edit distances less than the min_dist_edit will not be |
203 | 203 |
#' included in the library. 2 by default. |
204 | 204 |
#' @param dist_method String specifying distance method. |
205 |
-#' Must be either "hamming" (default) or "levenstein". |
|
205 |
+#' Must be either "hamming" (default) or "levenstein". |
|
206 |
+#' @param splitByChunks Should distances be calculated in a chunk-wise |
|
207 |
+#' manner? FALSE by default. Setting it to TRUE is recommended when the set of query |
|
208 |
+#' barcodes is large to reduce memory footprint. |
|
206 | 209 |
#' |
207 | 210 |
#' @return A subset of the \code{df} containing the gRNAs |
208 | 211 |
#' selected for the OPS library. |
... | ... |
@@ -229,7 +232,8 @@ designOpsLibrary <- function(df, |
229 | 232 |
n_guides=4, |
230 | 233 |
gene_field="gene", |
231 | 234 |
min_dist_edit=2, |
232 |
- dist_method=c("hamming","levenstein") |
|
235 |
+ dist_method=c("hamming","levenstein"), |
|
236 |
+ splitByChunks=FALSE |
|
233 | 237 |
){ |
234 | 238 |
dist_method <- match.arg(dist_method) |
235 | 239 |
df <- .validateOpsGrnaInput(df, gene_field) |
... | ... |
@@ -243,12 +247,14 @@ designOpsLibrary <- function(df, |
243 | 247 |
genes=genes) |
244 | 248 |
grnaList <- .initiateOpsLibrary(grnaList, |
245 | 249 |
dist_method=dist_method, |
246 |
- min_dist_edit=min_dist_edit) |
|
250 |
+ min_dist_edit=min_dist_edit, |
|
251 |
+ splitByChunks=splitByChunks) |
|
247 | 252 |
grnaList <- .updateOpsLibrary(grnaList, |
248 | 253 |
gene_field=gene_field, |
249 | 254 |
n_guides=n_guides, |
250 | 255 |
dist_method=dist_method, |
251 |
- min_dist_edit=min_dist_edit) |
|
256 |
+ min_dist_edit=min_dist_edit, |
|
257 |
+ splitByChunks=splitByChunks) |
|
252 | 258 |
out <- .getFinalOpsLibrary(grnaList) |
253 | 259 |
out <- out[order(out[[gene_field]], out[["rank"]]),,drop=FALSE] |
254 | 260 |
return(out) |
... | ... |
@@ -273,6 +279,9 @@ designOpsLibrary <- function(df, |
273 | 279 |
#' included in the library. 2 by default. |
274 | 280 |
#' @param dist_method String specifying distance method. |
275 | 281 |
#' Must be either "hamming" (default) or "levenstein". |
282 |
+#' @param splitByChunks Should distances be calculated in a chunk-wise |
|
283 |
+#' manner? FALSE by default. Setting it to TRUE is recommended when the set of query |
|
284 |
+#' barcodes is large to reduce memory footprint. |
|
276 | 285 |
#' |
277 | 286 |
#' @author Jean-Philippe Fortin |
278 | 287 |
#' |
... | ... |
@@ -308,7 +317,8 @@ updateOpsLibrary <- function(opsLibrary, |
308 | 317 |
n_guides=4, |
309 | 318 |
gene_field="gene", |
310 | 319 |
min_dist_edit=2, |
311 |
- dist_method=c("hamming","levenstein") |
|
320 |
+ dist_method=c("hamming","levenstein"), |
|
321 |
+ splitByChunks=FALSE |
|
312 | 322 |
){ |
313 | 323 |
dist_method <- match.arg(dist_method) |
314 | 324 |
df <- .validateOpsGrnaInput(df, gene_field) |
... | ... |
@@ -321,7 +331,8 @@ updateOpsLibrary <- function(opsLibrary, |
321 | 331 |
gene_field=gene_field, |
322 | 332 |
n_guides=n_guides, |
323 | 333 |
dist_method=dist_method, |
324 |
- min_dist_edit=min_dist_edit) |
|
334 |
+ min_dist_edit=min_dist_edit, |
|
335 |
+ splitByChunks=splitByChunks) |
|
325 | 336 |
out <- .getFinalOpsLibrary(grnaList) |
326 | 337 |
out <- out[order(out[[gene_field]], out[["rank"]]),,drop=FALSE] |
327 | 338 |
return(out) |
... | ... |
@@ -365,13 +376,15 @@ updateOpsLibrary <- function(opsLibrary, |
365 | 376 |
#' @importFrom Matrix rowSums |
366 | 377 |
.initiateOpsLibrary <- function(grnaList, |
367 | 378 |
dist_method, |
368 |
- min_dist_edit |
|
379 |
+ min_dist_edit, |
|
380 |
+ splitByChunks |
|
369 | 381 |
){ |
370 | 382 |
selected <- grnaList[["selected"]] |
371 | 383 |
mat <- getBarcodeDistanceMatrix(queryBarcodes=selected[["opsBarcode"]], |
372 | 384 |
binnarize=TRUE, |
373 | 385 |
dist_method=dist_method, |
374 |
- min_dist_edit=min_dist_edit) |
|
386 |
+ min_dist_edit=min_dist_edit, |
|
387 |
+ splitByChunks=splitByChunks) |
|
375 | 388 |
good <- Matrix::rowSums(mat>0)==0 |
376 | 389 |
# In case all guides are "bad", add first one only: |
377 | 390 |
if (sum(good)==0){ |
... | ... |
@@ -390,7 +403,8 @@ updateOpsLibrary <- function(opsLibrary, |
390 | 403 |
gene_field, |
391 | 404 |
n_guides, |
392 | 405 |
dist_method, |
393 |
- min_dist_edit |
|
406 |
+ min_dist_edit, |
|
407 |
+ splitByChunks |
|
394 | 408 |
){ |
395 | 409 |
shouldWeContinue <- TRUE |
396 | 410 |
while (shouldWeContinue){ |
... | ... |
@@ -399,7 +413,8 @@ updateOpsLibrary <- function(opsLibrary, |
399 | 413 |
gene_field=gene_field, |
400 | 414 |
n_guides=n_guides, |
401 | 415 |
dist_method=dist_method, |
402 |
- min_dist_edit=min_dist_edit) |
|
416 |
+ min_dist_edit=min_dist_edit, |
|
417 |
+ splitByChunks=splitByChunks) |
|
403 | 418 |
counts <- table(factor(grnaList[["selected"]][[gene_field]], |
404 | 419 |
levels=grnaList[["genes"]])) |
405 | 420 |
incomplete <- names(which(counts<n_guides)) |
... | ... |
@@ -425,7 +440,8 @@ updateOpsLibrary <- function(opsLibrary, |
425 | 440 |
gene_field, |
426 | 441 |
n_guides, |
427 | 442 |
dist_method, |
428 |
- min_dist_edit |
|
443 |
+ min_dist_edit, |
|
444 |
+ splitByChunks |
|
429 | 445 |
){ |
430 | 446 |
|
431 | 447 |
.getCandidates <- function(genes, n){ |
... | ... |
@@ -451,7 +467,8 @@ updateOpsLibrary <- function(opsLibrary, |
451 | 467 |
# most divergent: |
452 | 468 |
dist <- getBarcodeDistanceMatrix(cands[["opsBarcode"]], |
453 | 469 |
dist_method=dist_method, |
454 |
- min_dist_edit=min_dist_edit) |
|
470 |
+ min_dist_edit=min_dist_edit, |
|
471 |
+ splitByChunks=splitByChunks) |
|
455 | 472 |
score <- Matrix::rowSums(dist>0) |
456 | 473 |
cands <- cands[order(score),,drop=FALSE] |
457 | 474 |
|
... | ... |
@@ -494,7 +511,8 @@ updateOpsLibrary <- function(opsLibrary, |
494 | 511 |
dist <- getBarcodeDistanceMatrix(cands[["opsBarcode"]], |
495 | 512 |
lib[["opsBarcode"]], |
496 | 513 |
dist_method=dist_method, |
497 |
- min_dist_edit=min_dist_edit) |
|
514 |
+ min_dist_edit=min_dist_edit, |
|
515 |
+ splitByChunks=splitByChunks) |
|
498 | 516 |
cands <- cands[Matrix::rowSums(dist)==0,,drop=FALSE] |
499 | 517 |
grnaList <- .incrementalUpdate(grnaList, cands) |
500 | 518 |
} |
... | ... |
@@ -9,7 +9,8 @@ designOpsLibrary( |
9 | 9 |
n_guides = 4, |
10 | 10 |
gene_field = "gene", |
11 | 11 |
min_dist_edit = 2, |
12 |
- dist_method = c("hamming", "levenstein") |
|
12 |
+ dist_method = c("hamming", "levenstein"), |
|
13 |
+ splitByChunks = FALSE |
|
13 | 14 |
) |
14 | 15 |
} |
15 | 16 |
\arguments{ |
... | ... |
@@ -29,6 +30,10 @@ included in the library. 2 by default.} |
29 | 30 |
|
30 | 31 |
\item{dist_method}{String specifying distance method. |
31 | 32 |
Must be either "hamming" (default) or "levenstein".} |
33 |
+ |
|
34 |
+\item{splitByChunks}{Should distances be calculated in a chunk-wise |
|
35 |
+manner? FALSE by default. Setting it to TRUE is recommended when the set of query |
|
36 |
+barcodes is large to reduce memory footprint.} |
|
32 | 37 |
} |
33 | 38 |
\value{ |
34 | 39 |
A subset of the \code{df} containing the gRNAs |
... | ... |
@@ -37,7 +37,7 @@ diagonal distances be set to 0 to ignore self distances? |
37 | 37 |
TRUE by default.} |
38 | 38 |
|
39 | 39 |
\item{splitByChunks}{Should distances be calculated in a chunk-wise |
40 |
-manner? TRUE by default. Highly recommended when the set of query |
|
40 |
+manner? FALSE by default. Setting it to TRUE is recommended when the set of query |
|
41 | 41 |
barcodes is large to reduce memory footprint.} |
42 | 42 |
|
43 | 43 |
\item{n_chunks}{Integer specifying the number of chunks to be used |
... | ... |
@@ -10,7 +10,8 @@ updateOpsLibrary( |
10 | 10 |
n_guides = 4, |
11 | 11 |
gene_field = "gene", |
12 | 12 |
min_dist_edit = 2, |
13 |
- dist_method = c("hamming", "levenstein") |
|
13 |
+ dist_method = c("hamming", "levenstein"), |
|
14 |
+ splitByChunks = FALSE |
|
14 | 15 |
) |
15 | 16 |
} |
16 | 17 |
\arguments{ |
... | ... |
@@ -32,6 +33,10 @@ included in the library. 2 by default.} |
32 | 33 |
|
33 | 34 |
\item{dist_method}{String specifying distance method. |
34 | 35 |
Must be either "hamming" (default) or "levenstein".} |
36 |
+ |
|
37 |
+\item{splitByChunks}{Should distances be calculated in a chunk-wise |
|
38 |
+manner? FALSE by default. Setting it to TRUE is recommended when the set of query |
|
39 |
+barcodes is large to reduce memory footprint.} |
|
35 | 40 |
} |
36 | 41 |
\value{ |
37 | 42 |
A data.frame containing the original gRNAs from |