Former-commit-id: 4a69d2314d0fe5849d5002a1dc1cdd474f9afaff
... | ... |
@@ -30,6 +30,8 @@ Imports: |
30 | 30 |
e1071, |
31 | 31 |
ape, |
32 | 32 |
kernlab, |
33 |
+ tools, |
|
34 |
+ BiocFileCache, |
|
33 | 35 |
utils |
34 | 36 |
Suggests: |
35 | 37 |
knitr, |
... | ... |
@@ -39,7 +41,6 @@ Suggests: |
39 | 41 |
VignetteBuilder: knitr |
40 | 42 |
Depends: R (>= 4.1), Seurat, SingleCellExperiment, SummarizedExperiment |
41 | 43 |
LazyData: true |
42 |
-LazyDataCompression: xz |
|
43 | 44 |
RoxygenNote: 7.1.1 |
44 | 45 |
URL: https://github.com/grisslab/scAnnotatR |
45 | 46 |
BugReports: https://github.com/grisslab/scAnnotatR/issues/new |
... | ... |
@@ -6,6 +6,7 @@ export(caret_model) |
6 | 6 |
export(cell_type) |
7 | 7 |
export(classify_cells) |
8 | 8 |
export(delete_model) |
9 |
+export(load_models) |
|
9 | 10 |
export(marker_genes) |
10 | 11 |
export(p_thres) |
11 | 12 |
export(parent) |
... | ... |
@@ -16,6 +17,7 @@ export(scAnnotatR) |
16 | 17 |
export(test_classifier) |
17 | 18 |
export(train_classifier) |
18 | 19 |
exportMethods(show) |
20 |
+import(BiocFileCache) |
|
19 | 21 |
import(ROCR) |
20 | 22 |
import(SingleCellExperiment) |
21 | 23 |
import(ape) |
... | ... |
@@ -27,6 +29,7 @@ import(ggplot2) |
27 | 29 |
import(kernlab, except = c(alpha, predict)) |
28 | 30 |
import(methods) |
29 | 31 |
import(pROC) |
32 |
+import(tools) |
|
30 | 33 |
importFrom(Seurat,GetAssayData) |
31 | 34 |
importFrom(SummarizedExperiment,assay) |
32 | 35 |
importFrom(SummarizedExperiment,colData) |
... | ... |
@@ -132,6 +132,79 @@ setMethod("train_classifier", c("train_obj" = "Seurat"), |
132 | 132 |
return(object) |
133 | 133 |
}) |
134 | 134 |
|
135 |
+#' @inherit train_classifier |
|
136 |
+#' |
|
137 |
+#' @param sce_tag_slot string, name of annotation slot indicating |
|
138 |
+#' cell tag/label in the training object. |
|
139 |
+#' For \code{\link{SingleCellExperiment}} object, default value is "ident". |
|
140 |
+#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
141 |
+#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
142 |
+#' 1/"yes"/T/TRUE: being new cell type. |
|
143 |
+#' @param sce_parent_tag_slot string, name of a slot in cell meta data |
|
144 |
+#' indicating pre-assigned/predicted cell type. |
|
145 |
+#' Default field is "predicted_cell_type". |
|
146 |
+#' This field would have been filled automatically |
|
147 |
+#' when user called classify_cells function. |
|
148 |
+#' The slot must contain only string values. |
|
149 |
+#' @param sce_assay name of assay to use in training object. |
|
150 |
+#' Default to 'logcounts' assay. |
|
151 |
+#' |
|
152 |
+#' @import SingleCellExperiment |
|
153 |
+#' @importFrom SummarizedExperiment assay |
|
154 |
+#' |
|
155 |
+#' @rdname train_classifier |
|
156 |
+setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"), |
|
157 |
+ function(train_obj, cell_type, marker_genes, parent_cell = NA_character_, |
|
158 |
+ parent_classifier = NULL, path_to_models = "default", |
|
159 |
+ zscore = TRUE, sce_tag_slot = "ident", |
|
160 |
+ sce_parent_tag_slot = "predicted_cell_type", |
|
161 |
+ sce_assay = 'logcounts', ...) { |
|
162 |
+ # solve duplication of cell names |
|
163 |
+ colnames(train_obj) <- make.unique(colnames(train_obj), sep = '_') |
|
164 |
+ |
|
165 |
+ # convert Seurat object to matrix |
|
166 |
+ mat = SummarizedExperiment::assay(train_obj, sce_assay) |
|
167 |
+ |
|
168 |
+ tag = SummarizedExperiment::colData(train_obj)[, sce_tag_slot] |
|
169 |
+ names(tag) <- colnames(train_obj) |
|
170 |
+ |
|
171 |
+ if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(train_obj))) { |
|
172 |
+ parent_tag <- SummarizedExperiment::colData(train_obj)[, sce_parent_tag_slot] |
|
173 |
+ names(parent_tag) <- colnames(train_obj) |
|
174 |
+ } else parent_tag <- NULL |
|
175 |
+ |
|
176 |
+ object <- train_classifier_func(mat, tag, cell_type, marker_genes, |
|
177 |
+ parent_tag, parent_cell, parent_classifier, |
|
178 |
+ path_to_models, zscore) |
|
179 |
+ |
|
180 |
+ return(object) |
|
181 |
+}) |
|
182 |
+ |
|
183 |
+#' Train cell type from matrix |
|
184 |
+#' |
|
185 |
+#' @description Train a classifier for a new cell type from expression matrix |
|
186 |
+#' and tag |
|
187 |
+#' If cell type has a parent, only available for \code{\link{scAnnotatR}} |
|
188 |
+#' object as parent cell classifying model. |
|
189 |
+#' @param mat expression matrix of size n x m, n: genes, m: cells |
|
190 |
+#' @param tag named list indicating cell label |
|
191 |
+#' @param cell_type string indicating the name of the subtype |
|
192 |
+#' This must exactly match cell tag/label if cell tag/label is a string. |
|
193 |
+#' @param parent_tag named list indicating parent cell type |
|
194 |
+#' @param marker_genes list of marker genes used for the new training model |
|
195 |
+#' @param parent_cell string indicated the name of the parent cell type, |
|
196 |
+#' if parent cell type classifier has already been saved in model database. |
|
197 |
+#' Adjust path_to_models for exact database. |
|
198 |
+#' @param parent_classifier classification model for the parent cell type |
|
199 |
+#' @param path_to_models path to the folder containing the model database. |
|
200 |
+#' As default, the pretrained models in the package will be used. |
|
201 |
+#' If user has trained new models, indicate the folder containing the |
|
202 |
+#' new_models.rda file. |
|
203 |
+#' @param zscore whether gene expression in train_obj is transformed to zscore |
|
204 |
+#' |
|
205 |
+#' @return caret trained model |
|
206 |
+#' |
|
207 |
+#' @rdname internal |
|
135 | 208 |
train_classifier_func <- function(mat, tag, cell_type, marker_genes, |
136 | 209 |
parent_tag, parent_cell, parent_classifier, |
137 | 210 |
path_to_models, zscore) { |
... | ... |
@@ -139,7 +212,7 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes, |
139 | 212 |
processed_parent <- process_parent_classifier( |
140 | 213 |
mat, parent_tag, parent_cell, parent_classifier, path_to_models, zscore |
141 | 214 |
) |
142 |
- |
|
215 |
+ |
|
143 | 216 |
# check parent-child coherence |
144 | 217 |
if (!is.null(processed_parent$pos_parent)) { |
145 | 218 |
tag <- check_parent_child_coherence( |
... | ... |
@@ -206,54 +279,6 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes, |
206 | 279 |
return(object) |
207 | 280 |
} |
208 | 281 |
|
209 |
-#' @inherit train_classifier |
|
210 |
-#' |
|
211 |
-#' @param sce_tag_slot string, name of annotation slot indicating |
|
212 |
-#' cell tag/label in the training object. |
|
213 |
-#' For \code{\link{SingleCellExperiment}} object, default value is "ident". |
|
214 |
-#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
215 |
-#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
216 |
-#' 1/"yes"/T/TRUE: being new cell type. |
|
217 |
-#' @param sce_parent_tag_slot string, name of a slot in cell meta data |
|
218 |
-#' indicating pre-assigned/predicted cell type. |
|
219 |
-#' Default field is "predicted_cell_type". |
|
220 |
-#' This field would have been filled automatically |
|
221 |
-#' when user called classify_cells function. |
|
222 |
-#' The slot must contain only string values. |
|
223 |
-#' @param sce_assay name of assay to use in training object. |
|
224 |
-#' Default to 'logcounts' assay. |
|
225 |
-#' |
|
226 |
-#' @import SingleCellExperiment |
|
227 |
-#' @importFrom SummarizedExperiment assay |
|
228 |
-#' |
|
229 |
-#' @rdname train_classifier |
|
230 |
-setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"), |
|
231 |
- function(train_obj, cell_type, marker_genes, parent_cell = NA_character_, |
|
232 |
- parent_classifier = NULL, path_to_models = "default", |
|
233 |
- zscore = TRUE, sce_tag_slot = "ident", |
|
234 |
- sce_parent_tag_slot = "predicted_cell_type", |
|
235 |
- sce_assay = 'logcounts', ...) { |
|
236 |
- # solve duplication of cell names |
|
237 |
- colnames(train_obj) <- make.unique(colnames(train_obj), sep = '_') |
|
238 |
- |
|
239 |
- # convert Seurat object to matrix |
|
240 |
- mat = SummarizedExperiment::assay(train_obj, sce_assay) |
|
241 |
- |
|
242 |
- tag = SummarizedExperiment::colData(train_obj)[, sce_tag_slot] |
|
243 |
- names(tag) <- colnames(train_obj) |
|
244 |
- |
|
245 |
- if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(train_obj))) { |
|
246 |
- parent_tag <- SummarizedExperiment::colData(train_obj)[, sce_parent_tag_slot] |
|
247 |
- names(parent_tag) <- colnames(train_obj) |
|
248 |
- } else parent_tag <- NULL |
|
249 |
- |
|
250 |
- object <- train_classifier_func(mat, tag, cell_type, marker_genes, |
|
251 |
- parent_tag, parent_cell, parent_classifier, |
|
252 |
- path_to_models, zscore) |
|
253 |
- |
|
254 |
- return(object) |
|
255 |
-}) |
|
256 |
- |
|
257 | 282 |
#' Testing process. |
258 | 283 |
#' |
259 | 284 |
#' @description Testing process. |
... | ... |
@@ -363,6 +388,76 @@ setMethod("test_classifier", c("test_obj" = "Seurat", |
363 | 388 |
return(return_val) |
364 | 389 |
}) |
365 | 390 |
|
391 |
+#' @inherit test_classifier |
|
392 |
+#' |
|
393 |
+#' @param sce_tag_slot string, name of annotation slot |
|
394 |
+#' indicating cell tag/label in the testing object. |
|
395 |
+#' Strings indicating cell types are expected in this slot. |
|
396 |
+#' Default value is "ident". |
|
397 |
+#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
398 |
+#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
399 |
+#' 1/"yes"/T/TRUE: being new cell type. |
|
400 |
+#' @param sce_parent_tag_slot string, name of tag slot in cell meta data |
|
401 |
+#' indicating pre-assigned/predicted parent cell type. |
|
402 |
+#' Default is "predicted_cell_type". |
|
403 |
+#' The slot must contain only string values. |
|
404 |
+#' @param sce_assay name of assay to use in \code{\link{SingleCellExperiment}} |
|
405 |
+#' object, defaults to 'logcounts' assay. |
|
406 |
+#' |
|
407 |
+#' @import SingleCellExperiment |
|
408 |
+#' @importFrom SummarizedExperiment assay |
|
409 |
+#' |
|
410 |
+#' @rdname test_classifier |
|
411 |
+setMethod("test_classifier", c("test_obj" = "SingleCellExperiment", |
|
412 |
+ "classifier" = "scAnnotatR"), |
|
413 |
+ function(test_obj, classifier, target_cell_type = NULL, |
|
414 |
+ parent_classifier = NULL, path_to_models = "default", |
|
415 |
+ zscore = TRUE, sce_tag_slot = "ident", |
|
416 |
+ sce_parent_tag_slot = "predicted_cell_type", |
|
417 |
+ sce_assay = 'logcounts', ...) { |
|
418 |
+ # solve duplication of cell names |
|
419 |
+ colnames(test_obj) <- make.unique(colnames(test_obj), sep = '_') |
|
420 |
+ . <- fpr <- tpr <- NULL |
|
421 |
+ |
|
422 |
+ # convert SCE object to matrix |
|
423 |
+ mat = SummarizedExperiment::assay(test_obj, sce_assay) |
|
424 |
+ |
|
425 |
+ tag = SummarizedExperiment::colData(test_obj)[, sce_tag_slot] |
|
426 |
+ names(tag) <- colnames(test_obj) |
|
427 |
+ |
|
428 |
+ if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(test_obj))) { |
|
429 |
+ parent_tag <- SummarizedExperiment::colData(test_obj)[, sce_parent_tag_slot] |
|
430 |
+ names(parent_tag) <- colnames(test_obj) |
|
431 |
+ } else parent_tag <- NULL |
|
432 |
+ |
|
433 |
+ return_val <- test_classifier_func(mat, tag, classifier, parent_tag, |
|
434 |
+ target_cell_type, parent_classifier, |
|
435 |
+ path_to_models, zscore) |
|
436 |
+ |
|
437 |
+ return(return_val) |
|
438 |
+}) |
|
439 |
+ |
|
440 |
+#' Run testing process from matrix and tag |
|
441 |
+#' |
|
442 |
+#' @description Testing process from matrix and tag |
|
443 |
+#' @param mat expression matrix of size n x m, n: genes, m: cells |
|
444 |
+#' @param tag named list indicating cell label |
|
445 |
+#' @param classifier classification model |
|
446 |
+#' @param parent_tag named list indicating parent cell type |
|
447 |
+#' @param target_cell_type vector indicating other cell types than cell labels |
|
448 |
+#' that can be considered as the main cell type in classifier, |
|
449 |
+#' for example, c("plasma cell", "b cell", "b cells", "activating b cell"). |
|
450 |
+#' Default as NULL. |
|
451 |
+#' @param parent_classifier classification model for the parent cell type |
|
452 |
+#' @param path_to_models path to the folder containing the model database. |
|
453 |
+#' As default, the pretrained models in the package will be used. |
|
454 |
+#' If user has trained new models, indicate the folder containing the |
|
455 |
+#' new_models.rda file. |
|
456 |
+#' @param zscore whether gene expression in train_obj is transformed to zscore |
|
457 |
+#' |
|
458 |
+#' @return model performance statistics |
|
459 |
+#' |
|
460 |
+#' @rdname internal |
|
366 | 461 |
test_classifier_func <- function(mat, tag, classifier, parent_tag, |
367 | 462 |
target_cell_type, parent_classifier, |
368 | 463 |
path_to_models, zscore) { |
... | ... |
@@ -415,55 +510,6 @@ test_classifier_func <- function(mat, tag, classifier, parent_tag, |
415 | 510 |
return(return_val) |
416 | 511 |
} |
417 | 512 |
|
418 |
-#' @inherit test_classifier |
|
419 |
-#' |
|
420 |
-#' @param sce_tag_slot string, name of annotation slot |
|
421 |
-#' indicating cell tag/label in the testing object. |
|
422 |
-#' Strings indicating cell types are expected in this slot. |
|
423 |
-#' Default value is "ident". |
|
424 |
-#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
425 |
-#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
426 |
-#' 1/"yes"/T/TRUE: being new cell type. |
|
427 |
-#' @param sce_parent_tag_slot string, name of tag slot in cell meta data |
|
428 |
-#' indicating pre-assigned/predicted parent cell type. |
|
429 |
-#' Default is "predicted_cell_type". |
|
430 |
-#' The slot must contain only string values. |
|
431 |
-#' @param sce_assay name of assay to use in \code{\link{SingleCellExperiment}} |
|
432 |
-#' object, defaults to 'logcounts' assay. |
|
433 |
-#' |
|
434 |
-#' @import SingleCellExperiment |
|
435 |
-#' @importFrom SummarizedExperiment assay |
|
436 |
-#' |
|
437 |
-#' @rdname test_classifier |
|
438 |
-setMethod("test_classifier", c("test_obj" = "SingleCellExperiment", |
|
439 |
- "classifier" = "scAnnotatR"), |
|
440 |
- function(test_obj, classifier, target_cell_type = NULL, |
|
441 |
- parent_classifier = NULL, path_to_models = "default", |
|
442 |
- zscore = TRUE, sce_tag_slot = "ident", |
|
443 |
- sce_parent_tag_slot = "predicted_cell_type", |
|
444 |
- sce_assay = 'logcounts', ...) { |
|
445 |
- # solve duplication of cell names |
|
446 |
- colnames(test_obj) <- make.unique(colnames(test_obj), sep = '_') |
|
447 |
- . <- fpr <- tpr <- NULL |
|
448 |
- |
|
449 |
- # convert SCE object to matrix |
|
450 |
- mat = SummarizedExperiment::assay(test_obj, sce_assay) |
|
451 |
- |
|
452 |
- tag = SummarizedExperiment::colData(test_obj)[, sce_tag_slot] |
|
453 |
- names(tag) <- colnames(test_obj) |
|
454 |
- |
|
455 |
- if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(test_obj))) { |
|
456 |
- parent_tag <- SummarizedExperiment::colData(test_obj)[, sce_parent_tag_slot] |
|
457 |
- names(parent_tag) <- colnames(test_obj) |
|
458 |
- } else parent_tag <- NULL |
|
459 |
- |
|
460 |
- return_val <- test_classifier_func(mat, tag, classifier, parent_tag, |
|
461 |
- target_cell_type, parent_classifier, |
|
462 |
- path_to_models, zscore) |
|
463 |
- |
|
464 |
- return(return_val) |
|
465 |
-}) |
|
466 |
- |
|
467 | 513 |
#' Plot roc curve |
468 | 514 |
#' |
469 | 515 |
#' @param test_result result of test_classifier function |
... | ... |
@@ -8,16 +8,4 @@ |
8 | 8 |
#' @source WEIZMANN INSTITUTE OF SCIENCE |
9 | 9 |
#' @author Itay Tirosh, 2016-04-05 |
10 | 10 |
#' @keywords datasets |
11 |
-"tirosh_mel80_example" |
|
12 |
- |
|
13 |
-#' @name default_models |
|
14 |
-#' @title Pretrained classifiers for human cells |
|
15 |
-#' @description Pretrained classifier obtained by training and testing on the |
|
16 |
-#' Sade-Feldman melanoma dataset, the Jerby-Arnon melanoma dataset, the Haniffa |
|
17 |
-#' Skin Cell Atlas and the Haniffa Covid-19 Cell Atlas. |
|
18 |
-#' @docType data |
|
19 |
-#' @usage default_models |
|
20 |
-#' @format a list of \code{\link{scAnnotatR}} objects |
|
21 |
-#' @author Vy Nguyen, June 2021 |
|
22 |
-#' @keywords datasets |
|
23 |
-"default_models" |
|
24 | 11 |
\ No newline at end of file |
12 |
+"tirosh_mel80_example" |
|
25 | 13 |
\ No newline at end of file |
... | ... |
@@ -107,14 +107,16 @@ transform_to_zscore <- function(mat) { |
107 | 107 |
#' @return list of classifiers |
108 | 108 |
#' |
109 | 109 |
#' @importFrom utils data |
110 |
-#' @rdname internal |
|
110 |
+#' @rdname load_models |
|
111 |
+#' @export |
|
111 | 112 |
load_models <- function(path_to_models) { |
112 | 113 |
# prevents R CMD check note |
113 | 114 |
model_list <- NULL |
114 | 115 |
data_env <- new.env(parent = emptyenv()) |
115 | 116 |
|
116 | 117 |
if (path_to_models == "default") { |
117 |
- utils::data("default_models", envir = data_env) |
|
118 |
+ models_path <- download_data_file(TRUE) # more function: if user want to save cache |
|
119 |
+ load(models_path, envir = data_env) |
|
118 | 120 |
model_list <- data_env[["default_models"]] |
119 | 121 |
} else { |
120 | 122 |
models_path <- file.path(path_to_models, "new_models.rda") |
... | ... |
@@ -608,4 +610,43 @@ classify_clust <- function(clusts, most_probable_cell_type) { |
608 | 610 |
names(clust.pred) <- levels(clusts) |
609 | 611 |
converted_pred <- unlist(lapply(clusts, function(x) clust.pred[[x]])) |
610 | 612 |
return(converted_pred) |
611 |
-} |
|
612 | 613 |
\ No newline at end of file |
614 |
+} |
|
615 |
+ |
|
616 |
+#' Create a BiocFileCache object |
|
617 |
+#' |
|
618 |
+#' @return BiocFileCache object |
|
619 |
+#' @import tools |
|
620 |
+#' @import BiocFileCache |
|
621 |
+#' |
|
622 |
+#' @rdname internal |
|
623 |
+.get_cache <- |
|
624 |
+ function() |
|
625 |
+ { |
|
626 |
+ cache <- tools::R_user_dir("scAnnotatR", which="cache") |
|
627 |
+ BiocFileCache::BiocFileCache(cache) |
|
628 |
+ } |
|
629 |
+ |
|
630 |
+#' Download and store default models in cache |
|
631 |
+#' @param verbose logical indicating downloading the file or not |
|
632 |
+#' |
|
633 |
+#' @return path to the downloaded file in cache |
|
634 |
+#' @import BiocFileCache |
|
635 |
+#' |
|
636 |
+#' @rdname internal |
|
637 |
+download_data_file <- |
|
638 |
+ function(verbose = FALSE) |
|
639 |
+ { |
|
640 |
+ fileURL <- "https://github.com/grisslab/scAnnotatR-models/blob/main/default_models.rda?raw=true" |
|
641 |
+ |
|
642 |
+ bfc <- .get_cache() |
|
643 |
+ rid <- BiocFileCache::bfcquery(bfc, "default_models", "rname")$rid |
|
644 |
+ if (!length(rid)) { |
|
645 |
+ if (verbose) |
|
646 |
+ message("Downloading default models..." ) |
|
647 |
+ rid <- names(BiocFileCache::bfcadd(bfc, "default_models", fileURL)) |
|
648 |
+ } |
|
649 |
+ if (isFALSE(BiocFileCache::bfcneedsupdate(bfc, rid))) |
|
650 |
+ BiocFileCache::bfcdownload(bfc, rid) |
|
651 |
+ |
|
652 |
+ BiocFileCache::bfcrpath(bfc, rids = rid) |
|
653 |
+ } |
|
613 | 654 |
\ No newline at end of file |
... | ... |
@@ -9,7 +9,7 @@ |
9 | 9 |
#' both of them once. In addition, default pretrained models |
10 | 10 |
#' of the package cannot be changed or removed. |
11 | 11 |
#' This can be done with the new trained model list. |
12 |
-#' @param path.to.models path to the folder containing the list of new models. |
|
12 |
+#' @param path_to_models path to the folder containing the list of new models. |
|
13 | 13 |
#' |
14 | 14 |
#' @return no return value, but the model is now saved to database |
15 | 15 |
#' |
... | ... |
@@ -27,7 +27,7 @@ |
27 | 27 |
#' |
28 | 28 |
#' # save the trained classifier to system |
29 | 29 |
#' # test classifier can be used before this step |
30 |
-#' save_new_model(new_model = classifier_t, path.to.models = tempdir()) |
|
30 |
+#' save_new_model(new_model = classifier_t, path_to_models = tempdir()) |
|
31 | 31 |
#' |
32 | 32 |
#' # verify if new model has been saved |
33 | 33 |
#' print(names(load(file.path(tempdir(), "new_models.rda")))) |
... | ... |
@@ -35,20 +35,24 @@ |
35 | 35 |
#' |
36 | 36 |
#' @export |
37 | 37 |
save_new_model <- function(new_model, include.default = TRUE, |
38 |
- path.to.models = tempdir()) { |
|
38 |
+ path_to_models = tempdir()) { |
|
39 | 39 |
default_models <- NULL |
40 |
+ data_env <- new.env(parent = emptyenv()) |
|
40 | 41 |
|
41 |
- utils::data("default_models") |
|
42 |
- new_models.file.path = file.path(path.to.models, "new_models.rda") |
|
42 |
+ new_models.file.path = file.path(path_to_models, "new_models.rda") |
|
43 | 43 |
|
44 | 44 |
if (file.exists(new_models.file.path)) { |
45 |
- load(new_models.file.path) |
|
45 |
+ load(new_models.file.path, data_env) |
|
46 |
+ new_models <- data_env[["new_models"]] |
|
46 | 47 |
} else { |
47 | 48 |
new_models = NULL |
48 | 49 |
} |
49 | 50 |
|
50 | 51 |
if (include.default == TRUE) { |
51 | 52 |
# default models not in new_models will be added to new_models |
53 |
+ path_to_default_models <- download_data_file(TRUE) |
|
54 |
+ load(path_to_default_models, envir = data_env) |
|
55 |
+ default_models <- data_env[["default_models"]] |
|
52 | 56 |
to.be.added <- default_models[!names(default_models)%in%names(new_models)] |
53 | 57 |
new_models <- append(to.be.added, new_models) |
54 | 58 |
} |
... | ... |
@@ -77,7 +81,7 @@ save_new_model <- function(new_model, include.default = TRUE, |
77 | 81 |
|
78 | 82 |
#' Plant tree from list of models |
79 | 83 |
#' |
80 |
-#' @param models.file.path list of models. If not provided, |
|
84 |
+#' @param path_to_models list of models. If not provided, |
|
81 | 85 |
#' list of default pretrained models in the package will be used. |
82 | 86 |
#' |
83 | 87 |
#' @return tree structure and plot of tree |
... | ... |
@@ -92,24 +96,12 @@ save_new_model <- function(new_model, include.default = TRUE, |
92 | 96 |
#' plant_tree() |
93 | 97 |
#' |
94 | 98 |
#' @export |
95 |
-plant_tree <- function(models.file.path = "default") { |
|
99 |
+plant_tree <- function(path_to_models = "default") { |
|
96 | 100 |
data_env <- new.env(parent = emptyenv()) |
97 | 101 |
|
98 | 102 |
root.name <- "cell types" |
99 |
- if (models.file.path == "default") { |
|
100 |
- utils::data("default_models", envir = data_env) |
|
101 |
- model_list <- data_env[['default_models']] |
|
102 |
- } else { |
|
103 |
- models_file_path <- file.path(models.file.path, "new_models.rda") |
|
104 |
- if (!file.exists(models_file_path)) { |
|
105 |
- stop("No file exists in the indicated models file path", |
|
106 |
- call. = FALSE) |
|
107 |
- } else { |
|
108 |
- load(models_file_path, envir = data_env) |
|
109 |
- model_list <- data_env[['new_models']] |
|
110 |
- } |
|
111 |
- } |
|
112 | 103 |
|
104 |
+ model_list <- load_models(path_to_models) |
|
113 | 105 |
tree <- NULL |
114 | 106 |
|
115 | 107 |
if (!is.null(model_list)) { |
... | ... |
@@ -146,7 +138,7 @@ plant_tree <- function(models.file.path = "default") { |
146 | 138 |
#' @param cell_type string indicating the cell type of which |
147 | 139 |
#' the model will be removed from package |
148 | 140 |
#' Attention: deletion of a parent model will also delete all of child model. |
149 |
-#' @param path.to.models path to the folder containing |
|
141 |
+#' @param path_to_models path to the folder containing |
|
150 | 142 |
#' the list of models in which the to-be-deleted model is. |
151 | 143 |
#' |
152 | 144 |
#' @return no return value, but the model is deleted from database |
... | ... |
@@ -162,16 +154,19 @@ plant_tree <- function(models.file.path = "default") { |
162 | 154 |
#' marker_genes = selected_marker_genes_T, cell_type = "t cells") |
163 | 155 |
#' |
164 | 156 |
#' # save a classifier to system |
165 |
-#' save_new_model(new_model = classifier_t, path.to.models = tempdir()) |
|
157 |
+#' save_new_model(new_model = classifier_t, path_to_models = tempdir()) |
|
166 | 158 |
#' |
167 | 159 |
#' # delete classifier from system |
168 |
-#' delete_model("t cells", path.to.models = tempdir()) |
|
160 |
+#' delete_model("t cells", path_to_models = tempdir()) |
|
169 | 161 |
#' @export |
170 |
-delete_model <- function(cell_type, path.to.models = tempdir()) { |
|
162 |
+delete_model <- function(cell_type, path_to_models = tempdir()) { |
|
171 | 163 |
new_models <- NULL |
172 | 164 |
data_env <- new.env(parent = emptyenv()) |
173 | 165 |
|
174 |
- new_models.file.path <- file.path(path.to.models, "new_models.rda") |
|
166 |
+ if (path_to_models == 'default') |
|
167 |
+ stop("Cannot delete default models.", call. = FALSE) |
|
168 |
+ |
|
169 |
+ new_models.file.path <- file.path(path_to_models, "new_models.rda") |
|
175 | 170 |
if (!file.exists(new_models.file.path)) { |
176 | 171 |
stop("No list of models available", call. = FALSE) |
177 | 172 |
} else { |
180 | 175 |
deleted file mode 100644 |
... | ... |
@@ -1,21 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/data.R |
|
3 |
-\docType{data} |
|
4 |
-\name{default_models} |
|
5 |
-\alias{default_models} |
|
6 |
-\title{Pretrained classifiers for human cells} |
|
7 |
-\format{ |
|
8 |
-a list of \code{\link{scAnnotatR}} objects |
|
9 |
-} |
|
10 |
-\usage{ |
|
11 |
-default_models |
|
12 |
-} |
|
13 |
-\description{ |
|
14 |
-Pretrained classifier obtained by training and testing on the |
|
15 |
-Sade-Feldman melanoma dataset, the Jerby-Arnon melanoma dataset, the Haniffa |
|
16 |
-Skin Cell Atlas and the Haniffa Covid-19 Cell Atlas. |
|
17 |
-} |
|
18 |
-\author{ |
|
19 |
-Vy Nguyen, June 2021 |
|
20 |
-} |
|
21 |
-\keyword{datasets} |
... | ... |
@@ -4,14 +4,14 @@ |
4 | 4 |
\alias{delete_model} |
5 | 5 |
\title{Delete model/branch from package} |
6 | 6 |
\usage{ |
7 |
-delete_model(cell_type, path.to.models = tempdir()) |
|
7 |
+delete_model(cell_type, path_to_models = tempdir()) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{cell_type}{string indicating the cell type of which |
11 | 11 |
the model will be removed from package |
12 | 12 |
Attention: deletion of a parent model will also delete all of child model.} |
13 | 13 |
|
14 |
-\item{path.to.models}{path to the folder containing |
|
14 |
+\item{path_to_models}{path to the folder containing |
|
15 | 15 |
the list of models in which the to-be-deleted model is.} |
16 | 16 |
} |
17 | 17 |
\value{ |
... | ... |
@@ -31,8 +31,8 @@ classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
31 | 31 |
marker_genes = selected_marker_genes_T, cell_type = "t cells") |
32 | 32 |
|
33 | 33 |
# save a classifier to system |
34 |
-save_new_model(new_model = classifier_t, path.to.models = tempdir()) |
|
34 |
+save_new_model(new_model = classifier_t, path_to_models = tempdir()) |
|
35 | 35 |
|
36 | 36 |
# delete classifier from system |
37 |
-delete_model("t cells", path.to.models = tempdir()) |
|
37 |
+delete_model("t cells", path_to_models = tempdir()) |
|
38 | 38 |
} |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/class.R, R/support.R |
|
2 |
+% Please edit documentation in R/class.R, R/classifier.R, R/support.R |
|
3 | 3 |
\name{checkObjectValidity} |
4 | 4 |
\alias{checkObjectValidity} |
5 | 5 |
\alias{checkCellTypeValidity} |
... | ... |
@@ -13,10 +13,11 @@ |
13 | 13 |
\alias{caret_model<-,scAnnotatR-method} |
14 | 14 |
\alias{marker_genes<-} |
15 | 15 |
\alias{marker_genes<-,scAnnotatR-method} |
16 |
+\alias{train_classifier_func} |
|
17 |
+\alias{test_classifier_func} |
|
16 | 18 |
\alias{balance_dataset} |
17 | 19 |
\alias{train_func} |
18 | 20 |
\alias{transform_to_zscore} |
19 |
-\alias{load_models} |
|
20 | 21 |
\alias{select_marker_genes} |
21 | 22 |
\alias{check_parent_child_coherence} |
22 | 23 |
\alias{check_parent_child_coherence,dgCMatrix,vector-method} |
... | ... |
@@ -31,6 +32,8 @@ |
31 | 32 |
\alias{verify_parent} |
32 | 33 |
\alias{test_performance} |
33 | 34 |
\alias{classify_clust} |
35 |
+\alias{.get_cache} |
|
36 |
+\alias{download_data_file} |
|
34 | 37 |
\title{Internal functions of scAnnotatR package} |
35 | 38 |
\usage{ |
36 | 39 |
checkObjectValidity(object) |
... | ... |
@@ -57,14 +60,35 @@ marker_genes(classifier) <- value |
57 | 60 |
|
58 | 61 |
\S4method{marker_genes}{scAnnotatR}(classifier) <- value |
59 | 62 |
|
63 |
+train_classifier_func( |
|
64 |
+ mat, |
|
65 |
+ tag, |
|
66 |
+ cell_type, |
|
67 |
+ marker_genes, |
|
68 |
+ parent_tag, |
|
69 |
+ parent_cell, |
|
70 |
+ parent_classifier, |
|
71 |
+ path_to_models, |
|
72 |
+ zscore |
|
73 |
+) |
|
74 |
+ |
|
75 |
+test_classifier_func( |
|
76 |
+ mat, |
|
77 |
+ tag, |
|
78 |
+ classifier, |
|
79 |
+ parent_tag, |
|
80 |
+ target_cell_type, |
|
81 |
+ parent_classifier, |
|
82 |
+ path_to_models, |
|
83 |
+ zscore |
|
84 |
+) |
|
85 |
+ |
|
60 | 86 |
balance_dataset(mat, tag) |
61 | 87 |
|
62 | 88 |
train_func(mat, tag) |
63 | 89 |
|
64 | 90 |
transform_to_zscore(mat) |
65 | 91 |
|
66 |
-load_models(path_to_models) |
|
67 |
- |
|
68 | 92 |
select_marker_genes(mat, marker_genes) |
69 | 93 |
|
70 | 94 |
check_parent_child_coherence( |
... | ... |
@@ -120,6 +144,10 @@ verify_parent(mat, classifier, meta.data) |
120 | 144 |
test_performance(mat, classifier, tag) |
121 | 145 |
|
122 | 146 |
classify_clust(clusts, most_probable_cell_type) |
147 |
+ |
|
148 |
+.get_cache() |
|
149 |
+ |
|
150 |
+download_data_file(verbose = FALSE) |
|
123 | 151 |
} |
124 | 152 |
\arguments{ |
125 | 153 |
\item{object}{The request classifier to check.} |
... | ... |
@@ -142,25 +170,25 @@ classify_clust(clusts, most_probable_cell_type) |
142 | 170 |
|
143 | 171 |
\item{tag}{tag of data} |
144 | 172 |
|
145 |
-\item{path_to_models}{path to databases, or by default} |
|
146 |
- |
|
147 |
-\item{pos_parent}{a vector indicating parent classifier prediction} |
|
148 |
- |
|
149 |
-\item{parent_cell}{name of parent cell type} |
|
150 |
- |
|
151 |
-\item{target_cell_type}{alternative cell types (in case of testing classifier)} |
|
152 |
- |
|
153 | 173 |
\item{parent_tag}{vector, named list indicating pre-assigned/predicted |
154 | 174 |
parent cell type} |
155 | 175 |
|
156 |
-\item{parent_cell_type}{name of parent cell type} |
|
176 |
+\item{parent_cell}{name of parent cell type} |
|
157 | 177 |
|
158 | 178 |
\item{parent_classifier}{\code{\link{scAnnotatR}} object corresponding |
159 | 179 |
to classification model for the parent cell type} |
160 | 180 |
|
181 |
+\item{path_to_models}{path to databases, or by default} |
|
182 |
+ |
|
161 | 183 |
\item{zscore}{boolean indicating the transformation of gene expression |
162 | 184 |
in object to zscore or not} |
163 | 185 |
|
186 |
+\item{target_cell_type}{alternative cell types (in case of testing classifier)} |
|
187 |
+ |
|
188 |
+\item{pos_parent}{a vector indicating parent classifier prediction} |
|
189 |
+ |
|
190 |
+\item{parent_cell_type}{name of parent cell type} |
|
191 |
+ |
|
164 | 192 |
\item{pred_cells}{a whole prediction for all cells} |
165 | 193 |
|
166 | 194 |
\item{ignore_ambiguous_result}{whether ignore ambigouous result} |
... | ... |
@@ -174,6 +202,8 @@ in object to zscore or not} |
174 | 202 |
\item{clusts}{cluster info} |
175 | 203 |
|
176 | 204 |
\item{most_probable_cell_type}{predicted cell type} |
205 |
+ |
|
206 |
+\item{verbose}{logical indicating downloading the file or not} |
|
177 | 207 |
} |
178 | 208 |
\value{ |
179 | 209 |
TRUE if the classifier is valid or the reason why it is not |
... | ... |
@@ -200,6 +230,10 @@ the classifier with the new marker genes |
200 | 230 |
|
201 | 231 |
scAnnotatR object with the new marker genes. |
202 | 232 |
|
233 |
+caret trained model |
|
234 |
+ |
|
235 |
+model performance statistics |
|
236 |
+ |
|
203 | 237 |
a list of balanced count matrix |
204 | 238 |
and corresponding tags of balanced count matrix |
205 | 239 |
|
... | ... |
@@ -207,8 +241,6 @@ the classification model (caret object) |
207 | 241 |
|
208 | 242 |
row wise center-scaled count matrix |
209 | 243 |
|
210 |
-list of classifiers |
|
211 |
- |
|
212 | 244 |
filtered matrix |
213 | 245 |
|
214 | 246 |
list of adjusted tag |
... | ... |
@@ -226,7 +258,18 @@ simplified prediction |
226 | 258 |
applicable matrix |
227 | 259 |
|
228 | 260 |
classifier performance |
261 |
+ |
|
262 |
+BiocFileCache object |
|
263 |
+ |
|
264 |
+path to the downloaded file in cache |
|
229 | 265 |
} |
230 | 266 |
\description{ |
231 | 267 |
Check if a scAnnotatR object is valid |
268 |
+ |
|
269 |
+Train a classifier for a new cell type from expression matrix |
|
270 |
+and tag |
|
271 |
+If cell type has a parent, only available for \code{\link{scAnnotatR}} |
|
272 |
+object as parent cell classifying model. |
|
273 |
+ |
|
274 |
+Testing process from matrix and tag |
|
232 | 275 |
} |
233 | 276 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,17 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/support.R |
|
3 |
+\name{load_models} |
|
4 |
+\alias{load_models} |
|
5 |
+\title{Load classifiers from databases} |
|
6 |
+\usage{ |
|
7 |
+load_models(path_to_models) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{path_to_models}{path to databases, or by default} |
|
11 |
+} |
|
12 |
+\value{ |
|
13 |
+list of classifiers |
|
14 |
+} |
|
15 |
+\description{ |
|
16 |
+Load classifiers from databases |
|
17 |
+} |
... | ... |
@@ -4,10 +4,10 @@ |
4 | 4 |
\alias{plant_tree} |
5 | 5 |
\title{Plant tree from list of models} |
6 | 6 |
\usage{ |
7 |
-plant_tree(models.file.path = "default") |
|
7 |
+plant_tree(path_to_models = "default") |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 |
-\item{models.file.path}{list of models. If not provided, |
|
10 |
+\item{path_to_models}{list of models. If not provided, |
|
11 | 11 |
list of default pretrained models in the package will be used.} |
12 | 12 |
} |
13 | 13 |
\value{ |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
\alias{save_new_model} |
5 | 5 |
\title{Save a model to the package} |
6 | 6 |
\usage{ |
7 |
-save_new_model(new_model, include.default = TRUE, path.to.models = tempdir()) |
|
7 |
+save_new_model(new_model, include.default = TRUE, path_to_models = tempdir()) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{new_model}{new model to be added into the classification tree} |
... | ... |
@@ -18,7 +18,7 @@ both of them once. In addition, default pretrained models |
18 | 18 |
of the package cannot be changed or removed. |
19 | 19 |
This can be done with the new trained model list.} |
20 | 20 |
|
21 |
-\item{path.to.models}{path to the folder containing the list of new models.} |
|
21 |
+\item{path_to_models}{path to the folder containing the list of new models.} |
|
22 | 22 |
} |
23 | 23 |
\value{ |
24 | 24 |
no return value, but the model is now saved to database |
... | ... |
@@ -38,7 +38,7 @@ marker_genes = selected_marker_genes_T, cell_type = "t cells") |
38 | 38 |
|
39 | 39 |
# save the trained classifier to system |
40 | 40 |
# test classifier can be used before this step |
41 |
-save_new_model(new_model = classifier_t, path.to.models = tempdir()) |
|
41 |
+save_new_model(new_model = classifier_t, path_to_models = tempdir()) |
|
42 | 42 |
|
43 | 43 |
# verify if new model has been saved |
44 | 44 |
print(names(load(file.path(tempdir(), "new_models.rda")))) |
... | ... |
@@ -2,7 +2,7 @@ context("scAnnotatR class functions") |
2 | 2 |
library(scAnnotatR) |
3 | 3 |
|
4 | 4 |
test_that("Set cell type changes cell type", { |
5 |
- data("default_models") |
|
5 |
+ default_models <- load_models('default') |
|
6 | 6 |
classifier_B <- default_models[['B cells']] |
7 | 7 |
|
8 | 8 |
cell_type(classifier_B) <- "b cells" |
... | ... |
@@ -10,7 +10,7 @@ test_that("Set cell type changes cell type", { |
10 | 10 |
}) |
11 | 11 |
|
12 | 12 |
test_that("Set probability threshold changes probability threshold", { |
13 |
- data("default_models") |
|
13 |
+ default_models <- load_models('default') |
|
14 | 14 |
classifier_B <- default_models[['B cells']] |
15 | 15 |
|
16 | 16 |
p_thres(classifier_B) <- 0.6 |
... | ... |
@@ -18,7 +18,7 @@ test_that("Set probability threshold changes probability threshold", { |
18 | 18 |
}) |
19 | 19 |
|
20 | 20 |
test_that("Set classifier changes classifier and marker genes", { |
21 |
- data("default_models") |
|
21 |
+ default_models <- load_models('default') |
|
22 | 22 |
classifier_B <- default_models[['B cells']] |
23 | 23 |
classifier_T <- default_models[['T cells']] |
24 | 24 |
|
... | ... |
@@ -233,7 +233,7 @@ New classification models can be stored using the `save_new_model` function: |
233 | 233 |
|
234 | 234 |
```{r} |
235 | 235 |
# no copy of pretrained models is performed |
236 |
-save_new_model(new_model = classifier_B, path.to.models = tempdir(), |
|
236 |
+save_new_model(new_model = classifier_B, path_to_models = tempdir(), |
|
237 | 237 |
include.default = FALSE) |
238 | 238 |
``` |
239 | 239 |
|
... | ... |
@@ -241,7 +241,7 @@ Parameters: |
241 | 241 |
|
242 | 242 |
* **new_model**: The new model that should be added to the database in the |
243 | 243 |
specified directory. |
244 |
- * **path.to.models**: The directory where the new models should be stored. |
|
244 |
+ * **path_to_models**: The directory where the new models should be stored. |
|
245 | 245 |
* **include.default**: If set, the default models shipped with the package |
246 | 246 |
are added to the database. |
247 | 247 |
|
... | ... |
@@ -253,7 +253,7 @@ Models can be deleted from the model database using the `delete_model` function: |
253 | 253 |
|
254 | 254 |
```{r} |
255 | 255 |
# delete the "B cells" model from the new database |
256 |
-delete_model("B cells", path.to.models = tempdir()) |
|
256 |
+delete_model("B cells", path_to_models = tempdir()) |
|
257 | 257 |
``` |
258 | 258 |
|
259 | 259 |
## Session Info |
... | ... |
@@ -60,7 +60,7 @@ library(scAnnotatR) |
60 | 60 |
``` |
61 | 61 |
|
62 | 62 |
```{r} |
63 |
-data("default_models") |
|
63 |
+default_models <- load_models('default') |
|
64 | 64 |
classifier_B <- default_models[['B cells']] |
65 | 65 |
classifier_B |
66 | 66 |
``` |
... | ... |
@@ -171,7 +171,7 @@ parent classifier to the train method: |
171 | 171 |
for example: *parent_cell = 'B cells'* |
172 | 172 |
|
173 | 173 |
* Users can give name of a model among models available in users' database |
174 |
-AND the path to that database, for example: `parent_cell = 'B cells', path.to.models = '.'` |
|
174 |
+AND the path to that database, for example: `parent_cell = 'B cells', path_to_models = '.'` |
|
175 | 175 |
|
176 | 176 |
Train the child classifier: |
177 | 177 |
```{r} |
... | ... |
@@ -230,15 +230,15 @@ names(default_models) |
230 | 230 |
|
231 | 231 |
In our package, the default models already include a model classifying plasma cells. |
232 | 232 |
Therefore, we will save this model to a new local database specified by the |
233 |
-*path.to.models* parameter. If you start with a fresh new local database, |
|
233 |
+*path_to_models* parameter. If you start with a fresh new local database, |
|
234 | 234 |
there is no available parent classifier of plasma cells' classifier. Therefore, |
235 | 235 |
we have to save the parent classifier first, e.g. the classifier for B cells. |
236 | 236 |
|
237 | 237 |
```{r} |
238 | 238 |
# no copy of pretrained models is performed |
239 |
-save_new_model(new_model = classifier_B, path.to.models = tempdir(), |
|
239 |
+save_new_model(new_model = classifier_B, path_to_models = tempdir(), |
|
240 | 240 |
include.default = FALSE) |
241 |
-save_new_model(new_model = classifier_plasma, path.to.models = tempdir(), |
|
241 |
+save_new_model(new_model = classifier_plasma, path_to_models = tempdir(), |
|
242 | 242 |
include.default = FALSE) |
243 | 243 |
``` |
244 | 244 |
|