Browse code

remove default_models store in package, use BiocFileCache to manage pretrained models

Former-commit-id: 4a69d2314d0fe5849d5002a1dc1cdd474f9afaff

nttvy authored on 29/07/2021 14:57:11
Showing 17 changed files

... ...
@@ -30,6 +30,8 @@ Imports:
30 30
     e1071, 
31 31
     ape, 
32 32
     kernlab,
33
+    tools,
34
+    BiocFileCache,
33 35
     utils
34 36
 Suggests: 
35 37
     knitr,
... ...
@@ -39,7 +41,6 @@ Suggests:
39 41
 VignetteBuilder: knitr
40 42
 Depends: R (>= 4.1), Seurat, SingleCellExperiment, SummarizedExperiment
41 43
 LazyData: true
42
-LazyDataCompression: xz
43 44
 RoxygenNote: 7.1.1
44 45
 URL: https://github.com/grisslab/scAnnotatR
45 46
 BugReports: https://github.com/grisslab/scAnnotatR/issues/new
... ...
@@ -6,6 +6,7 @@ export(caret_model)
6 6
 export(cell_type)
7 7
 export(classify_cells)
8 8
 export(delete_model)
9
+export(load_models)
9 10
 export(marker_genes)
10 11
 export(p_thres)
11 12
 export(parent)
... ...
@@ -16,6 +17,7 @@ export(scAnnotatR)
16 17
 export(test_classifier)
17 18
 export(train_classifier)
18 19
 exportMethods(show)
20
+import(BiocFileCache)
19 21
 import(ROCR)
20 22
 import(SingleCellExperiment)
21 23
 import(ape)
... ...
@@ -27,6 +29,7 @@ import(ggplot2)
27 29
 import(kernlab, except = c(alpha, predict))
28 30
 import(methods)
29 31
 import(pROC)
32
+import(tools)
30 33
 importFrom(Seurat,GetAssayData)
31 34
 importFrom(SummarizedExperiment,assay)
32 35
 importFrom(SummarizedExperiment,colData)
... ...
@@ -132,6 +132,79 @@ setMethod("train_classifier", c("train_obj" = "Seurat"),
132 132
   return(object)
133 133
 })
134 134
 
135
+#' @inherit train_classifier
136
+#' 
137
+#' @param sce_tag_slot string, name of annotation slot indicating 
138
+#' cell tag/label in the training object.
139
+#' For \code{\link{SingleCellExperiment}} object, default value is "ident".  
140
+#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
141
+#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
142
+#' 1/"yes"/T/TRUE: being new cell type.
143
+#' @param sce_parent_tag_slot string, name of a slot in cell meta data 
144
+#' indicating pre-assigned/predicted cell type. 
145
+#' Default field is "predicted_cell_type".
146
+#' This field would have been filled automatically 
147
+#' when user called classify_cells function. 
148
+#' The slot must contain only string values. 
149
+#' @param sce_assay name of assay to use in training object. 
150
+#' Default to 'logcounts' assay.
151
+#' 
152
+#' @import SingleCellExperiment
153
+#' @importFrom SummarizedExperiment assay
154
+#' 
155
+#' @rdname train_classifier
156
+setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"), 
157
+          function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
158
+                   parent_classifier = NULL, path_to_models = "default", 
159
+                   zscore = TRUE, sce_tag_slot = "ident", 
160
+                   sce_parent_tag_slot = "predicted_cell_type", 
161
+                   sce_assay = 'logcounts', ...) {
162
+  # solve duplication of cell names
163
+  colnames(train_obj) <- make.unique(colnames(train_obj), sep = '_')
164
+  
165
+  # convert Seurat object to matrix
166
+  mat = SummarizedExperiment::assay(train_obj, sce_assay)
167
+  
168
+  tag = SummarizedExperiment::colData(train_obj)[, sce_tag_slot]
169
+  names(tag) <- colnames(train_obj)
170
+  
171
+  if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(train_obj))) {
172
+    parent_tag <- SummarizedExperiment::colData(train_obj)[, sce_parent_tag_slot]
173
+    names(parent_tag) <- colnames(train_obj)
174
+  } else parent_tag <- NULL
175
+  
176
+  object <- train_classifier_func(mat, tag, cell_type, marker_genes, 
177
+                                  parent_tag, parent_cell, parent_classifier,
178
+                                  path_to_models, zscore)
179
+  
180
+  return(object)
181
+})
182
+
183
+#' Train cell type from matrix
184
+#' 
185
+#' @description Train a classifier for a new cell type from expression matrix
186
+#' and tag 
187
+#' If cell type has a parent, only available for \code{\link{scAnnotatR}}
188
+#' object as parent cell classifying model.
189
+#' @param mat expression matrix of size n x m, n: genes, m: cells
190
+#' @param tag named list indicating cell label
191
+#' @param cell_type string indicating the name of the subtype
192
+#' This must exactly match cell tag/label if cell tag/label is a string.
193
+#' @param parent_tag named list indicating parent cell type
194
+#' @param marker_genes list of marker genes used for the new training model
195
+#' @param parent_cell string indicated the name of the parent cell type, 
196
+#' if parent cell type classifier has already been saved in model database.
197
+#' Adjust path_to_models for exact database.  
198
+#' @param parent_classifier classification model for the parent cell type
199
+#' @param path_to_models path to the folder containing the model database. 
200
+#' As default, the pretrained models in the package will be used. 
201
+#' If user has trained new models, indicate the folder containing the 
202
+#' new_models.rda file.
203
+#' @param zscore whether gene expression in train_obj is transformed to zscore
204
+#' 
205
+#' @return caret trained model
206
+#' 
207
+#' @rdname internal
135 208
 train_classifier_func <- function(mat, tag, cell_type, marker_genes, 
136 209
                                   parent_tag, parent_cell, parent_classifier, 
137 210
                                   path_to_models, zscore) {
... ...
@@ -139,7 +212,7 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes,
139 212
   processed_parent <- process_parent_classifier(
140 213
     mat, parent_tag, parent_cell, parent_classifier, path_to_models, zscore
141 214
   )
142
-
215
+  
143 216
   # check parent-child coherence
144 217
   if (!is.null(processed_parent$pos_parent)) {
145 218
     tag <- check_parent_child_coherence(
... ...
@@ -206,54 +279,6 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes,
206 279
   return(object)
207 280
 }
208 281
 
209
-#' @inherit train_classifier
210
-#' 
211
-#' @param sce_tag_slot string, name of annotation slot indicating 
212
-#' cell tag/label in the training object.
213
-#' For \code{\link{SingleCellExperiment}} object, default value is "ident".  
214
-#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
215
-#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
216
-#' 1/"yes"/T/TRUE: being new cell type.
217
-#' @param sce_parent_tag_slot string, name of a slot in cell meta data 
218
-#' indicating pre-assigned/predicted cell type. 
219
-#' Default field is "predicted_cell_type".
220
-#' This field would have been filled automatically 
221
-#' when user called classify_cells function. 
222
-#' The slot must contain only string values. 
223
-#' @param sce_assay name of assay to use in training object. 
224
-#' Default to 'logcounts' assay.
225
-#' 
226
-#' @import SingleCellExperiment
227
-#' @importFrom SummarizedExperiment assay
228
-#' 
229
-#' @rdname train_classifier
230
-setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"), 
231
-          function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
232
-                   parent_classifier = NULL, path_to_models = "default", 
233
-                   zscore = TRUE, sce_tag_slot = "ident", 
234
-                   sce_parent_tag_slot = "predicted_cell_type", 
235
-                   sce_assay = 'logcounts', ...) {
236
-  # solve duplication of cell names
237
-  colnames(train_obj) <- make.unique(colnames(train_obj), sep = '_')
238
-  
239
-  # convert Seurat object to matrix
240
-  mat = SummarizedExperiment::assay(train_obj, sce_assay)
241
-  
242
-  tag = SummarizedExperiment::colData(train_obj)[, sce_tag_slot]
243
-  names(tag) <- colnames(train_obj)
244
-  
245
-  if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(train_obj))) {
246
-    parent_tag <- SummarizedExperiment::colData(train_obj)[, sce_parent_tag_slot]
247
-    names(parent_tag) <- colnames(train_obj)
248
-  } else parent_tag <- NULL
249
-  
250
-  object <- train_classifier_func(mat, tag, cell_type, marker_genes, 
251
-                                  parent_tag, parent_cell, parent_classifier,
252
-                                  path_to_models, zscore)
253
-  
254
-  return(object)
255
-})
256
-
257 282
 #' Testing process.
258 283
 #' 
259 284
 #' @description Testing process. 
... ...
@@ -363,6 +388,76 @@ setMethod("test_classifier", c("test_obj" = "Seurat",
363 388
   return(return_val)
364 389
 })
365 390
 
391
+#' @inherit test_classifier
392
+#' 
393
+#' @param sce_tag_slot string, name of annotation slot 
394
+#' indicating cell tag/label in the testing object.
395
+#' Strings indicating cell types are expected in this slot. 
396
+#' Default value is "ident".  
397
+#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
398
+#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
399
+#' 1/"yes"/T/TRUE: being new cell type.
400
+#' @param sce_parent_tag_slot string, name of tag slot in cell meta data
401
+#' indicating pre-assigned/predicted parent cell type. 
402
+#' Default is "predicted_cell_type".
403
+#' The slot must contain only string values. 
404
+#' @param sce_assay name of assay to use in \code{\link{SingleCellExperiment}}
405
+#' object, defaults to 'logcounts' assay.
406
+#'  
407
+#' @import SingleCellExperiment
408
+#' @importFrom SummarizedExperiment assay
409
+#' 
410
+#' @rdname test_classifier
411
+setMethod("test_classifier", c("test_obj" = "SingleCellExperiment", 
412
+                               "classifier" = "scAnnotatR"), 
413
+          function(test_obj, classifier, target_cell_type = NULL, 
414
+                   parent_classifier = NULL, path_to_models = "default", 
415
+                   zscore = TRUE, sce_tag_slot = "ident", 
416
+                   sce_parent_tag_slot = "predicted_cell_type", 
417
+                   sce_assay = 'logcounts', ...) {
418
+  # solve duplication of cell names
419
+  colnames(test_obj) <- make.unique(colnames(test_obj), sep = '_')
420
+  . <- fpr <- tpr <- NULL
421
+  
422
+  # convert SCE object to matrix
423
+  mat = SummarizedExperiment::assay(test_obj, sce_assay)
424
+  
425
+  tag = SummarizedExperiment::colData(test_obj)[, sce_tag_slot]
426
+  names(tag) <- colnames(test_obj)
427
+  
428
+  if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(test_obj))) {
429
+    parent_tag <- SummarizedExperiment::colData(test_obj)[, sce_parent_tag_slot]
430
+    names(parent_tag) <- colnames(test_obj)
431
+  } else parent_tag <- NULL
432
+  
433
+  return_val <- test_classifier_func(mat, tag, classifier, parent_tag,
434
+                                     target_cell_type, parent_classifier,
435
+                                     path_to_models, zscore)
436
+  
437
+  return(return_val)
438
+})
439
+
440
+#' Run testing process from matrix and tag
441
+#' 
442
+#' @description Testing process from matrix and tag 
443
+#' @param mat expression matrix of size n x m, n: genes, m: cells
444
+#' @param tag named list indicating cell label
445
+#' @param classifier classification model
446
+#' @param parent_tag named list indicating parent cell type
447
+#' @param target_cell_type vector indicating other cell types than cell labels 
448
+#' that can be considered as the main cell type in classifier, 
449
+#' for example, c("plasma cell", "b cell", "b cells", "activating b cell"). 
450
+#' Default as NULL.
451
+#' @param parent_classifier classification model for the parent cell type
452
+#' @param path_to_models path to the folder containing the model database. 
453
+#' As default, the pretrained models in the package will be used. 
454
+#' If user has trained new models, indicate the folder containing the 
455
+#' new_models.rda file.
456
+#' @param zscore whether gene expression in train_obj is transformed to zscore
457
+#' 
458
+#' @return model performance statistics
459
+#' 
460
+#' @rdname internal
366 461
 test_classifier_func <- function(mat, tag, classifier, parent_tag, 
367 462
                                  target_cell_type, parent_classifier,
368 463
                                  path_to_models, zscore) {
... ...
@@ -415,55 +510,6 @@ test_classifier_func <- function(mat, tag, classifier, parent_tag,
415 510
   return(return_val)
416 511
 }
417 512
 
418
-#' @inherit test_classifier
419
-#' 
420
-#' @param sce_tag_slot string, name of annotation slot 
421
-#' indicating cell tag/label in the testing object.
422
-#' Strings indicating cell types are expected in this slot. 
423
-#' Default value is "ident".  
424
-#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
425
-#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
426
-#' 1/"yes"/T/TRUE: being new cell type.
427
-#' @param sce_parent_tag_slot string, name of tag slot in cell meta data
428
-#' indicating pre-assigned/predicted parent cell type. 
429
-#' Default is "predicted_cell_type".
430
-#' The slot must contain only string values. 
431
-#' @param sce_assay name of assay to use in \code{\link{SingleCellExperiment}}
432
-#' object, defaults to 'logcounts' assay.
433
-#'  
434
-#' @import SingleCellExperiment
435
-#' @importFrom SummarizedExperiment assay
436
-#' 
437
-#' @rdname test_classifier
438
-setMethod("test_classifier", c("test_obj" = "SingleCellExperiment", 
439
-                               "classifier" = "scAnnotatR"), 
440
-          function(test_obj, classifier, target_cell_type = NULL, 
441
-                   parent_classifier = NULL, path_to_models = "default", 
442
-                   zscore = TRUE, sce_tag_slot = "ident", 
443
-                   sce_parent_tag_slot = "predicted_cell_type", 
444
-                   sce_assay = 'logcounts', ...) {
445
-  # solve duplication of cell names
446
-  colnames(test_obj) <- make.unique(colnames(test_obj), sep = '_')
447
-  . <- fpr <- tpr <- NULL
448
-  
449
-  # convert SCE object to matrix
450
-  mat = SummarizedExperiment::assay(test_obj, sce_assay)
451
-  
452
-  tag = SummarizedExperiment::colData(test_obj)[, sce_tag_slot]
453
-  names(tag) <- colnames(test_obj)
454
-  
455
-  if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(test_obj))) {
456
-    parent_tag <- SummarizedExperiment::colData(test_obj)[, sce_parent_tag_slot]
457
-    names(parent_tag) <- colnames(test_obj)
458
-  } else parent_tag <- NULL
459
-  
460
-  return_val <- test_classifier_func(mat, tag, classifier, parent_tag,
461
-                                     target_cell_type, parent_classifier,
462
-                                     path_to_models, zscore)
463
-  
464
-  return(return_val)
465
-})
466
-
467 513
 #' Plot roc curve
468 514
 #' 
469 515
 #' @param test_result result of test_classifier function
... ...
@@ -8,16 +8,4 @@
8 8
 #' @source WEIZMANN INSTITUTE OF SCIENCE
9 9
 #' @author Itay Tirosh, 2016-04-05
10 10
 #' @keywords datasets
11
-"tirosh_mel80_example"
12
-
13
-#' @name default_models
14
-#' @title Pretrained classifiers for human cells
15
-#' @description Pretrained classifier obtained by training and testing on the 
16
-#' Sade-Feldman melanoma dataset, the Jerby-Arnon melanoma dataset, the Haniffa
17
-#' Skin Cell Atlas and the Haniffa Covid-19 Cell Atlas.
18
-#' @docType data
19
-#' @usage default_models
20
-#' @format a list of \code{\link{scAnnotatR}} objects
21
-#' @author Vy Nguyen, June 2021
22
-#' @keywords datasets
23
-"default_models"
24 11
\ No newline at end of file
12
+"tirosh_mel80_example"
25 13
\ No newline at end of file
... ...
@@ -107,14 +107,16 @@ transform_to_zscore <- function(mat) {
107 107
 #' @return list of classifiers
108 108
 #' 
109 109
 #' @importFrom utils data
110
-#' @rdname internal
110
+#' @rdname load_models
111
+#' @export
111 112
 load_models <- function(path_to_models) {
112 113
   # prevents R CMD check note
113 114
   model_list <- NULL
114 115
   data_env <- new.env(parent = emptyenv())
115 116
   
116 117
   if (path_to_models == "default") {
117
-    utils::data("default_models", envir = data_env)
118
+    models_path <- download_data_file(TRUE) # more function: if user want to save cache
119
+    load(models_path, envir = data_env)
118 120
     model_list <- data_env[["default_models"]]
119 121
   } else {
120 122
     models_path <- file.path(path_to_models, "new_models.rda")
... ...
@@ -608,4 +610,43 @@ classify_clust <- function(clusts, most_probable_cell_type) {
608 610
   names(clust.pred) <- levels(clusts)
609 611
   converted_pred <- unlist(lapply(clusts, function(x) clust.pred[[x]]))
610 612
   return(converted_pred)
611
-}
612 613
\ No newline at end of file
614
+}
615
+
616
+#' Create a BiocFileCache object
617
+#'
618
+#' @return BiocFileCache object 
619
+#' @import tools
620
+#' @import BiocFileCache
621
+#' 
622
+#' @rdname internal
623
+.get_cache <-
624
+  function()
625
+  {
626
+    cache <- tools::R_user_dir("scAnnotatR", which="cache")
627
+    BiocFileCache::BiocFileCache(cache)
628
+  }
629
+
630
+#' Download and store default models in cache
631
+#' @param verbose logical indicating downloading the file or not 
632
+#'
633
+#' @return path to the downloaded file in cache 
634
+#' @import BiocFileCache
635
+#' 
636
+#' @rdname internal
637
+download_data_file <-
638
+  function(verbose = FALSE)
639
+  {
640
+    fileURL <- "https://github.com/grisslab/scAnnotatR-models/blob/main/default_models.rda?raw=true"
641
+    
642
+    bfc <- .get_cache()
643
+    rid <- BiocFileCache::bfcquery(bfc, "default_models", "rname")$rid
644
+    if (!length(rid)) {
645
+      if (verbose)
646
+        message("Downloading default models..." )
647
+      rid <- names(BiocFileCache::bfcadd(bfc, "default_models", fileURL))
648
+    }
649
+    if (isFALSE(BiocFileCache::bfcneedsupdate(bfc, rid)))
650
+      BiocFileCache::bfcdownload(bfc, rid)
651
+    
652
+    BiocFileCache::bfcrpath(bfc, rids = rid)
653
+  }
613 654
\ No newline at end of file
... ...
@@ -9,7 +9,7 @@
9 9
 #' both of them once. In addition, default pretrained models
10 10
 #' of the package cannot be changed or removed. 
11 11
 #' This can be done with the new trained model list.
12
-#' @param path.to.models path to the folder containing the list of new models.
12
+#' @param path_to_models path to the folder containing the list of new models.
13 13
 #' 
14 14
 #' @return no return value, but the model is now saved to database
15 15
 #' 
... ...
@@ -27,7 +27,7 @@
27 27
 #' 
28 28
 #' # save the trained classifier to system 
29 29
 #' # test classifier can be used before this step
30
-#' save_new_model(new_model = classifier_t, path.to.models = tempdir())
30
+#' save_new_model(new_model = classifier_t, path_to_models = tempdir())
31 31
 #' 
32 32
 #' # verify if new model has been saved
33 33
 #' print(names(load(file.path(tempdir(), "new_models.rda"))))
... ...
@@ -35,20 +35,24 @@
35 35
 #' 
36 36
 #' @export
37 37
 save_new_model <- function(new_model, include.default = TRUE, 
38
-                    path.to.models = tempdir()) {
38
+                    path_to_models = tempdir()) {
39 39
   default_models <- NULL
40
+  data_env <- new.env(parent = emptyenv())
40 41
   
41
-  utils::data("default_models")
42
-  new_models.file.path = file.path(path.to.models, "new_models.rda")
42
+  new_models.file.path = file.path(path_to_models, "new_models.rda")
43 43
   
44 44
   if (file.exists(new_models.file.path)) {
45
-    load(new_models.file.path)
45
+    load(new_models.file.path, data_env)
46
+    new_models <- data_env[["new_models"]]
46 47
   } else {
47 48
     new_models = NULL
48 49
   }
49 50
   
50 51
   if (include.default == TRUE) {
51 52
     # default models not in new_models will be added to new_models
53
+    path_to_default_models <- download_data_file(TRUE)
54
+    load(path_to_default_models, envir = data_env)
55
+    default_models <- data_env[["default_models"]]
52 56
     to.be.added <- default_models[!names(default_models)%in%names(new_models)]
53 57
     new_models <- append(to.be.added, new_models)
54 58
   }
... ...
@@ -77,7 +81,7 @@ save_new_model <- function(new_model, include.default = TRUE,
77 81
 
78 82
 #' Plant tree from list of models
79 83
 #' 
80
-#' @param models.file.path list of models. If not provided, 
84
+#' @param path_to_models list of models. If not provided, 
81 85
 #' list of default pretrained models in the package will be used.
82 86
 #'  
83 87
 #' @return tree structure and plot of tree 
... ...
@@ -92,24 +96,12 @@ save_new_model <- function(new_model, include.default = TRUE,
92 96
 #' plant_tree()
93 97
 #' 
94 98
 #' @export
95
-plant_tree <- function(models.file.path = "default") { 
99
+plant_tree <- function(path_to_models = "default") { 
96 100
   data_env <- new.env(parent = emptyenv())
97 101
   
98 102
   root.name <- "cell types"
99
-  if (models.file.path == "default") {
100
-    utils::data("default_models", envir = data_env)
101
-    model_list <- data_env[['default_models']]
102
-  } else {
103
-    models_file_path <- file.path(models.file.path, "new_models.rda")
104
-    if (!file.exists(models_file_path)) {
105
-      stop("No file exists in the indicated models file path", 
106
-           call. = FALSE)
107
-    } else {
108
-      load(models_file_path, envir = data_env)
109
-      model_list <- data_env[['new_models']]
110
-    }
111
-  }
112 103
   
104
+  model_list <- load_models(path_to_models) 
113 105
   tree <- NULL
114 106
   
115 107
   if (!is.null(model_list)) {
... ...
@@ -146,7 +138,7 @@ plant_tree <- function(models.file.path = "default") {
146 138
 #' @param cell_type string indicating the cell type of which 
147 139
 #' the model will be removed from package
148 140
 #' Attention: deletion of a parent model will also delete all of child model.
149
-#' @param path.to.models path to the folder containing 
141
+#' @param path_to_models path to the folder containing 
150 142
 #' the list of models in which the to-be-deleted model is.
151 143
 #' 
152 144
 #' @return no return value, but the model is deleted from database
... ...
@@ -162,16 +154,19 @@ plant_tree <- function(models.file.path = "default") {
162 154
 #' marker_genes = selected_marker_genes_T, cell_type = "t cells")
163 155
 #' 
164 156
 #' # save a classifier to system
165
-#' save_new_model(new_model = classifier_t, path.to.models = tempdir())
157
+#' save_new_model(new_model = classifier_t, path_to_models = tempdir())
166 158
 #' 
167 159
 #' # delete classifier from system
168
-#' delete_model("t cells", path.to.models = tempdir())
160
+#' delete_model("t cells", path_to_models = tempdir())
169 161
 #' @export
170
-delete_model <- function(cell_type, path.to.models = tempdir()) {
162
+delete_model <- function(cell_type, path_to_models = tempdir()) {
171 163
   new_models <- NULL
172 164
   data_env <- new.env(parent = emptyenv())
173 165
   
174
-  new_models.file.path <- file.path(path.to.models, "new_models.rda")
166
+  if (path_to_models == 'default')
167
+    stop("Cannot delete default models.", call. = FALSE)
168
+  
169
+  new_models.file.path <- file.path(path_to_models, "new_models.rda")
175 170
   if (!file.exists(new_models.file.path)) {
176 171
     stop("No list of models available", call. = FALSE)
177 172
   } else {
178 173
deleted file mode 100644
179 174
Binary files a/data/default_models.rda and /dev/null differ
180 175
deleted file mode 100644
... ...
@@ -1,21 +0,0 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/data.R
3
-\docType{data}
4
-\name{default_models}
5
-\alias{default_models}
6
-\title{Pretrained classifiers for human cells}
7
-\format{
8
-a list of \code{\link{scAnnotatR}} objects
9
-}
10
-\usage{
11
-default_models
12
-}
13
-\description{
14
-Pretrained classifier obtained by training and testing on the 
15
-Sade-Feldman melanoma dataset, the Jerby-Arnon melanoma dataset, the Haniffa
16
-Skin Cell Atlas and the Haniffa Covid-19 Cell Atlas.
17
-}
18
-\author{
19
-Vy Nguyen, June 2021
20
-}
21
-\keyword{datasets}
... ...
@@ -4,14 +4,14 @@
4 4
 \alias{delete_model}
5 5
 \title{Delete model/branch from package}
6 6
 \usage{
7
-delete_model(cell_type, path.to.models = tempdir())
7
+delete_model(cell_type, path_to_models = tempdir())
8 8
 }
9 9
 \arguments{
10 10
 \item{cell_type}{string indicating the cell type of which 
11 11
 the model will be removed from package
12 12
 Attention: deletion of a parent model will also delete all of child model.}
13 13
 
14
-\item{path.to.models}{path to the folder containing 
14
+\item{path_to_models}{path to the folder containing 
15 15
 the list of models in which the to-be-deleted model is.}
16 16
 }
17 17
 \value{
... ...
@@ -31,8 +31,8 @@ classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
31 31
 marker_genes = selected_marker_genes_T, cell_type = "t cells")
32 32
 
33 33
 # save a classifier to system
34
-save_new_model(new_model = classifier_t, path.to.models = tempdir())
34
+save_new_model(new_model = classifier_t, path_to_models = tempdir())
35 35
 
36 36
 # delete classifier from system
37
-delete_model("t cells", path.to.models = tempdir())
37
+delete_model("t cells", path_to_models = tempdir())
38 38
 }
... ...
@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/class.R, R/support.R
2
+% Please edit documentation in R/class.R, R/classifier.R, R/support.R
3 3
 \name{checkObjectValidity}
4 4
 \alias{checkObjectValidity}
5 5
 \alias{checkCellTypeValidity}
... ...
@@ -13,10 +13,11 @@
13 13
 \alias{caret_model<-,scAnnotatR-method}
14 14
 \alias{marker_genes<-}
15 15
 \alias{marker_genes<-,scAnnotatR-method}
16
+\alias{train_classifier_func}
17
+\alias{test_classifier_func}
16 18
 \alias{balance_dataset}
17 19
 \alias{train_func}
18 20
 \alias{transform_to_zscore}
19
-\alias{load_models}
20 21
 \alias{select_marker_genes}
21 22
 \alias{check_parent_child_coherence}
22 23
 \alias{check_parent_child_coherence,dgCMatrix,vector-method}
... ...
@@ -31,6 +32,8 @@
31 32
 \alias{verify_parent}
32 33
 \alias{test_performance}
33 34
 \alias{classify_clust}
35
+\alias{.get_cache}
36
+\alias{download_data_file}
34 37
 \title{Internal functions of scAnnotatR package}
35 38
 \usage{
36 39
 checkObjectValidity(object)
... ...
@@ -57,14 +60,35 @@ marker_genes(classifier) <- value
57 60
 
58 61
 \S4method{marker_genes}{scAnnotatR}(classifier) <- value
59 62
 
63
+train_classifier_func(
64
+  mat,
65
+  tag,
66
+  cell_type,
67
+  marker_genes,
68
+  parent_tag,
69
+  parent_cell,
70
+  parent_classifier,
71
+  path_to_models,
72
+  zscore
73
+)
74
+
75
+test_classifier_func(
76
+  mat,
77
+  tag,
78
+  classifier,
79
+  parent_tag,
80
+  target_cell_type,
81
+  parent_classifier,
82
+  path_to_models,
83
+  zscore
84
+)
85
+
60 86
 balance_dataset(mat, tag)
61 87
 
62 88
 train_func(mat, tag)
63 89
 
64 90
 transform_to_zscore(mat)
65 91
 
66
-load_models(path_to_models)
67
-
68 92
 select_marker_genes(mat, marker_genes)
69 93
 
70 94
 check_parent_child_coherence(
... ...
@@ -120,6 +144,10 @@ verify_parent(mat, classifier, meta.data)
120 144
 test_performance(mat, classifier, tag)
121 145
 
122 146
 classify_clust(clusts, most_probable_cell_type)
147
+
148
+.get_cache()
149
+
150
+download_data_file(verbose = FALSE)
123 151
 }
124 152
 \arguments{
125 153
 \item{object}{The request classifier to check.}
... ...
@@ -142,25 +170,25 @@ classify_clust(clusts, most_probable_cell_type)
142 170
 
143 171
 \item{tag}{tag of data}
144 172
 
145
-\item{path_to_models}{path to databases, or by default}
146
-
147
-\item{pos_parent}{a vector indicating parent classifier prediction}
148
-
149
-\item{parent_cell}{name of parent cell type}
150
-
151
-\item{target_cell_type}{alternative cell types (in case of testing classifier)}
152
-
153 173
 \item{parent_tag}{vector, named list indicating pre-assigned/predicted 
154 174
 parent cell type}
155 175
 
156
-\item{parent_cell_type}{name of parent cell type}
176
+\item{parent_cell}{name of parent cell type}
157 177
 
158 178
 \item{parent_classifier}{\code{\link{scAnnotatR}} object corresponding 
159 179
 to classification model for the parent cell type}
160 180
 
181
+\item{path_to_models}{path to databases, or by default}
182
+
161 183
 \item{zscore}{boolean indicating the transformation of gene expression 
162 184
 in object to zscore or not}
163 185
 
186
+\item{target_cell_type}{alternative cell types (in case of testing classifier)}
187
+
188
+\item{pos_parent}{a vector indicating parent classifier prediction}
189
+
190
+\item{parent_cell_type}{name of parent cell type}
191
+
164 192
 \item{pred_cells}{a whole prediction for all cells}
165 193
 
166 194
 \item{ignore_ambiguous_result}{whether ignore ambigouous result}
... ...
@@ -174,6 +202,8 @@ in object to zscore or not}
174 202
 \item{clusts}{cluster info}
175 203
 
176 204
 \item{most_probable_cell_type}{predicted cell type}
205
+
206
+\item{verbose}{logical indicating downloading the file or not}
177 207
 }
178 208
 \value{
179 209
 TRUE if the classifier is valid or the reason why it is not
... ...
@@ -200,6 +230,10 @@ the classifier with the new marker genes
200 230
 
201 231
 scAnnotatR object with the new marker genes.
202 232
 
233
+caret trained model
234
+
235
+model performance statistics
236
+
203 237
 a list of balanced count matrix
204 238
 and corresponding tags of balanced count matrix
205 239
 
... ...
@@ -207,8 +241,6 @@ the classification model (caret object)
207 241
 
208 242
 row wise center-scaled count matrix
209 243
 
210
-list of classifiers
211
-
212 244
 filtered matrix
213 245
 
214 246
 list of adjusted tag
... ...
@@ -226,7 +258,18 @@ simplified prediction
226 258
 applicable matrix
227 259
 
228 260
 classifier performance
261
+
262
+BiocFileCache object
263
+
264
+path to the downloaded file in cache
229 265
 }
230 266
 \description{
231 267
 Check if a scAnnotatR object is valid
268
+
269
+Train a classifier for a new cell type from expression matrix
270
+and tag 
271
+If cell type has a parent, only available for \code{\link{scAnnotatR}}
272
+object as parent cell classifying model.
273
+
274
+Testing process from matrix and tag
232 275
 }
233 276
new file mode 100644
... ...
@@ -0,0 +1,17 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/support.R
3
+\name{load_models}
4
+\alias{load_models}
5
+\title{Load classifiers from databases}
6
+\usage{
7
+load_models(path_to_models)
8
+}
9
+\arguments{
10
+\item{path_to_models}{path to databases, or by default}
11
+}
12
+\value{
13
+list of classifiers
14
+}
15
+\description{
16
+Load classifiers from databases
17
+}
... ...
@@ -4,10 +4,10 @@
4 4
 \alias{plant_tree}
5 5
 \title{Plant tree from list of models}
6 6
 \usage{
7
-plant_tree(models.file.path = "default")
7
+plant_tree(path_to_models = "default")
8 8
 }
9 9
 \arguments{
10
-\item{models.file.path}{list of models. If not provided, 
10
+\item{path_to_models}{list of models. If not provided, 
11 11
 list of default pretrained models in the package will be used.}
12 12
 }
13 13
 \value{
... ...
@@ -4,7 +4,7 @@
4 4
 \alias{save_new_model}
5 5
 \title{Save a model to the package}
6 6
 \usage{
7
-save_new_model(new_model, include.default = TRUE, path.to.models = tempdir())
7
+save_new_model(new_model, include.default = TRUE, path_to_models = tempdir())
8 8
 }
9 9
 \arguments{
10 10
 \item{new_model}{new model to be added into the classification tree}
... ...
@@ -18,7 +18,7 @@ both of them once. In addition, default pretrained models
18 18
 of the package cannot be changed or removed. 
19 19
 This can be done with the new trained model list.}
20 20
 
21
-\item{path.to.models}{path to the folder containing the list of new models.}
21
+\item{path_to_models}{path to the folder containing the list of new models.}
22 22
 }
23 23
 \value{
24 24
 no return value, but the model is now saved to database
... ...
@@ -38,7 +38,7 @@ marker_genes = selected_marker_genes_T, cell_type = "t cells")
38 38
 
39 39
 # save the trained classifier to system 
40 40
 # test classifier can be used before this step
41
-save_new_model(new_model = classifier_t, path.to.models = tempdir())
41
+save_new_model(new_model = classifier_t, path_to_models = tempdir())
42 42
 
43 43
 # verify if new model has been saved
44 44
 print(names(load(file.path(tempdir(), "new_models.rda"))))
... ...
@@ -2,7 +2,7 @@ context("scAnnotatR class functions")
2 2
 library(scAnnotatR)
3 3
 
4 4
 test_that("Set cell type changes cell type", {
5
-  data("default_models")
5
+  default_models <- load_models('default')
6 6
   classifier_B <- default_models[['B cells']]
7 7
   
8 8
   cell_type(classifier_B) <- "b cells"
... ...
@@ -10,7 +10,7 @@ test_that("Set cell type changes cell type", {
10 10
 })
11 11
 
12 12
 test_that("Set probability threshold changes probability threshold", {
13
-  data("default_models")
13
+  default_models <- load_models('default')
14 14
   classifier_B <- default_models[['B cells']]
15 15
   
16 16
   p_thres(classifier_B) <- 0.6
... ...
@@ -18,7 +18,7 @@ test_that("Set probability threshold changes probability threshold", {
18 18
 })
19 19
 
20 20
 test_that("Set classifier changes classifier and marker genes", {
21
-  data("default_models")
21
+  default_models <- load_models('default')
22 22
   classifier_B <- default_models[['B cells']]
23 23
   classifier_T <- default_models[['T cells']]
24 24
   
... ...
@@ -64,7 +64,7 @@ library(scAnnotatR)
64 64
 The models are stored in the `default_models` object:
65 65
 
66 66
 ```{r}
67
-data("default_models")
67
+default_models <- load_models("default")
68 68
 names(default_models)
69 69
 ```
70 70
 
... ...
@@ -233,7 +233,7 @@ New classification models can be stored using the `save_new_model` function:
233 233
 
234 234
 ```{r}
235 235
 # no copy of pretrained models is performed
236
-save_new_model(new_model = classifier_B, path.to.models = tempdir(),
236
+save_new_model(new_model = classifier_B, path_to_models = tempdir(),
237 237
                include.default = FALSE) 
238 238
 ```
239 239
 
... ...
@@ -241,7 +241,7 @@ Parameters:
241 241
 
242 242
   * **new_model**: The new model that should be added to the database in the
243 243
                    specified directory.
244
-  * **path.to.models**: The directory where the new models should be stored.
244
+  * **path_to_models**: The directory where the new models should be stored.
245 245
   * **include.default**: If set, the default models shipped with the package
246 246
                          are added to the database.
247 247
 
... ...
@@ -253,7 +253,7 @@ Models can be deleted from the model database using the `delete_model` function:
253 253
 
254 254
 ```{r}
255 255
 # delete the "B cells" model from the new database
256
-delete_model("B cells", path.to.models = tempdir())
256
+delete_model("B cells", path_to_models = tempdir())
257 257
 ```
258 258
 
259 259
 ## Session Info
... ...
@@ -60,7 +60,7 @@ library(scAnnotatR)
60 60
 ```
61 61
 
62 62
 ```{r}
63
-data("default_models")
63
+default_models <- load_models('default')
64 64
 classifier_B <- default_models[['B cells']]
65 65
 classifier_B
66 66
 ```
... ...
@@ -171,7 +171,7 @@ parent classifier to the train method:
171 171
 for example: *parent_cell = 'B cells'*
172 172
   
173 173
   * Users can give name of a model among models available in users' database 
174
-AND the path to that database, for example: `parent_cell = 'B cells', path.to.models = '.'`
174
+AND the path to that database, for example: `parent_cell = 'B cells', path_to_models = '.'`
175 175
 
176 176
 Train the child classifier:
177 177
 ```{r}
... ...
@@ -230,15 +230,15 @@ names(default_models)
230 230
 
231 231
 In our package, the default models already include a model classifying plasma cells.
232 232
 Therefore, we will save this model to a new local database specified by the 
233
-*path.to.models* parameter. If you start with a fresh new local database, 
233
+*path_to_models* parameter. If you start with a fresh new local database, 
234 234
 there is no available parent classifier of plasma cells' classifier. Therefore,
235 235
 we have to save the parent classifier first, e.g. the classifier for B cells.
236 236
 
237 237
 ```{r}
238 238
 # no copy of pretrained models is performed
239
-save_new_model(new_model = classifier_B, path.to.models = tempdir(), 
239
+save_new_model(new_model = classifier_B, path_to_models = tempdir(), 
240 240
                include.default = FALSE) 
241
-save_new_model(new_model = classifier_plasma, path.to.models = tempdir(), 
241
+save_new_model(new_model = classifier_plasma, path_to_models = tempdir(), 
242 242
                include.default = FALSE) 
243 243
 ```
244 244