... | ... |
@@ -23,9 +23,9 @@ setOldClass("train") |
23 | 23 |
#' # train a classifier, for ex: B cell |
24 | 24 |
#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
25 | 25 |
#' set.seed(123) |
26 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
27 |
-#' marker_genes = selected_marker_genes_B, |
|
28 |
-#' cell_type = "B cells") |
|
26 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
27 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
28 |
+#' cell_type = "B cells", tag_slot = 'active.ident') |
|
29 | 29 |
#' |
30 | 30 |
#' classifier_b |
31 | 31 |
#' @export |
... | ... |
@@ -219,8 +219,9 @@ setValidity("scAnnotatR", checkObjectValidity) |
219 | 219 |
#' data("tirosh_mel80_example") |
220 | 220 |
#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
221 | 221 |
#' set.seed(123) |
222 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
223 |
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
222 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
223 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
224 |
+#' cell_type = "B cells", tag_slot = 'active.ident') |
|
224 | 225 |
#' classifier_b |
225 | 226 |
#' |
226 | 227 |
#' @export |
... | ... |
@@ -251,8 +252,9 @@ setMethod("show", c("object" = "scAnnotatR"), function(object) { |
251 | 252 |
#' data("tirosh_mel80_example") |
252 | 253 |
#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
253 | 254 |
#' set.seed(123) |
254 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
255 |
-#' cell_type = "B cells", marker_genes = selected_marker_genes_B) |
|
255 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
256 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
257 |
+#' cell_type = "B cells", tag_slot = 'active.ident') |
|
256 | 258 |
#' cell_type(classifier_b) |
257 | 259 |
#' |
258 | 260 |
#' @export |
... | ... |
@@ -273,8 +275,9 @@ cell_type <- function(classifier) { |
273 | 275 |
#' data("tirosh_mel80_example") |
274 | 276 |
#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
275 | 277 |
#' set.seed(123) |
276 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
277 |
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
278 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
279 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
280 |
+#' cell_type = "B cells", tag_slot = 'active.ident') |
|
278 | 281 |
#' caret_model(classifier_b) |
279 | 282 |
#' |
280 | 283 |
#' @export |
... | ... |
@@ -294,8 +297,9 @@ caret_model <- function(classifier) { |
294 | 297 |
#' data("tirosh_mel80_example") |
295 | 298 |
#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
296 | 299 |
#' set.seed(123) |
297 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
298 |
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
300 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
301 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
302 |
+#' cell_type = "B cells", tag_slot = 'active.ident') |
|
299 | 303 |
#' marker_genes(classifier_b) |
300 | 304 |
#' |
301 | 305 |
#' @export |
... | ... |
@@ -315,8 +319,9 @@ marker_genes <- function(classifier) { |
315 | 319 |
#' data("tirosh_mel80_example") |
316 | 320 |
#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
317 | 321 |
#' set.seed(123) |
318 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
319 |
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
322 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
323 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
324 |
+#' cell_type = "B cells", tag_slot = 'active.ident') |
|
320 | 325 |
#' p_thres(classifier_b) |
321 | 326 |
#' |
322 | 327 |
#' @export |
... | ... |
@@ -337,8 +342,9 @@ p_thres <- function(classifier) { |
337 | 342 |
#' data("tirosh_mel80_example") |
338 | 343 |
#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
339 | 344 |
#' set.seed(123) |
340 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
341 |
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
345 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
346 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
347 |
+#' cell_type = "B cells", tag_slot = 'active.ident') |
|
342 | 348 |
#' parent(classifier_b) |
343 | 349 |
#' |
344 | 350 |
#' @export |
... | ... |
@@ -367,8 +373,9 @@ setGeneric('cell_type<-', function(classifier, value) |
367 | 373 |
#' data("tirosh_mel80_example") |
368 | 374 |
#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
369 | 375 |
#' set.seed(123) |
370 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
371 |
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
376 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
377 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
378 |
+#' cell_type = "B cells", tag_slot = 'active.ident') |
|
372 | 379 |
#' cell_type(classifier_b) <- "B cell" |
373 | 380 |
#' @rdname cell_type |
374 | 381 |
setReplaceMethod('cell_type', c("classifier" = "scAnnotatR"), |
... | ... |
@@ -402,10 +409,12 @@ setGeneric('p_thres<-', function(classifier, value) |
402 | 409 |
#' data("tirosh_mel80_example") |
403 | 410 |
#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
404 | 411 |
#' set.seed(123) |
405 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
406 |
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
407 |
-#' classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, |
|
408 |
-#' classifier = classifier_b) |
|
412 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
413 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
414 |
+#' cell_type = "B cells", tag_slot = 'active.ident') |
|
415 |
+#' classifier_b_test <- test_classifier(classifier = classifier_b, |
|
416 |
+#' test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', |
|
417 |
+#' tag_slot = 'active.ident') |
|
409 | 418 |
#' # assign a new threshold probability for prediction |
410 | 419 |
#' p_thres(classifier_b) <- 0.4 |
411 | 420 |
#' @rdname p_thres |
... | ... |
@@ -6,25 +6,39 @@ |
6 | 6 |
#' |
7 | 7 |
#' @param train_obj object that can be used for training the new model. |
8 | 8 |
#' \code{\link{Seurat}} object or \code{\link{SingleCellExperiment}} object |
9 |
-#' is expected. |
|
9 |
+#' is supported. |
|
10 | 10 |
#' If the training model has parent, parent_tag_slot may have been indicated. |
11 | 11 |
#' This field would have been filled out automatically |
12 | 12 |
#' if the user has previously run the classify_cells function. |
13 | 13 |
#' If no (predicted) cell type annotation provided, |
14 | 14 |
#' the function can be run if 1- parent_cell or 2- parent_classifier is provided. |
15 |
+#' @param assay name of assay to use in training object. |
|
16 |
+#' @param slot type of expression data to use in training object, omitted if |
|
17 |
+#' train_obj is \code{\link{SingleCellExperiment}} object. |
|
15 | 18 |
#' @param cell_type string indicating the name of the subtype |
16 | 19 |
#' This must exactly match cell tag/label if cell tag/label is a string. |
17 | 20 |
#' @param marker_genes list of marker genes used for the new training model |
21 |
+#' @param tag_slot string, name of slot in cell meta data |
|
22 |
+#' indicating cell tag/label in the training object. |
|
23 |
+#' Strings indicating cell types are expected in this slot. |
|
24 |
+#' For \code{\link{Seurat}} object, e.g. "active.ident" (no default value). |
|
25 |
+#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
26 |
+#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
27 |
+#' 1/"yes"/T/TRUE: being new cell type. |
|
18 | 28 |
#' @param parent_cell string indicating the name of the parent cell type, |
19 | 29 |
#' if parent cell type classifier has already been saved in model database. |
20 | 30 |
#' Adjust path_to_models for exact database. |
31 |
+#' @param parent_tag_slot string, name of a slot in cell meta data |
|
32 |
+#' indicating assigned/predicted cell type. Default is "predicted_cell_type". |
|
33 |
+#' This slot would have been filled automatically |
|
34 |
+#' if the user has called the classify_cells function. |
|
35 |
+#' The slot must contain only string values. |
|
21 | 36 |
#' @param parent_classifier classification model for the parent cell type |
22 | 37 |
#' @param path_to_models path to the folder containing the model database. |
23 | 38 |
#' As default, the pretrained models in the package will be used. |
24 | 39 |
#' If user has trained new models, indicate the folder containing the |
25 | 40 |
#' new_models.rda file. |
26 | 41 |
#' @param zscore whether gene expression in train_obj is transformed to zscore |
27 |
-#' @param ... arguments passed to other methods |
|
28 | 42 |
#' |
29 | 43 |
#' @return \code{\link{scAnnotatR}} object |
30 | 44 |
#' |
... | ... |
@@ -35,34 +49,6 @@ |
35 | 49 |
#' as parent cell type. For example, when training for B cells, |
36 | 50 |
#' plasma cells must be annotated as B cells in order to be used. |
37 | 51 |
#' |
38 |
-#' @export |
|
39 |
-setGeneric("train_classifier", |
|
40 |
- function(train_obj, cell_type, marker_genes, |
|
41 |
- parent_cell = NA_character_, |
|
42 |
- parent_classifier = NULL, path_to_models = "default", |
|
43 |
- zscore = TRUE, ...) |
|
44 |
- standardGeneric("train_classifier")) |
|
45 |
- |
|
46 |
-#' @inherit train_classifier |
|
47 |
-#' |
|
48 |
-#' @param seurat_tag_slot string, name of slot in cell meta data |
|
49 |
-#' indicating cell tag/label in the training object. |
|
50 |
-#' Strings indicating cell types are expected in this slot. |
|
51 |
-#' For \code{\link{Seurat}} object, default value is "active.ident". |
|
52 |
-#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
53 |
-#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
54 |
-#' 1/"yes"/T/TRUE: being new cell type. |
|
55 |
-#' @param seurat_parent_tag_slot string, name of a slot in cell meta data |
|
56 |
-#' indicating assigned/predicted cell type. Default is "predicted_cell_type". |
|
57 |
-#' This slot would have been filled automatically |
|
58 |
-#' if user have called classify_cells function. |
|
59 |
-#' The slot must contain only string values. |
|
60 |
-#' @param seurat_assay name of assay to use in training object. |
|
61 |
-#' Default to 'RNA' assay. |
|
62 |
-#' @param seurat_slot type of expression data to use in training object. |
|
63 |
-#' For \code{\link{Seurat}} object, available types are: "counts", "data" |
|
64 |
-#' and "scale.data". Default to "counts", which contains unnormalized data. |
|
65 |
-#' |
|
66 | 52 |
#' @examples |
67 | 53 |
#' # load small example dataset |
68 | 54 |
#' data("tirosh_mel80_example") |
... | ... |
@@ -76,8 +62,9 @@ setGeneric("train_classifier", |
76 | 62 |
#' # train the classifier, the "cell_type" argument must match |
77 | 63 |
#' # the cell labels in the data, except upper/lower case |
78 | 64 |
#' set.seed(123) |
79 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
80 |
-#' marker_genes = selected_marker_genes_B, cell_type = "b cells") |
|
65 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
66 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
67 |
+#' cell_type = "b cells", tag_slot = 'active.ident') |
|
81 | 68 |
#' |
82 | 69 |
#' # classify cell types using B cell classifier, |
83 | 70 |
#' # a test classifier process may be used before applying the classifier |
... | ... |
@@ -96,18 +83,78 @@ setGeneric("train_classifier", |
96 | 83 |
#' # for the training process. |
97 | 84 |
#' set.seed(123) |
98 | 85 |
#' plasma_classifier <- train_classifier(train_obj = tirosh_mel80_example, |
99 |
-#' cell_type = "Plasma cell", marker_genes = p_marker_genes, |
|
100 |
-#' parent_classifier = classifier_b, seurat_tag_slot = 'plasma_cell_tag') |
|
86 |
+#' assay = 'RNA', slot = 'counts', cell_type = 'Plasma cell', |
|
87 |
+#' marker_genes = p_marker_genes, tag_slot = 'plasma_cell_tag', |
|
88 |
+#' parent_classifier = classifier_b) |
|
101 | 89 |
#' |
102 |
-#' @importFrom Seurat GetAssayData |
|
90 |
+#' @export |
|
91 |
+train_classifier <- function(train_obj, assay, slot = NULL, |
|
92 |
+ cell_type, marker_genes, tag_slot, |
|
93 |
+ parent_cell = NA_character_, |
|
94 |
+ parent_tag_slot = 'predicted_cell_type', |
|
95 |
+ parent_classifier = NULL, path_to_models = "default", |
|
96 |
+ zscore = TRUE) { |
|
97 |
+ if (is(train_obj, 'Seurat')) { |
|
98 |
+ object <- |
|
99 |
+ train_classifier_seurat(train_obj, cell_type, marker_genes, |
|
100 |
+ parent_cell, parent_classifier, path_to_models, |
|
101 |
+ zscore, tag_slot, parent_tag_slot, assay, slot) |
|
102 |
+ } else if (is(train_obj, 'SingleCellExperiment')) { |
|
103 |
+ object <- |
|
104 |
+ train_classifier_sce(train_obj, cell_type, marker_genes, |
|
105 |
+ parent_cell, parent_classifier, path_to_models, |
|
106 |
+ zscore, tag_slot, parent_tag_slot, assay) |
|
107 |
+ } else { |
|
108 |
+ stop('Training object of not supported class', call. = FALSE) |
|
109 |
+ } |
|
110 |
+ |
|
111 |
+ return(object) |
|
112 |
+} |
|
113 |
+ |
|
114 |
+#' Train cell type classifier, when train_obj is Seurat object |
|
103 | 115 |
#' |
104 |
-#' @rdname train_classifier |
|
105 |
-setMethod("train_classifier", c("train_obj" = "Seurat"), |
|
106 |
- function(train_obj, cell_type, marker_genes, parent_cell = NA_character_, |
|
107 |
- parent_classifier = NULL, path_to_models = "default", |
|
108 |
- zscore = TRUE, seurat_tag_slot = "active.ident", |
|
109 |
- seurat_parent_tag_slot = "predicted_cell_type", |
|
110 |
- seurat_assay = 'RNA', seurat_slot = 'counts', ...) { |
|
116 |
+#' @description Train a classifier for a new cell type. |
|
117 |
+#' If cell type has a parent, only available for \code{\link{scAnnotatR}} |
|
118 |
+#' object as parent cell classifying model. |
|
119 |
+#' |
|
120 |
+#' @param train_obj Seurat object |
|
121 |
+#' @param seurat_assay name of assay to use in training object. |
|
122 |
+#' @param seurat_slot type of expression data to use in training object |
|
123 |
+#' @param cell_type string indicating the name of the subtype |
|
124 |
+#' This must exactly match cell tag/label if cell tag/label is a string. |
|
125 |
+#' @param marker_genes list of marker genes used for the new training model |
|
126 |
+#' @param seurat_tag_slot string, name of slot in cell meta data |
|
127 |
+#' indicating cell tag/label in the training object. |
|
128 |
+#' Strings indicating cell types are expected in this slot. |
|
129 |
+#' For \code{\link{Seurat}} object, e.g. "active.ident" (no default value). |
|
130 |
+#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
131 |
+#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
132 |
+#' 1/"yes"/T/TRUE: being new cell type. |
|
133 |
+#' @param parent_cell string indicating the name of the parent cell type, |
|
134 |
+#' if parent cell type classifier has already been saved in model database. |
|
135 |
+#' Adjust path_to_models for exact database. |
|
136 |
+#' @param seurat_parent_tag_slot string, name of a slot in cell meta data |
|
137 |
+#' indicating assigned/predicted cell type. Default is "predicted_cell_type". |
|
138 |
+#' This slot would have been filled automatically |
|
139 |
+#' if the user has called the classify_cells function. |
|
140 |
+#' The slot must contain only string values. |
|
141 |
+#' @param parent_classifier classification model for the parent cell type |
|
142 |
+#' @param path_to_models path to the folder containing the model database. |
|
143 |
+#' As default, the pretrained models in the package will be used. |
|
144 |
+#' If user has trained new models, indicate the folder containing the |
|
145 |
+#' new_models.rda file. |
|
146 |
+#' @param zscore whether gene expression in train_obj is transformed to zscore |
|
147 |
+#' |
|
148 |
+#' @return \code{\link{scAnnotatR}} object |
|
149 |
+#' |
|
150 |
+#' @importFrom Seurat GetAssayData Idents |
|
151 |
+#' |
|
152 |
+#' @rdname internal |
|
153 |
+train_classifier_seurat <- |
|
154 |
+ function(train_obj, cell_type, marker_genes, parent_cell = NA_character_, |
|
155 |
+ parent_classifier = NULL, path_to_models = "default", zscore = TRUE, |
|
156 |
+ seurat_tag_slot, seurat_parent_tag_slot = 'predicted_cell_type', |
|
157 |
+ seurat_assay, seurat_slot) { |
|
111 | 158 |
# convert Seurat object to matrix |
112 | 159 |
mat = Seurat::GetAssayData(object = train_obj, |
113 | 160 |
assay = seurat_assay, slot = seurat_slot) |
... | ... |
@@ -126,43 +173,59 @@ setMethod("train_classifier", c("train_obj" = "Seurat"), |
126 | 173 |
names(parent_tag) <- colnames(train_obj) |
127 | 174 |
} else parent_tag <- NULL |
128 | 175 |
|
129 |
- object <- train_classifier_func(mat, tag, cell_type, marker_genes, |
|
176 |
+ object <- train_classifier_from_mat(mat, tag, cell_type, marker_genes, |
|
130 | 177 |
parent_tag, parent_cell, parent_classifier, |
131 | 178 |
path_to_models, zscore) |
132 | 179 |
return(object) |
133 |
-}) |
|
180 |
+} |
|
134 | 181 |
|
135 |
-#' @inherit train_classifier |
|
182 |
+#' Train cell type classifier, when train_obj is SCE object |
|
136 | 183 |
#' |
137 |
-#' @param sce_tag_slot string, name of annotation slot indicating |
|
138 |
-#' cell tag/label in the training object. |
|
139 |
-#' For \code{\link{SingleCellExperiment}} object, default value is "ident". |
|
184 |
+#' @description Train a classifier for a new cell type. |
|
185 |
+#' If cell type has a parent, only available for \code{\link{scAnnotatR}} |
|
186 |
+#' object as parent cell classifying model. |
|
187 |
+#' |
|
188 |
+#' @param train_obj SCE object |
|
189 |
+#' @param sce_assay name of assay to use in training object. |
|
190 |
+#' @param cell_type string indicating the name of the subtype |
|
191 |
+#' This must exactly match cell tag/label if cell tag/label is a string. |
|
192 |
+#' @param marker_genes list of marker genes used for the new training model |
|
193 |
+#' @param sce_tag_slot string, name of slot in cell meta data |
|
194 |
+#' indicating cell tag/label in the training object. |
|
195 |
+#' Strings indicating cell types are expected in this slot. |
|
196 |
+#' No default value is set; the slot name must be supplied by the caller. |
|
140 | 197 |
#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
141 | 198 |
#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
142 | 199 |
#' 1/"yes"/T/TRUE: being new cell type. |
200 |
+#' @param parent_cell string indicating the name of the parent cell type, |
|
201 |
+#' if parent cell type classifier has already been saved in model database. |
|
202 |
+#' Adjust path_to_models for exact database. |
|
143 | 203 |
#' @param sce_parent_tag_slot string, name of a slot in cell meta data |
144 |
-#' indicating pre-assigned/predicted cell type. |
|
145 |
-#' Default field is "predicted_cell_type". |
|
146 |
-#' This field would have been filled automatically |
|
147 |
-#' when user called classify_cells function. |
|
204 |
+#' indicating assigned/predicted cell type. Default is "predicted_cell_type". |
|
205 |
+#' This slot would have been filled automatically |
|
206 |
+#' if the user has called the classify_cells function. |
|
148 | 207 |
#' The slot must contain only string values. |
149 |
-#' @param sce_assay name of assay to use in training object. |
|
150 |
-#' Default to 'logcounts' assay. |
|
208 |
+#' @param parent_classifier classification model for the parent cell type |
|
209 |
+#' @param path_to_models path to the folder containing the model database. |
|
210 |
+#' As default, the pretrained models in the package will be used. |
|
211 |
+#' If user has trained new models, indicate the folder containing the |
|
212 |
+#' new_models.rda file. |
|
213 |
+#' @param zscore whether gene expression in train_obj is transformed to zscore |
|
151 | 214 |
#' |
215 |
+#' @return \code{\link{scAnnotatR}} object |
|
216 |
+#' |
|
152 | 217 |
#' @import SingleCellExperiment |
153 | 218 |
#' @importFrom SummarizedExperiment assay |
154 | 219 |
#' |
155 |
-#' @rdname train_classifier |
|
156 |
-setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"), |
|
157 |
- function(train_obj, cell_type, marker_genes, parent_cell = NA_character_, |
|
158 |
- parent_classifier = NULL, path_to_models = "default", |
|
159 |
- zscore = TRUE, sce_tag_slot = "ident", |
|
160 |
- sce_parent_tag_slot = "predicted_cell_type", |
|
161 |
- sce_assay = 'logcounts', ...) { |
|
220 |
+#' @rdname internal |
|
221 |
+train_classifier_sce <- |
|
222 |
+ function(train_obj, cell_type, marker_genes, parent_cell = NA_character_, |
|
223 |
+ parent_classifier = NULL, path_to_models = "default", zscore = TRUE, |
|
224 |
+ sce_tag_slot, sce_parent_tag_slot = "predicted_cell_type", sce_assay) { |
|
162 | 225 |
# solve duplication of cell names |
163 | 226 |
colnames(train_obj) <- make.unique(colnames(train_obj), sep = '_') |
164 | 227 |
|
165 |
- # convert Seurat object to matrix |
|
228 |
+ # convert SCE object to matrix |
|
166 | 229 |
mat = SummarizedExperiment::assay(train_obj, sce_assay) |
167 | 230 |
|
168 | 231 |
tag = SummarizedExperiment::colData(train_obj)[, sce_tag_slot] |
... | ... |
@@ -173,12 +236,12 @@ setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"), |
173 | 236 |
names(parent_tag) <- colnames(train_obj) |
174 | 237 |
} else parent_tag <- NULL |
175 | 238 |
|
176 |
- object <- train_classifier_func(mat, tag, cell_type, marker_genes, |
|
239 |
+ object <- train_classifier_from_mat(mat, tag, cell_type, marker_genes, |
|
177 | 240 |
parent_tag, parent_cell, parent_classifier, |
178 | 241 |
path_to_models, zscore) |
179 | 242 |
|
180 | 243 |
return(object) |
181 |
-}) |
|
244 |
+} |
|
182 | 245 |
|
183 | 246 |
#' Train cell type from matrix |
184 | 247 |
#' |
... | ... |
@@ -205,7 +268,7 @@ setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"), |
205 | 268 |
#' @return caret trained model |
206 | 269 |
#' |
207 | 270 |
#' @rdname internal |
208 |
-train_classifier_func <- function(mat, tag, cell_type, marker_genes, |
|
271 |
+train_classifier_from_mat <- function(mat, tag, cell_type, marker_genes, |
|
209 | 272 |
parent_tag, parent_cell, parent_classifier, |
210 | 273 |
path_to_models, zscore) { |
211 | 274 |
#--- part of parent cell type |
... | ... |
@@ -283,21 +346,34 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes, |
283 | 346 |
#' |
284 | 347 |
#' @description Testing process. |
285 | 348 |
#' |
286 |
-#' @param test_obj xxobject that can be used for testing |
|
287 |
-#' @param classifier classification model |
|
349 |
+#' @param test_obj object that can be used for testing |
|
350 |
+#' @param assay name of assay to use in test_obj |
|
351 |
+#' @param slot type of expression data to use in test_obj. |
|
352 |
+#' For Seurat object, some available types are: "counts", "data" and "scale.data". |
|
353 |
+#' Ignore this if test_obj is \code{\link{SingleCellExperiment}} object. |
|
354 |
+#' @param classifier scAnnotatR classification model |
|
355 |
+#' @param tag_slot string, name of annotation slot |
|
356 |
+#' indicating cell tag/label in the testing object. |
|
357 |
+#' Strings indicating cell types are expected in this slot. |
|
358 |
+#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
359 |
+#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
360 |
+#' 1/"yes"/T/TRUE: being new cell type. |
|
288 | 361 |
#' @param target_cell_type vector indicating other cell types than cell labels |
289 | 362 |
#' that can be considered as the main cell type in classifier, |
290 | 363 |
#' for example, c("plasma cell", "b cell", "b cells", "activating b cell"). |
291 | 364 |
#' Default as NULL. |
292 | 365 |
#' @param parent_classifier \code{\link{scAnnotatR}} object |
293 | 366 |
#' corresponding to classification model for the parent cell type |
367 |
+#' @param parent_tag_slot string, name of tag slot in cell meta data |
|
368 |
+#' indicating pre-assigned/predicted parent cell type. |
|
369 |
+#' Default field is "predicted_cell_type". |
|
370 |
+#' The slot must contain only string values. |
|
294 | 371 |
#' @param path_to_models path to the folder containing the list of models. |
295 | 372 |
#' As default, the pretrained models in the package will be used. |
296 | 373 |
#' If user has trained new models, indicate the folder containing |
297 | 374 |
#' the new_models.rda file. |
298 | 375 |
#' @param zscore boolean, whether gene expression is transformed to zscore |
299 |
-#' @param ... arguments passed to other methods |
|
300 |
-#' |
|
376 |
+#' |
|
301 | 377 |
#' @return result of testing process in form of a list, |
302 | 378 |
#' including predicted values, prediction accuracy at a probability threshold, |
303 | 379 |
#' and roc curve information. |
... | ... |
@@ -309,60 +385,99 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes, |
309 | 385 |
#' For example, when testing for B cells, plasma cells can be annotated as |
310 | 386 |
#' B cells, or target_cell_type is set c("plasma cells"). |
311 | 387 |
#' |
388 |
+#' @examples |
|
389 |
+#' # load small example dataset |
|
390 |
+#' data("tirosh_mel80_example") |
|
391 |
+#' |
|
392 |
+#' # train the classifier |
|
393 |
+#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
|
394 |
+#' set.seed(123) |
|
395 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
396 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
397 |
+#' cell_type = "b cells", tag_slot = 'active.ident') |
|
398 |
+#' |
|
399 |
+#' # test the classifier, target cell type can be in other formats or |
|
400 |
+#' # alternative cell type that can be considered as the classified cell type |
|
401 |
+#' classifier_b_test <- test_classifier(classifier = classifier_b, |
|
402 |
+#' test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', |
|
403 |
+#' tag_slot = 'active.ident', target_cell_type = c("B cell")) |
|
404 |
+#' classifier_b_test |
|
405 |
+#' |
|
312 | 406 |
#' @export |
313 |
-setGeneric("test_classifier", function(test_obj, classifier, |
|
314 |
- target_cell_type = NULL, |
|
315 |
- parent_classifier = NULL, |
|
316 |
- path_to_models = "default", |
|
317 |
- zscore = TRUE, ...) |
|
407 |
+setGeneric("test_classifier", |
|
408 |
+ function(classifier, test_obj, assay, slot = NULL, tag_slot, |
|
409 |
+ target_cell_type = NULL, parent_classifier = NULL, |
|
410 |
+ parent_tag_slot = 'predicted_cell_type', |
|
411 |
+ path_to_models = "default", zscore = TRUE) |
|
318 | 412 |
standardGeneric("test_classifier")) |
319 | 413 |
|
320 | 414 |
#' @inherit test_classifier |
321 | 415 |
#' |
416 |
+#' @rdname test_classifier |
|
417 |
+setMethod('test_classifier', c('classifier' = 'scAnnotatR'), |
|
418 |
+ function(classifier, test_obj, assay, slot = NULL, tag_slot, |
|
419 |
+ target_cell_type = NULL, parent_classifier = NULL, |
|
420 |
+ parent_tag_slot = 'predicted_cell_type', |
|
421 |
+ path_to_models = "default", zscore = TRUE) { |
|
422 |
+ if (is(test_obj, 'Seurat')) { |
|
423 |
+ return_val <- |
|
424 |
+ test_classifier_seurat(test_obj, classifier, target_cell_type, |
|
425 |
+ parent_classifier, path_to_models, zscore, |
|
426 |
+ tag_slot, parent_tag_slot, assay, slot) |
|
427 |
+ } else if (is(test_obj, 'SingleCellExperiment')) { |
|
428 |
+ return_val <- |
|
429 |
+ test_classifier_sce(test_obj, classifier, target_cell_type, |
|
430 |
+ parent_classifier, path_to_models, zscore, |
|
431 |
+ tag_slot, parent_tag_slot, assay) |
|
432 |
+ } else { |
|
433 |
+ stop('Testing object of not supported class', call. = FALSE) |
|
434 |
+ } |
|
435 |
+ return(return_val) |
|
436 |
+}) |
|
437 |
+ |
|
438 |
+#' Testing process for Seurat object |
|
439 |
+#' |
|
440 |
+#' @description Testing process when test object is of type Seurat |
|
441 |
+#' |
|
442 |
+#' @param test_obj Seurat object used for testing |
|
443 |
+#' @param seurat_assay name of assay to use in test_object |
|
444 |
+#' @param seurat_slot type of expression data to use in test_object. |
|
445 |
+#' For Seurat object, some available types are: "counts", "data" and "scale.data". |
|
446 |
+#' @param classifier scAnnotatR classification model |
|
322 | 447 |
#' @param seurat_tag_slot string, name of annotation slot |
323 | 448 |
#' indicating cell tag/label in the testing object. |
324 | 449 |
#' Strings indicating cell types are expected in this slot. |
325 |
-#' For \code{\link{Seurat}} object, default value is "active.ident". |
|
326 | 450 |
#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
327 | 451 |
#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
328 | 452 |
#' 1/"yes"/T/TRUE: being new cell type. |
453 |
+#' @param target_cell_type vector indicating other cell types than cell labels |
|
454 |
+#' that can be considered as the main cell type in classifier, |
|
455 |
+#' for example, c("plasma cell", "b cell", "b cells", "activating b cell"). |
|
456 |
+#' Default as NULL. |
|
457 |
+#' @param parent_classifier \code{\link{scAnnotatR}} object |
|
458 |
+#' corresponding to classification model for the parent cell type |
|
329 | 459 |
#' @param seurat_parent_tag_slot string, name of tag slot in cell meta data |
330 | 460 |
#' indicating pre-assigned/predicted parent cell type. |
331 | 461 |
#' Default field is "predicted_cell_type". |
332 | 462 |
#' The slot must contain only string values. |
333 |
-#' @param seurat_assay name of assay to use in |
|
334 |
-#' \code{\link{Seurat}} object, defaults to 'RNA' assay. |
|
335 |
-#' @param seurat_slot type of expression data to use in |
|
336 |
-#' \code{\link{Seurat}} object. |
|
337 |
-#' Some available types are: "counts", "data" and "scale.data". |
|
338 |
-#' Default to "counts", which contains unnormalized data. |
|
339 |
-#' |
|
340 |
-#' @examples |
|
341 |
-#' # load small example dataset |
|
342 |
-#' data("tirosh_mel80_example") |
|
343 |
-#' |
|
344 |
-#' # train the classifier |
|
345 |
-#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
|
346 |
-#' set.seed(123) |
|
347 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
348 |
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
463 |
+#' @param path_to_models path to the folder containing the list of models. |
|
464 |
+#' As default, the pretrained models in the package will be used. |
|
465 |
+#' If user has trained new models, indicate the folder containing |
|
466 |
+#' the new_models.rda file. |
|
467 |
+#' @param zscore boolean, whether gene expression is transformed to zscore |
|
349 | 468 |
#' |
350 |
-#' # test the classifier, target cell type can be in other formats or |
|
351 |
-#' # alternative cell type that can be considered as the classified cell type |
|
352 |
-#' classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, |
|
353 |
-#' classifier = classifier_b, target_cell_type = c("B cell")) |
|
354 |
-#' classifier_b_test |
|
469 |
+#' @return result of testing process in form of a list, |
|
470 |
+#' including predicted values, prediction accuracy at a probability threshold, |
|
471 |
+#' and roc curve information. |
|
355 | 472 |
#' |
356 | 473 |
#' @importFrom Seurat GetAssayData |
357 | 474 |
#' |
358 |
-#' @rdname test_classifier |
|
359 |
-setMethod("test_classifier", c("test_obj" = "Seurat", |
|
360 |
- "classifier" = "scAnnotatR"), |
|
361 |
- function(test_obj, classifier, target_cell_type = NULL, |
|
362 |
- parent_classifier = NULL, path_to_models = "default", |
|
363 |
- zscore = TRUE, seurat_tag_slot = "active.ident", |
|
364 |
- seurat_parent_tag_slot = "predicted_cell_type", |
|
365 |
- seurat_assay = 'RNA', seurat_slot = 'counts', ...) { |
|
475 |
+#' @rdname internal |
|
476 |
+test_classifier_seurat <- |
|
477 |
+ function(test_obj, classifier, target_cell_type = NULL, |
|
478 |
+ parent_classifier = NULL, path_to_models = "default", zscore = TRUE, |
|
479 |
+ seurat_tag_slot, seurat_parent_tag_slot = "predicted_cell_type", |
|
480 |
+ seurat_assay, seurat_slot) { |
|
366 | 481 |
. <- fpr <- tpr <- NULL |
367 | 482 |
# convert Seurat object to matrix |
368 | 483 |
mat = Seurat::GetAssayData( |
... | ... |
@@ -382,39 +497,53 @@ setMethod("test_classifier", c("test_obj" = "Seurat", |
382 | 497 |
names(parent_tag) <- colnames(test_obj) |
383 | 498 |
} else parent_tag <- NULL |
384 | 499 |
|
385 |
- return_val <- test_classifier_func(mat, tag, classifier, parent_tag, |
|
500 |
+ return_val <- test_classifier_from_mat(mat, tag, classifier, parent_tag, |
|
386 | 501 |
target_cell_type, parent_classifier, |
387 | 502 |
path_to_models, zscore) |
388 | 503 |
return(return_val) |
389 |
-}) |
|
504 |
+} |
|
390 | 505 |
|
391 |
-#' @inherit test_classifier |
|
506 |
+#' Testing process for SCE object |
|
507 |
+#' |
|
508 |
+#' @description Testing process when test object is of type SCE |
|
392 | 509 |
#' |
510 |
+#' @param test_obj SCE object used for testing |
|
511 |
+#' @param sce_assay name of assay to use in test_object |
|
512 |
+#' @param classifier scAnnotatR classification model |
|
393 | 513 |
#' @param sce_tag_slot string, name of annotation slot |
394 | 514 |
#' indicating cell tag/label in the testing object. |
395 | 515 |
#' Strings indicating cell types are expected in this slot. |
396 |
-#' Default value is "ident". |
|
397 | 516 |
#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
398 | 517 |
#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
399 | 518 |
#' 1/"yes"/T/TRUE: being new cell type. |
519 |
+#' @param target_cell_type vector indicating other cell types than cell labels |
|
520 |
+#' that can be considered as the main cell type in classifier, |
|
521 |
+#' for example, c("plasma cell", "b cell", "b cells", "activating b cell"). |
|
522 |
+#' Default as NULL. |
|
523 |
+#' @param parent_classifier \code{\link{scAnnotatR}} object |
|
524 |
+#' corresponding to classification model for the parent cell type |
|
400 | 525 |
#' @param sce_parent_tag_slot string, name of tag slot in cell meta data |
401 | 526 |
#' indicating pre-assigned/predicted parent cell type. |
402 |
-#' Default is "predicted_cell_type". |
|
527 |
+#' Default field is "predicted_cell_type". |
|
403 | 528 |
#' The slot must contain only string values. |
404 |
-#' @param sce_assay name of assay to use in \code{\link{SingleCellExperiment}} |
|
405 |
-#' object, defaults to 'logcounts' assay. |
|
406 |
-#' |
|
529 |
+#' @param path_to_models path to the folder containing the list of models. |
|
530 |
+#' As default, the pretrained models in the package will be used. |
|
531 |
+#' If user has trained new models, indicate the folder containing |
|
532 |
+#' the new_models.rda file. |
|
533 |
+#' @param zscore boolean, whether gene expression is transformed to zscore |
|
534 |
+#' |
|
535 |
+#' @return result of testing process in form of a list, |
|
536 |
+#' including predicted values, prediction accuracy at a probability threshold, |
|
537 |
+#' and roc curve information. |
|
538 |
+#' |
|
407 | 539 |
#' @import SingleCellExperiment |
408 | 540 |
#' @importFrom SummarizedExperiment assay |
409 | 541 |
#' |
410 |
-#' @rdname test_classifier |
|
411 |
-setMethod("test_classifier", c("test_obj" = "SingleCellExperiment", |
|
412 |
- "classifier" = "scAnnotatR"), |
|
413 |
- function(test_obj, classifier, target_cell_type = NULL, |
|
414 |
- parent_classifier = NULL, path_to_models = "default", |
|
415 |
- zscore = TRUE, sce_tag_slot = "ident", |
|
416 |
- sce_parent_tag_slot = "predicted_cell_type", |
|
417 |
- sce_assay = 'logcounts', ...) { |
|
542 |
+#' @rdname internal |
|
543 |
+test_classifier_sce <- |
|
544 |
+ function(test_obj, classifier, target_cell_type = NULL, |
|
545 |
+ parent_classifier = NULL, path_to_models = "default", zscore = TRUE, |
|
546 |
+ sce_tag_slot, sce_parent_tag_slot = "predicted_cell_type", sce_assay) { |
|
418 | 547 |
# solve duplication of cell names |
419 | 548 |
colnames(test_obj) <- make.unique(colnames(test_obj), sep = '_') |
420 | 549 |
. <- fpr <- tpr <- NULL |
... | ... |
@@ -430,12 +559,12 @@ setMethod("test_classifier", c("test_obj" = "SingleCellExperiment", |
430 | 559 |
names(parent_tag) <- colnames(test_obj) |
431 | 560 |
} else parent_tag <- NULL |
432 | 561 |
|
433 |
- return_val <- test_classifier_func(mat, tag, classifier, parent_tag, |
|
562 |
+ return_val <- test_classifier_from_mat(mat, tag, classifier, parent_tag, |
|
434 | 563 |
target_cell_type, parent_classifier, |
435 | 564 |
path_to_models, zscore) |
436 | 565 |
|
437 | 566 |
return(return_val) |
438 |
-}) |
|
567 |
+} |
|
439 | 568 |
|
440 | 569 |
#' Run testing process from matrix and tag |
441 | 570 |
#' |
... | ... |
@@ -458,7 +587,7 @@ setMethod("test_classifier", c("test_obj" = "SingleCellExperiment", |
458 | 587 |
#' @return model performance statistics |
459 | 588 |
#' |
460 | 589 |
#' @rdname internal |
461 |
-test_classifier_func <- function(mat, tag, classifier, parent_tag, |
|
590 |
+test_classifier_from_mat <- function(mat, tag, classifier, parent_tag, |
|
462 | 591 |
target_cell_type, parent_classifier, |
463 | 592 |
path_to_models, zscore) { |
464 | 593 |
# target_cell_type check |
... | ... |
@@ -522,11 +651,13 @@ test_classifier_func <- function(mat, tag, classifier, parent_tag, |
522 | 651 |
#' # train a classifier, for ex: B cell |
523 | 652 |
#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
524 | 653 |
#' set.seed(123) |
525 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
526 |
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
654 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
655 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
656 |
+#' cell_type = "b cells", tag_slot = 'active.ident') |
|
527 | 657 |
#' |
528 |
-#' classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, |
|
529 |
-#' classifier = classifier_b) |
|
658 |
+#' classifier_b_test <- test_classifier(classifier = classifier_b, |
|
659 |
+#' test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', |
|
660 |
+#' tag_slot = 'active.ident', target_cell_type = c("B cell")) |
|
530 | 661 |
#' |
531 | 662 |
#' # run plot curve on the test result |
532 | 663 |
#' roc_curve <- plot_roc_curve(test_result = classifier_b_test) |
... | ... |
@@ -608,14 +739,16 @@ setGeneric("classify_cells", function(classify_obj, classifiers = NULL, |
608 | 739 |
#' |
609 | 740 |
#' # train the classifier |
610 | 741 |
#' set.seed(123) |
611 |
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
612 |
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
742 |
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
743 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
744 |
+#' cell_type = "b cells", tag_slot = 'active.ident') |
|
613 | 745 |
#' |
614 | 746 |
#' # do the same thing with other cell types, for example, T cells |
615 | 747 |
#' selected_marker_genes_T = c("CD4", "CD8A", "CD8B") |
616 | 748 |
#' set.seed(123) |
617 |
-#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
618 |
-#' marker_genes = selected_marker_genes_T, cell_type = "T cells") |
|
749 |
+#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
750 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, |
|
751 |
+#' cell_type = "T cells", tag_slot = 'active.ident') |
|
619 | 752 |
#' |
620 | 753 |
#' # create a list of classifiers |
621 | 754 |
#' classifier_ls <- list(classifier_b, classifier_t) |
... | ... |
@@ -168,17 +168,8 @@ select_marker_genes <- function(mat, marker_genes) { |
168 | 168 |
#' |
169 | 169 |
#' @return list of adjusted tag |
170 | 170 |
#' @rdname internal |
171 |
-setGeneric("check_parent_child_coherence", |
|
172 |
- function(mat, tag, pos_parent, parent_cell, cell_type, |
|
173 |
- target_cell_type) |
|
174 |
- standardGeneric("check_parent_child_coherence")) |
|
175 |
- |
|
176 |
-#' @inherit check_parent_child_coherence |
|
177 |
-#' |
|
178 |
-#' @rdname internal |
|
179 |
-setMethod("check_parent_child_coherence", c("mat" = "dgCMatrix", 'tag' = 'vector'), |
|
180 |
- function(mat, tag, pos_parent, parent_cell, cell_type, |
|
181 |
- target_cell_type) { |
|
171 |
+check_parent_child_coherence <- function(mat, tag, pos_parent, parent_cell, |
|
172 |
+ cell_type, target_cell_type) { |
|
182 | 173 |
pos.val <- c(1, "yes", TRUE) |
183 | 174 |
|
184 | 175 |
# prepare (sub) cell type tag |
... | ... |
@@ -205,7 +196,7 @@ setMethod("check_parent_child_coherence", c("mat" = "dgCMatrix", 'tag' = 'vector |
205 | 196 |
#SummarizedExperiment::colData(obj)[, tag_slot] <- new.tag_slot |
206 | 197 |
|
207 | 198 |
return(new_tag) |
208 |
-}) |
|
199 |
+} |
|
209 | 200 |
|
210 | 201 |
#' Filter cells from ambiguous chars and non applicable cells |
211 | 202 |
#' Ambiguous characters includes: "/", ",", "-", "+", ".", "and", |
... | ... |
@@ -216,33 +207,26 @@ setMethod("check_parent_child_coherence", c("mat" = "dgCMatrix", 'tag' = 'vector |
216 | 207 |
#' |
217 | 208 |
#' @return filtered matrix and corresponding tag |
218 | 209 |
#' @rdname internal |
219 |
-setGeneric("filter_cells", function(mat, tag) |
|
220 |
- standardGeneric("filter_cells")) |
|
221 |
- |
|
222 |
-#' @inherit filter_cells |
|
223 |
-#' |
|
224 |
-#' @rdname internal |
|
225 |
-setMethod("filter_cells", c("mat" = "dgCMatrix", "tag" = "vector"), |
|
226 |
- function(mat, tag) { |
|
227 |
- # define characters usually included in ambiguous cell types |
|
228 |
- # this is to avoid considering ambiguous cell types as negative cell_type |
|
229 |
- ambiguous.chars <- c("/", ",", " -", " [+]", "[.]", " and ", |
|
230 |
- " or ", "_or_", "-or-", "[(]" ,"[)]", "ambiguous") |
|
231 |
- |
|
232 |
- # only eliminate cell labels containing cell_type and ambiguous.chars |
|
233 |
- ambiguous <- grepl(paste(ambiguous.chars, collapse="|"), tag) |
|
234 |
- n.applicable <- (grepl("not applicable", tag) | is.na(tag)) |
|
235 |
- |
|
236 |
- if (any(ambiguous)) |
|
237 |
- warning('Cell types containing "/", ",", "-", "+", ".", "and", "or", "(", ")", and "ambiguous" are considered as ambiguous. They are removed from training and testing.\n', |
|
238 |
- call. = FALSE, immediate. = TRUE) |
|
239 |
- #obj <- obj[, !(ambiguous | n.applicable)] |
|
240 |
- mat <- mat[, !(ambiguous | n.applicable), drop = FALSE] |
|
241 |
- tag <- tag[!(ambiguous | n.applicable)] |
|
242 |
- |
|
243 |
- filtered <- list('mat' = mat, 'tag' = tag) |
|
244 |
- return(filtered) |
|
245 |
- }) |
|
210 |
+filter_cells <- function(mat, tag) { |
|
211 |
+ # define characters usually included in ambiguous cell types |
|
212 |
+ # this is to avoid considering ambiguous cell types as negative cell_type |
|
213 |
+ ambiguous.chars <- c("/", ",", " -", " [+]", "[.]", " and ", |
|
214 |
+ " or ", "_or_", "-or-", "[(]" ,"[)]", "ambiguous") |
|
215 |
+ |
|
216 |
+ # only eliminate cell labels containing cell_type and ambiguous.chars |
|
217 |
+ ambiguous <- grepl(paste(ambiguous.chars, collapse="|"), tag) |
|
218 |
+ n.applicable <- (grepl("not applicable", tag) | is.na(tag)) |
|
219 |
+ |
|
220 |
+ if (any(ambiguous)) |
|
221 |
+ warning('Cell types containing "/", ",", "-", "+", ".", "and", "or", "(", ")", and "ambiguous" are considered as ambiguous. They are removed from training and testing.\n', |
|
222 |
+ call. = FALSE, immediate. = TRUE) |
|
223 |
+ #obj <- obj[, !(ambiguous | n.applicable)] |
|
224 |
+ mat <- mat[, !(ambiguous | n.applicable), drop = FALSE] |
|
225 |
+ tag <- tag[!(ambiguous | n.applicable)] |
|
226 |
+ |
|
227 |
+ filtered <- list('mat' = mat, 'tag' = tag) |
|
228 |
+ return(filtered) |
|
229 |
+} |
|
246 | 230 |
|
247 | 231 |
#' Construct tag vector |
248 | 232 |
#' |
... | ... |
@@ -254,25 +238,17 @@ setMethod("filter_cells", c("mat" = "dgCMatrix", "tag" = "vector"), |
254 | 238 |
#' @return a binary vector for cell tag |
255 | 239 |
#' |
256 | 240 |
#' @rdname internal |
257 |
-setGeneric("construct_tag_vect", |
|
258 |
- function(tag, cell_type) |
|
259 |
- standardGeneric("construct_tag_vect")) |
|
260 |
- |
|
261 |
-#' @inherit construct_tag_vect |
|
262 |
-#' |
|
263 |
-#' @rdname internal |
|
264 |
-setMethod("construct_tag_vect", c("tag" = "vector"), |
|
265 |
- function(tag, cell_type) { |
|
266 |
- pos.val <- c(1, "yes", TRUE) |
|
267 |
- |
|
268 |
- # x <- SummarizedExperiment::colData(obj)[, tag_slot] |
|
269 |
- test <- (tag %in% pos.val) | (tolower(tag) %in% tolower(cell_type)) |
|
270 |
- new_tag <- ifelse(test, "yes", "no") |
|
271 |
- |
|
272 |
- named_tag = setNames(new_tag, names(tag)) |
|
273 |
- |
|
274 |
- return(named_tag) |
|
275 |
- }) |
|
241 |
+construct_tag_vect <- function(tag, cell_type) { |
|
242 |
+ pos.val <- c(1, "yes", TRUE) |
|
243 |
+ |
|
244 |
+ # x <- SummarizedExperiment::colData(obj)[, tag_slot] |
|
245 |
+ test <- (tag %in% pos.val) | (tolower(tag) %in% tolower(cell_type)) |
|
246 |
+ new_tag <- ifelse(test, "yes", "no") |
|
247 |
+ |
|
248 |
+ named_tag = setNames(new_tag, names(tag)) |
|
249 |
+ |
|
250 |
+ return(named_tag) |
|
251 |
+} |
|
276 | 252 |
|
277 | 253 |
#' Process parent classifier |
278 | 254 |
#' |
... | ... |
@@ -292,17 +268,8 @@ setMethod("construct_tag_vect", c("tag" = "vector"), |
292 | 268 |
#' @import dplyr |
293 | 269 |
#' |
294 | 270 |
#' @rdname internal |
295 |
-setGeneric("process_parent_classifier", |
|
296 |
- function(mat, parent_tag, parent_cell_type, parent_classifier, |
|
297 |
- path_to_models, zscore = TRUE) |
|
298 |
- standardGeneric("process_parent_classifier")) |
|
299 |
- |
|
300 |
-#' @inherit process_parent_classifier |
|
301 |
-#' |
|
302 |
-#' @rdname internal |
|
303 |
-setMethod("process_parent_classifier", c("mat" = "dgCMatrix"), |
|
304 |
- function(mat, parent_tag, parent_cell_type, parent_classifier, |
|
305 |
- path_to_models, zscore = TRUE) { |
|
271 |
+process_parent_classifier <- function(mat, parent_tag, parent_cell_type, |
|
272 |
+ parent_classifier, path_to_models, zscore) { |
|
306 | 273 |
pos_parent <- parent.classifier <- . <- model_list <- NULL |
307 | 274 |
|
308 | 275 |
if (is.na(parent_cell_type) && !is.null(parent_classifier)) |
... | ... |
@@ -368,7 +335,7 @@ setMethod("process_parent_classifier", c("mat" = "dgCMatrix"), |
368 | 335 |
return_val <- list('pos_parent' = pos_parent, 'parent_cell'= parent_cell_type, |
369 | 336 |
'parent.classifier' = parent.classifier, 'model_list' = model_list) |
370 | 337 |
return(return_val) |
371 |
-}) |
|
338 |
+} |
|
372 | 339 |
|
373 | 340 |
#' Make prediction |
374 | 341 |
#' |
... | ... |
@@ -22,8 +22,9 @@ |
22 | 22 |
#' # train classifier |
23 | 23 |
#' selected_marker_genes_T = c("CD4", "CD8A", "CD8B") |
24 | 24 |
#' set.seed(123) |
25 |
-#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
26 |
-#' marker_genes = selected_marker_genes_T, cell_type = "t cells") |
|
25 |
+#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
26 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, |
|
27 |
+#' cell_type = "t cells", tag_slot = 'active.ident') |
|
27 | 28 |
#' |
28 | 29 |
#' # save the trained classifier to system |
29 | 30 |
#' # test classifier can be used before this step |
... | ... |
@@ -150,8 +151,9 @@ plant_tree <- function(path_to_models = "default") { |
150 | 151 |
#' # train a classifier |
151 | 152 |
#' set.seed(123) |
152 | 153 |
#' selected_marker_genes_T = c("CD4", "CD8A", "CD8B") |
153 |
-#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
154 |
-#' marker_genes = selected_marker_genes_T, cell_type = "t cells") |
|
154 |
+#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
155 |
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, |
|
156 |
+#' cell_type = "t cells", tag_slot = 'active.ident') |
|
155 | 157 |
#' |
156 | 158 |
#' # save a classifier to system |
157 | 159 |
#' save_new_model(new_model = classifier_t, path_to_models = tempdir()) |
... | ... |
@@ -20,8 +20,9 @@ Returns the caret model of the \code{\link{scAnnotatR}} object |
20 | 20 |
data("tirosh_mel80_example") |
21 | 21 |
selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
22 | 22 |
set.seed(123) |
23 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
24 |
-marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
23 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
24 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
25 |
+cell_type = "B cells", tag_slot = 'active.ident') |
|
25 | 26 |
caret_model(classifier_b) |
26 | 27 |
|
27 | 28 |
} |
... | ... |
@@ -27,14 +27,16 @@ Returns the cell type for the given classifier. |
27 | 27 |
data("tirosh_mel80_example") |
28 | 28 |
selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
29 | 29 |
set.seed(123) |
30 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
31 |
-cell_type = "B cells", marker_genes = selected_marker_genes_B) |
|
30 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
31 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
32 |
+cell_type = "B cells", tag_slot = 'active.ident') |
|
32 | 33 |
cell_type(classifier_b) |
33 | 34 |
|
34 | 35 |
data("tirosh_mel80_example") |
35 | 36 |
selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
36 | 37 |
set.seed(123) |
37 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
38 |
-marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
38 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
39 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
40 |
+cell_type = "B cells", tag_slot = 'active.ident') |
|
39 | 41 |
cell_type(classifier_b) <- "B cell" |
40 | 42 |
} |
... | ... |
@@ -107,14 +107,16 @@ selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
107 | 107 |
|
108 | 108 |
# train the classifier |
109 | 109 |
set.seed(123) |
110 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
111 |
-marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
110 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
111 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
112 |
+cell_type = "b cells", tag_slot = 'active.ident') |
|
112 | 113 |
|
113 | 114 |
# do the same thing with other cell types, for example, T cells |
114 | 115 |
selected_marker_genes_T = c("CD4", "CD8A", "CD8B") |
115 | 116 |
set.seed(123) |
116 |
-classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
117 |
-marker_genes = selected_marker_genes_T, cell_type = "T cells") |
|
117 |
+classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
118 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, |
|
119 |
+cell_type = "T cells", tag_slot = 'active.ident') |
|
118 | 120 |
|
119 | 121 |
# create a list of classifiers |
120 | 122 |
classifier_ls <- list(classifier_b, classifier_t) |
... | ... |
@@ -27,8 +27,9 @@ data("tirosh_mel80_example") |
27 | 27 |
# train a classifier |
28 | 28 |
set.seed(123) |
29 | 29 |
selected_marker_genes_T = c("CD4", "CD8A", "CD8B") |
30 |
-classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
31 |
-marker_genes = selected_marker_genes_T, cell_type = "t cells") |
|
30 |
+classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
31 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, |
|
32 |
+cell_type = "t cells", tag_slot = 'active.ident') |
|
32 | 33 |
|
33 | 34 |
# save a classifier to system |
34 | 35 |
save_new_model(new_model = classifier_t, path_to_models = tempdir()) |
... | ... |
@@ -13,20 +13,20 @@ |
13 | 13 |
\alias{caret_model<-,scAnnotatR-method} |
14 | 14 |
\alias{marker_genes<-} |
15 | 15 |
\alias{marker_genes<-,scAnnotatR-method} |
16 |
-\alias{train_classifier_func} |
|
17 |
-\alias{test_classifier_func} |
|
16 |
+\alias{train_classifier_seurat} |
|
17 |
+\alias{train_classifier_sce} |
|
18 |
+\alias{train_classifier_from_mat} |
|
19 |
+\alias{test_classifier_seurat} |
|
20 |
+\alias{test_classifier_sce} |
|
21 |
+\alias{test_classifier_from_mat} |
|
18 | 22 |
\alias{balance_dataset} |
19 | 23 |
\alias{train_func} |
20 | 24 |
\alias{transform_to_zscore} |
21 | 25 |
\alias{select_marker_genes} |
22 | 26 |
\alias{check_parent_child_coherence} |
23 |
-\alias{check_parent_child_coherence,dgCMatrix,vector-method} |
|
24 | 27 |
\alias{filter_cells} |
25 |
-\alias{filter_cells,dgCMatrix,vector-method} |
|
26 | 28 |
\alias{construct_tag_vect} |
27 |
-\alias{construct_tag_vect,vector-method} |
|
28 | 29 |
\alias{process_parent_classifier} |
29 |
-\alias{process_parent_classifier,dgCMatrix-method} |
|
30 | 30 |
\alias{make_prediction} |
31 | 31 |
\alias{simplify_prediction} |
32 | 32 |
\alias{verify_parent} |
... | ... |
@@ -60,7 +60,34 @@ marker_genes(classifier) <- value |
60 | 60 |
|
61 | 61 |
\S4method{marker_genes}{scAnnotatR}(classifier) <- value |
62 | 62 |
|
63 |
-train_classifier_func( |
|
63 |
+train_classifier_seurat( |
|
64 |
+ train_obj, |
|
65 |
+ cell_type, |
|
66 |
+ marker_genes, |
|
67 |
+ parent_cell = NA_character_, |
|
68 |
+ parent_classifier = NULL, |
|
69 |
+ path_to_models = "default", |
|
70 |
+ zscore = TRUE, |
|
71 |
+ seurat_tag_slot, |
|
72 |
+ seurat_parent_tag_slot = "predicted_cell_type", |
|
73 |
+ seurat_assay, |
|
74 |
+ seurat_slot |
|
75 |
+) |
|
76 |
+ |
|
77 |
+train_classifier_sce( |
|
78 |
+ train_obj, |
|
79 |
+ cell_type, |
|
80 |
+ marker_genes, |
|
81 |
+ parent_cell = NA_character_, |
|
82 |
+ parent_classifier = NULL, |
|
83 |
+ path_to_models = "default", |
|
84 |
+ zscore = TRUE, |
|
85 |
+ sce_tag_slot, |
|
86 |
+ sce_parent_tag_slot = "predicted_cell_type", |
|
87 |
+ sce_assay |
|
88 |
+) |
|
89 |
+ |
|
90 |
+train_classifier_from_mat( |
|
64 | 91 |
mat, |
65 | 92 |
tag, |
66 | 93 |
cell_type, |
... | ... |
@@ -72,7 +99,32 @@ train_classifier_func( |
72 | 99 |
zscore |
73 | 100 |
) |
74 | 101 |
|
75 |
-test_classifier_func( |
|
102 |
+test_classifier_seurat( |
|
103 |
+ test_obj, |
|
104 |
+ classifier, |
|
105 |
+ target_cell_type = NULL, |
|
106 |
+ parent_classifier = NULL, |
|
107 |
+ path_to_models = "default", |
|
108 |
+ zscore = TRUE, |
|
109 |
+ seurat_tag_slot, |
|
110 |
+ seurat_parent_tag_slot = "predicted_cell_type", |
|
111 |
+ seurat_assay, |
|
112 |
+ seurat_slot |
|
113 |
+) |
|
114 |
+ |
|
115 |
+test_classifier_sce( |
|
116 |
+ test_obj, |
|
117 |
+ classifier, |
|
118 |
+ target_cell_type = NULL, |
|
119 |
+ parent_classifier = NULL, |
|
120 |
+ path_to_models = "default", |
|
121 |
+ zscore = TRUE, |
|
122 |
+ sce_tag_slot, |
|
123 |
+ sce_parent_tag_slot = "predicted_cell_type", |
|
124 |
+ sce_assay |
|
125 |
+) |
|
126 |
+ |
|
127 |
+test_classifier_from_mat( |
|
76 | 128 |
mat, |
77 | 129 |
tag, |
78 | 130 |
classifier, |
... | ... |
@@ -100,39 +152,17 @@ check_parent_child_coherence( |
100 | 152 |
target_cell_type |
101 | 153 |
) |
102 | 154 |
|
103 |
-\S4method{check_parent_child_coherence}{dgCMatrix,vector}( |
|
104 |
- mat, |
|
105 |
- tag, |
|
106 |
- pos_parent, |
|
107 |
- parent_cell, |
|
108 |
- cell_type, |
|
109 |
- target_cell_type |
|
110 |
-) |
|
111 |
- |
|
112 | 155 |
filter_cells(mat, tag) |
113 | 156 |
|
114 |
-\S4method{filter_cells}{dgCMatrix,vector}(mat, tag) |
|
115 |
- |
|
116 | 157 |
construct_tag_vect(tag, cell_type) |
117 | 158 |
|
118 |
-\S4method{construct_tag_vect}{vector}(tag, cell_type) |
|
119 |
- |
|
120 | 159 |
process_parent_classifier( |
121 | 160 |
mat, |
122 | 161 |
parent_tag, |
123 | 162 |
parent_cell_type, |
124 | 163 |
parent_classifier, |
125 | 164 |
path_to_models, |
126 |
- zscore = TRUE |
|
127 |
-) |
|
128 |
- |
|
129 |
-\S4method{process_parent_classifier}{dgCMatrix}( |
|
130 |
- mat, |
|
131 |
- parent_tag, |
|
132 |
- parent_cell_type, |
|
133 |
- parent_classifier, |
|
134 |
- path_to_models, |
|
135 |
- zscore = TRUE |
|
165 |
+ zscore |
|
136 | 166 |
) |
137 | 167 |
|
138 | 168 |
make_prediction(mat, classifier, pred_cells, ignore_ambiguous_result = TRUE) |
... | ... |
@@ -166,12 +196,7 @@ download_data_file(verbose = FALSE) |
166 | 196 |
|
167 | 197 |
\item{value}{the new classifier} |
168 | 198 |
|
169 |
-\item{mat}{expression matrix} |
|
170 |
- |
|
171 |
-\item{tag}{tag of data} |
|
172 |
- |
|
173 |
-\item{parent_tag}{vector, named list indicating pre-assigned/predicted |
|
174 |
-parent cell type} |
|
199 |
+\item{train_obj}{SCE object} |
|
175 | 200 |
|
176 | 201 |
\item{parent_cell}{name of parent cell type} |
177 | 202 |
|
... | ... |
@@ -183,6 +208,46 @@ to classification model for the parent cell type} |
183 | 208 |
\item{zscore}{boolean indicating the transformation of gene expression |
184 | 209 |
in object to zscore or not} |
185 | 210 |
|
211 |
+\item{seurat_tag_slot}{string, name of annotation slot |
|
212 |
+indicating cell tag/label in the testing object. |
|
213 |
+Strings indicating cell types are expected in this slot. |
|
214 |
+Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
215 |
+or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
216 |
+1/"yes"/T/TRUE: being new cell type.} |
|
217 |
+ |
|
218 |
+\item{seurat_parent_tag_slot}{string, name of tag slot in cell meta data |
|
219 |
+indicating pre-assigned/predicted parent cell type. |
|
220 |
+Default field is "predicted_cell_type". |
|
221 |
+The slot must contain only string values.} |
|
222 |
+ |
|
223 |
+\item{seurat_assay}{name of assay to use in test_object} |
|
224 |
+ |
|
225 |
+\item{seurat_slot}{type of expression data to use in test_object. |
|
226 |
+For Seurat object, some available types are: "counts", "data" and "scale.data".} |
|
227 |
+ |
|
228 |
+\item{sce_tag_slot}{string, name of annotation slot |
|
229 |
+indicating cell tag/label in the testing object. |
|
230 |
+Strings indicating cell types are expected in this slot. |
|
231 |
+Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
232 |
+or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
233 |
+1/"yes"/T/TRUE: being new cell type.} |
|
234 |
+ |
|
235 |
+\item{sce_parent_tag_slot}{string, name of tag slot in cell meta data |
|
236 |
+indicating pre-assigned/predicted parent cell type. |
|
237 |
+Default field is "predicted_cell_type". |
|
238 |
+The slot must contain only string values.} |
|
239 |
+ |
|
240 |
+\item{sce_assay}{name of assay to use in test_object} |
|
241 |
+ |
|
242 |
+\item{mat}{expression matrix} |
|
243 |
+ |
|
244 |
+\item{tag}{tag of data} |
|
245 |
+ |
|
246 |
+\item{parent_tag}{vector, named list indicating pre-assigned/predicted |
|
247 |
+parent cell type} |
|
248 |
+ |
|
249 |
+\item{test_obj}{SCE object used for testing} |
|
250 |
+ |
|
186 | 251 |
\item{target_cell_type}{alternative cell types (in case of testing classifier)} |
187 | 252 |
|
188 | 253 |
\item{pos_parent}{a vector indicating parent classifier prediction} |
... | ... |
@@ -230,8 +295,20 @@ the classifier with the new marker genes |
230 | 295 |
|
231 | 296 |
scAnnotatR object with the new marker genes. |
232 | 297 |
|
298 |
+\code{\link{scAnnotatR}} object |
|
299 |
+ |
|
300 |
+\code{\link{scAnnotatR}} object |
|
301 |
+ |
|
233 | 302 |
caret trained model |
234 | 303 |
|
304 |
+result of testing process in form of a list, |
|
305 |
+including predicted values, prediction accuracy at a probability threshold, |
|
306 |
+and roc curve information. |
|
307 |
+ |
|
308 |
+result of testing process in form of a list, |
|
309 |
+including predicted values, prediction accuracy at a probability threshold, |
|
310 |
+and roc curve information. |
|
311 |
+ |
|
235 | 312 |
model performance statistics |
236 | 313 |
|
237 | 314 |
a list of balanced count matrix |
... | ... |
@@ -266,10 +343,22 @@ path to the downloaded file in cache |
266 | 343 |
\description{ |
267 | 344 |
Check if a scAnnotatR object is valid |
268 | 345 |
|
346 |
+Train a classifier for a new cell type |
|
347 |
+If cell type has a parent, only available for \code{\link{scAnnotatR}} |
|
348 |
+object as parent cell classifying model. |
|
349 |
+ |
|
350 |
+Train a classifier for a new cell type |
|
351 |
+If cell type has a parent, only available for \code{\link{scAnnotatR}} |
|
352 |
+object as parent cell classifying model. |
|
353 |
+ |
|
269 | 354 |
Train a classifier for a new cell type from expression matrix |
270 | 355 |
and tag |
271 | 356 |
If cell type has a parent, only available for \code{\link{scAnnotatR}} |
272 | 357 |
object as parent cell classifying model. |
273 | 358 |
|
359 |
+Testing process when test object is of type Seurat |
|
360 |
+ |
|
361 |
+Testing process when test object is of type SCE |
|
362 |
+ |
|
274 | 363 |
Testing process from matrix and tag |
275 | 364 |
} |
... | ... |
@@ -19,8 +19,9 @@ Returns the set of marker genes for the given classifier. |
19 | 19 |
data("tirosh_mel80_example") |
20 | 20 |
selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
21 | 21 |
set.seed(123) |
22 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
23 |
-marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
22 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
23 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
24 |
+cell_type = "B cells", tag_slot = 'active.ident') |
|
24 | 25 |
marker_genes(classifier_b) |
25 | 26 |
|
26 | 27 |
} |
... | ... |
@@ -27,17 +27,20 @@ Returns the probability threshold for the given classifier. |
27 | 27 |
data("tirosh_mel80_example") |
28 | 28 |
selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
29 | 29 |
set.seed(123) |
30 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
31 |
-marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
30 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
31 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
32 |
+cell_type = "B cells", tag_slot = 'active.ident') |
|
32 | 33 |
p_thres(classifier_b) |
33 | 34 |
|
34 | 35 |
data("tirosh_mel80_example") |
35 | 36 |
selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
36 | 37 |
set.seed(123) |
37 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
38 |
-marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
39 |
-classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, |
|
40 |
-classifier = classifier_b) |
|
38 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
39 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
40 |
+cell_type = "B cells", tag_slot = 'active.ident') |
|
41 |
+classifier_b_test <- test_classifier(classifier = classifier_b, |
|
42 |
+test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', |
|
43 |
+tag_slot = 'active.ident') |
|
41 | 44 |
# assign a new threhold probability for prediction |
42 | 45 |
p_thres(classifier_b) <- 0.4 |
43 | 46 |
} |
... | ... |
@@ -19,8 +19,9 @@ Returns the parent of the cell type corresponding to the given classifier. |
19 | 19 |
data("tirosh_mel80_example") |
20 | 20 |
selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
21 | 21 |
set.seed(123) |
22 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
23 |
-marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
22 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
23 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
24 |
+cell_type = "B cells", tag_slot = 'active.ident') |
|
24 | 25 |
parent(classifier_b) |
25 | 26 |
|
26 | 27 |
} |
... | ... |
@@ -22,11 +22,13 @@ data("tirosh_mel80_example") |
22 | 22 |
# train a classifier, for ex: B cell |
23 | 23 |
selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
24 | 24 |
set.seed(123) |
25 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
26 |
-marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
25 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
26 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
27 |
+cell_type = "b cells", tag_slot = 'active.ident') |
|
27 | 28 |
|
28 |
-classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, |
|
29 |
-classifier = classifier_b) |
|
29 |
+classifier_b_test <- test_classifier(classifier = classifier_b, |
|
30 |
+test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', |
|
31 |
+tag_slot = 'active.ident', target_cell_type = c("B cell")) |
|
30 | 32 |
|
31 | 33 |
# run plot curve on the test result |
32 | 34 |
roc_curve <- plot_roc_curve(test_result = classifier_b_test) |
... | ... |
@@ -33,8 +33,9 @@ data("tirosh_mel80_example") |
33 | 33 |
# train classifier |
34 | 34 |
selected_marker_genes_T = c("CD4", "CD8A", "CD8B") |
35 | 35 |
set.seed(123) |
36 |
-classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
37 |
-marker_genes = selected_marker_genes_T, cell_type = "t cells") |
|
36 |
+classifier_t <- train_classifier(train_obj = tirosh_mel80_example, |
|
37 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, |
|
38 |
+cell_type = "t cells", tag_slot = 'active.ident') |
|
38 | 39 |
|
39 | 40 |
# save the trained classifier to system |
40 | 41 |
# test classifier can be used before this step |
... | ... |
@@ -50,9 +50,9 @@ data("tirosh_mel80_example") |
50 | 50 |
# train a classifier, for ex: B cell |
51 | 51 |
selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
52 | 52 |
set.seed(123) |
53 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
54 |
- marker_genes = selected_marker_genes_B, |
|
55 |
- cell_type = "B cells") |
|
53 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
54 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
55 |
+cell_type = "B cells", tag_slot = 'active.ident') |
|
56 | 56 |
|
57 | 57 |
classifier_b |
58 | 58 |
} |
... | ... |
@@ -19,8 +19,9 @@ Show object |
19 | 19 |
data("tirosh_mel80_example") |
20 | 20 |
selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
21 | 21 |
set.seed(123) |
22 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
23 |
-marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
22 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
23 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
24 |
+cell_type = "B cells", tag_slot = 'active.ident') |
|
24 | 25 |
classifier_b |
25 | 26 |
|
26 | 27 |
} |
... | ... |
@@ -2,51 +2,52 @@ |
2 | 2 |
% Please edit documentation in R/classifier.R |
3 | 3 |
\name{test_classifier} |
4 | 4 |
\alias{test_classifier} |
5 |
-\alias{test_classifier,Seurat,scAnnotatR-method} |
|
6 |
-\alias{test_classifier,SingleCellExperiment,scAnnotatR-method} |
|
5 |
+\alias{test_classifier,scAnnotatR-method} |
|
7 | 6 |
\title{Testing process.} |
8 | 7 |
\usage{ |
9 | 8 |
test_classifier( |
10 |
- test_obj, |
|
11 | 9 |
classifier, |
12 |
- target_cell_type = NULL, |
|
13 |
- parent_classifier = NULL, |
|
14 |
- path_to_models = "default", |
|
15 |
- zscore = TRUE, |
|
16 |
- ... |
|
17 |
-) |
|
18 |
- |
|
19 |
-\S4method{test_classifier}{Seurat,scAnnotatR}( |
|
20 | 10 |
test_obj, |
21 |
- classifier, |
|
11 |
+ assay, |
|
12 |
+ slot = NULL, |
|
13 |
+ tag_slot, |
|
22 | 14 |
target_cell_type = NULL, |
23 | 15 |
parent_classifier = NULL, |
16 |
+ parent_tag_slot = "predicted_cell_type", |
|
24 | 17 |
path_to_models = "default", |
25 |
- zscore = TRUE, |
|
26 |
- seurat_tag_slot = "active.ident", |
|
27 |
- seurat_parent_tag_slot = "predicted_cell_type", |
|
28 |
- seurat_assay = "RNA", |
|
29 |
- seurat_slot = "counts", |
|
30 |
- ... |
|
18 |
+ zscore = TRUE |
|
31 | 19 |
) |
32 | 20 |
|
33 |
-\S4method{test_classifier}{SingleCellExperiment,scAnnotatR}( |
|
34 |
- test_obj, |
|
21 |
+\S4method{test_classifier}{scAnnotatR}( |
|
35 | 22 |
classifier, |
23 |
+ test_obj, |
|
24 |
+ assay, |
|
25 |
+ slot = NULL, |
|
26 |
+ tag_slot, |
|
36 | 27 |
target_cell_type = NULL, |
37 | 28 |
parent_classifier = NULL, |
29 |
+ parent_tag_slot = "predicted_cell_type", |
|
38 | 30 |
path_to_models = "default", |
39 |
- zscore = TRUE, |
|
40 |
- sce_tag_slot = "ident", |
|
41 |
- sce_parent_tag_slot = "predicted_cell_type", |
|
42 |
- sce_assay = "logcounts", |
|
43 |
- ... |
|
31 |
+ zscore = TRUE |
|
44 | 32 |
) |
45 | 33 |
} |
46 | 34 |
\arguments{ |
47 |
-\item{test_obj}{xxobject that can be used for testing} |
|
35 |
+\item{classifier}{scAnnotatR classification model} |
|
36 |
+ |
|
37 |
+\item{test_obj}{object that can be used for testing} |
|
38 |
+ |
|
39 |
+\item{assay}{name of assay to use in test_obj}
|
48 | 40 |
|
49 |
-\item{classifier}{classification model} |
|
41 |
+\item{slot}{type of expression data to use in test_obj.
|
42 |
+For Seurat object, some available types are: "counts", "data" and "scale.data". |
|
43 |
+Ignore this if test_obj is \code{\link{SingleCellExperiment}} object.} |
|
44 |
+ |
|
45 |
+\item{tag_slot}{string, name of annotation slot |
|
46 |
+indicating cell tag/label in the testing object. |
|
47 |
+Strings indicating cell types are expected in this slot. |
|
48 |
+Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
49 |
+or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
50 |
+1/"yes"/T/TRUE: being new cell type.} |
|
50 | 51 |
|
51 | 52 |
\item{target_cell_type}{vector indicating other cell types than cell labels |
52 | 53 |
that can be considered as the main cell type in classifier, |
... | ... |
@@ -56,51 +57,17 @@ Default as NULL.} |
56 | 57 |
\item{parent_classifier}{\code{\link{scAnnotatR}} object |
57 | 58 |
corresponding to classification model for the parent cell type} |
58 | 59 |
|
60 |
+\item{parent_tag_slot}{string, name of tag slot in cell meta data |
|
61 |
+indicating pre-assigned/predicted parent cell type. |
|
62 |
+Default field is "predicted_cell_type". |
|
63 |
+The slot must contain only string values.} |
|
64 |
+ |
|
59 | 65 |
\item{path_to_models}{path to the folder containing the list of models. |
60 | 66 |
As default, the pretrained models in the package will be used. |
61 | 67 |
If user has trained new models, indicate the folder containing |
62 | 68 |
the new_models.rda file.} |
63 | 69 |
|
64 | 70 |
\item{zscore}{boolean, whether gene expression is transformed to zscore} |
65 |
- |
|
66 |
-\item{...}{arguments passed to other methods} |
|
67 |
- |
|
68 |
-\item{seurat_tag_slot}{string, name of annotation slot |
|
69 |
-indicating cell tag/label in the testing object. |
|
70 |
-Strings indicating cell types are expected in this slot. |
|
71 |
-For \code{\link{Seurat}} object, default value is "active.ident". |
|
72 |
-Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
73 |
-or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
74 |
-1/"yes"/T/TRUE: being new cell type.} |
|
75 |
- |
|
76 |
-\item{seurat_parent_tag_slot}{string, name of tag slot in cell meta data |
|
77 |
-indicating pre-assigned/predicted parent cell type. |
|
78 |
-Default field is "predicted_cell_type". |
|
79 |
-The slot must contain only string values.} |
|
80 |
- |
|
81 |
-\item{seurat_assay}{name of assay to use in |
|
82 |
-\code{\link{Seurat}} object, defaults to 'RNA' assay.} |
|
83 |
- |
|
84 |
-\item{seurat_slot}{type of expression data to use in |
|
85 |
-\code{\link{Seurat}} object. |
|
86 |
-Some available types are: "counts", "data" and "scale.data". |
|
87 |
-Default to "counts", which contains unnormalized data.} |
|
88 |
- |
|
89 |
-\item{sce_tag_slot}{string, name of annotation slot |
|
90 |
-indicating cell tag/label in the testing object. |
|
91 |
-Strings indicating cell types are expected in this slot. |
|
92 |
-Default value is "ident". |
|
93 |
-Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
94 |
-or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
95 |
-1/"yes"/T/TRUE: being new cell type.} |
|
96 |
- |
|
97 |
-\item{sce_parent_tag_slot}{string, name of tag slot in cell meta data |
|
98 |
-indicating pre-assigned/predicted parent cell type. |
|
99 |
-Default is "predicted_cell_type". |
|
100 |
-The slot must contain only string values.} |
|
101 |
- |
|
102 |
-\item{sce_assay}{name of assay to use in \code{\link{SingleCellExperiment}} |
|
103 |
-object, defaults to 'logcounts' assay.} |
|
104 | 71 |
} |
105 | 72 |
\value{ |
106 | 73 |
result of testing process in form of a list, |
... | ... |
@@ -125,13 +92,15 @@ data("tirosh_mel80_example") |
125 | 92 |
# train the classifier |
126 | 93 |
selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
127 | 94 |
set.seed(123) |
128 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
129 |
-marker_genes = selected_marker_genes_B, cell_type = "B cells") |
|
95 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
96 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
97 |
+cell_type = "b cells", tag_slot = 'active.ident') |
|
130 | 98 |
|
131 | 99 |
# test the classifier, target cell type can be in other formats or |
132 | 100 |
# alternative cell type that can be considered as the classified cell type |
133 |
-classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, |
|
134 |
-classifier = classifier_b, target_cell_type = c("B cell")) |
|
101 |
+classifier_b_test <- test_classifier(classifier = classifier_b, |
|
102 |
+test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', |
|
103 |
+tag_slot = 'active.ident', target_cell_type = c("B cell")) |
|
135 | 104 |
classifier_b_test |
136 | 105 |
|
137 | 106 |
} |
... | ... |
@@ -2,81 +2,43 @@ |
2 | 2 |
% Please edit documentation in R/classifier.R |
3 | 3 |
\name{train_classifier} |
4 | 4 |
\alias{train_classifier} |
5 |
-\alias{train_classifier,Seurat-method} |
|
6 |
-\alias{train_classifier,SingleCellExperiment-method} |
|
7 | 5 |
\title{Train cell type classifier} |
8 | 6 |
\usage{ |
9 | 7 |
train_classifier( |
10 | 8 |
train_obj, |
9 |
+ assay, |
|
10 |
+ slot = NULL, |
|
11 | 11 |
cell_type, |
12 | 12 |
marker_genes, |
13 |
+ tag_slot, |
|
13 | 14 |
parent_cell = NA_character_, |
15 |
+ parent_tag_slot = "predicted_cell_type", |
|
14 | 16 |
parent_classifier = NULL, |
15 | 17 |
path_to_models = "default", |
16 |
- zscore = TRUE, |
|
17 |
- ... |
|
18 |
-) |
|
19 |
- |
|
20 |
-\S4method{train_classifier}{Seurat}( |
|
21 |
- train_obj, |
|
22 |
- cell_type, |
|
23 |
- marker_genes, |
|
24 |
- parent_cell = NA_character_, |
|
25 |
- parent_classifier = NULL, |
|
26 |
- path_to_models = "default", |
|
27 |
- zscore = TRUE, |
|
28 |
- seurat_tag_slot = "active.ident", |
|
29 |
- seurat_parent_tag_slot = "predicted_cell_type", |
|
30 |
- seurat_assay = "RNA", |
|
31 |
- seurat_slot = "counts", |
|
32 |
- ... |
|
33 |
-) |
|
34 |
- |
|
35 |
-\S4method{train_classifier}{SingleCellExperiment}( |
|
36 |
- train_obj, |
|
37 |
- cell_type, |
|
38 |
- marker_genes, |
|
39 |
- parent_cell = NA_character_, |
|
40 |
- parent_classifier = NULL, |
|
41 |
- path_to_models = "default", |
|
42 |
- zscore = TRUE, |
|
43 |
- sce_tag_slot = "ident", |
|
44 |
- sce_parent_tag_slot = "predicted_cell_type", |
|
45 |
- sce_assay = "logcounts", |
|
46 |
- ... |
|
18 |
+ zscore = TRUE |
|
47 | 19 |
) |
48 | 20 |
} |
49 | 21 |
\arguments{ |
50 | 22 |
\item{train_obj}{object that can be used for training the new model. |
51 | 23 |
\code{\link{Seurat}} object or \code{\link{SingleCellExperiment}} object |
52 |
-is expected. |
|
24 |
+is supported. |
|
53 | 25 |
If the training model has parent, parent_tag_slot may have been indicated. |
54 | 26 |
This field would have been filled out automatically |
55 | 27 |
if the user previously ran the classify_cells function.
56 | 28 |
If no (predicted) cell type annotation provided, |
57 | 29 |
the function can be run if 1- parent_cell or 2- parent_classifier is provided.} |
58 | 30 |
|
31 |
+\item{assay}{name of assay to use in training object.} |
|
32 |
+ |
|
33 |
+\item{slot}{type of expression data to use in training object, omitted if |
|
34 |
+train_obj is \code{\link{SingleCellExperiment}} object.} |
|
35 |
+ |
|
59 | 36 |
\item{cell_type}{string indicating the name of the subtype.
60 | 37 |
This must exactly match cell tag/label if cell tag/label is a string.} |
61 | 38 |
|
62 | 39 |
\item{marker_genes}{list of marker genes used for the new training model} |
63 | 40 |
|
64 |
-\item{parent_cell}{string indicated the name of the parent cell type, |
|
65 |
-if parent cell type classifier has already been saved in model database. |
|
66 |
-Adjust path_to_models for exact database.} |
|
67 |
- |
|
68 |
-\item{parent_classifier}{classification model for the parent cell type} |
|
69 |
- |
|
70 |
-\item{path_to_models}{path to the folder containing the model database. |
|
71 |
-As default, the pretrained models in the package will be used. |
|
72 |
-If user has trained new models, indicate the folder containing the |
|
73 |
-new_models.rda file.} |
|
74 |
- |
|
75 |
-\item{zscore}{whether gene expression in train_obj is transformed to zscore} |
|
76 |
- |
|
77 |
-\item{...}{arguments passed to other methods} |
|
78 |
- |
|
79 |
-\item{seurat_tag_slot}{string, name of slot in cell meta data |
|
41 |
+\item{tag_slot}{string, name of slot in cell meta data |
|
80 | 42 |
indicating cell tag/label in the training object. |
81 | 43 |
Strings indicating cell types are expected in this slot. |
82 | 44 |
For \code{\link{Seurat}} object, a typical value is "active.ident" (no default is set).
... | ... |
@@ -84,35 +46,24 @@ Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
84 | 46 |
or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
85 | 47 |
1/"yes"/T/TRUE: being new cell type.} |
86 | 48 |
|
87 |
-\item{seurat_parent_tag_slot}{string, name of a slot in cell meta data |
|
49 |
+\item{parent_cell}{string indicating the name of the parent cell type,
|
50 |
+if parent cell type classifier has already been saved in model database. |
|
51 |
+Adjust path_to_models for exact database.} |
|
52 |
+ |
|
53 |
+\item{parent_tag_slot}{string, name of a slot in cell meta data |
|
88 | 54 |
indicating assigned/predicted cell type. Default is "predicted_cell_type". |
89 | 55 |
This slot would have been filled automatically |
90 | 56 |
if the user has called the classify_cells function.
91 | 57 |
The slot must contain only string values.} |
92 | 58 |
|
93 |
-\item{seurat_assay}{name of assay to use in training object. |
|
94 |
-Default to 'RNA' assay.} |
|
95 |
- |
|
96 |
-\item{seurat_slot}{type of expression data to use in training object. |
|
97 |
-For \code{\link{Seurat}} object, available types are: "counts", "data" |
|
98 |
-and "scale.data". Default to "counts", which contains unnormalized data.} |
|
99 |
- |
|
100 |
-\item{sce_tag_slot}{string, name of annotation slot indicating |
|
101 |
-cell tag/label in the training object. |
|
102 |
-For \code{\link{SingleCellExperiment}} object, default value is "ident". |
|
103 |
-Expected values are string (A-Z, a-z, 0-9, no special character accepted) |
|
104 |
-or binary/logical, 0/"no"/F/FALSE: not being new cell type, |
|
105 |
-1/"yes"/T/TRUE: being new cell type.} |
|
59 |
+\item{parent_classifier}{classification model for the parent cell type} |
|
106 | 60 |
|
107 |
-\item{sce_parent_tag_slot}{string, name of a slot in cell meta data |
|
108 |
-indicating pre-assigned/predicted cell type. |
|
109 |
-Default field is "predicted_cell_type". |
|
110 |
-This field would have been filled automatically |
|
111 |
-when user called classify_cells function. |
|
112 |
-The slot must contain only string values.} |
|
61 |
+\item{path_to_models}{path to the folder containing the model database. |
|
62 |
+As default, the pretrained models in the package will be used. |
|
63 |
+If user has trained new models, indicate the folder containing the |
|
64 |
+new_models.rda file.} |
|
113 | 65 |
|
114 |
-\item{sce_assay}{name of assay to use in training object. |
|
115 |
-Default to 'logcounts' assay.} |
|
66 |
+\item{zscore}{whether gene expression in train_obj is transformed to zscore} |
|
116 | 67 |
} |
117 | 68 |
\value{ |
118 | 69 |
\code{\link{scAnnotatR}} object |
... | ... |
@@ -143,8 +94,9 @@ selected_marker_genes_B = c("CD19", "MS4A1", "CD79A") |
143 | 94 |
# train the classifier, the "cell_type" argument must match |
144 | 95 |
# the cell labels in the data, except upper/lower case |
145 | 96 |
set.seed(123) |
146 |
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
147 |
-marker_genes = selected_marker_genes_B, cell_type = "b cells") |
|
97 |
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example, |
|
98 |
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, |
|
99 |
+cell_type = "b cells", tag_slot = 'active.ident') |
|
148 | 100 |
|
149 | 101 |
# classify cell types using B cell classifier, |
150 | 102 |
# a test classifier process may be used before applying the classifier |
... | ... |
@@ -163,7 +115,8 @@ p_marker_genes = c("SDC1", "CD19", "CD79A") |
163 | 115 |
# for the training process. |
164 | 116 |
set.seed(123) |
165 | 117 |
plasma_classifier <- train_classifier(train_obj = tirosh_mel80_example, |
166 |
-cell_type = "Plasma cell", marker_genes = p_marker_genes, |
|
167 |
-parent_classifier = classifier_b, seurat_tag_slot = 'plasma_cell_tag') |
|
118 |
+assay = 'RNA', slot = 'counts', cell_type = 'Plasma cell', |
|
119 |
+marker_genes = p_marker_genes, tag_slot = 'plasma_cell_tag', |
|
120 |
+parent_classifier = classifier_b) |
|
168 | 121 |
|
169 | 122 |
} |
... | ... |
@@ -148,8 +148,8 @@ times for one model, users can use `set.seed`. |
148 | 148 |
```{r} |
149 | 149 |
set.seed(123) |
150 | 150 |
classifier_B <- train_classifier(train_obj = train_set, cell_type = "B cells", |
151 |
- marker_genes = selected_marker_genes_B, |
|
152 |
- sce_assay = 'counts', sce_tag_slot = 'B_cell') |
|
151 |
+ marker_genes = selected_marker_genes_B, |
|
152 |
+ assay = 'counts', tag_slot = 'B_cell') |
|
153 | 153 |
``` |
154 | 154 |
```{r} |
155 | 155 |
classifier_B |
... | ... |
@@ -169,8 +169,8 @@ The `test_classifier` model automatically tests a classifier's performance |
169 | 169 |
against another dataset. Here, we used the `test_set` created before: |
170 | 170 |
|
171 | 171 |
```{r} |
172 |
-classifier_B_test <- test_classifier(test_obj = test_set, classifier = classifier_B, |
|
173 |
- sce_assay = 'counts', sce_tag_slot = 'B_cell') |
|
172 |
+classifier_B_test <- test_classifier(classifier = classifier_B, test_obj = test_set, |
|
173 |
+ assay = 'counts', tag_slot = 'B_cell') |
|
174 | 174 |
``` |
175 | 175 |
|
176 | 176 |
### Interpreting test model result |
... | ... |
@@ -178,7 +178,7 @@ Train the child classifier: |
178 | 178 |
set.seed(123) |
179 | 179 |
classifier_plasma <- train_classifier(train_obj = train_set, |
180 | 180 |
marker_genes = selected_marker_genes_plasma, cell_type = "Plasma cells", |
181 |
-sce_assay = 'counts', sce_tag_slot = 'plasma', parent_classifier = classifier_B) |
|
181 |
+assay = 'counts', tag_slot = 'plasma', parent_classifier = classifier_B) |
|
182 | 182 |
``` |
183 | 183 |
If the cells classifier has not been loaded to the current working space, |
184 | 184 |
an equivalent training process should be: |
... | ... |
@@ -186,7 +186,7 @@ an equivalent training process should be: |
186 | 186 |
set.seed(123) |
187 | 187 |
classifier_plasma <- train_classifier(train_obj = train_set, |
188 | 188 |
marker_genes = selected_marker_genes_plasma, cell_type = "Plasma cells", |
189 |
-sce_assay = 'counts', sce_tag_slot = 'plasma', parent_cell = 'B cells') |
|
189 |
+assay = 'counts', tag_slot = 'plasma', parent_cell = 'B cells') |
|
190 | 190 |
``` |
191 | 191 |
```{r} |
192 | 192 |
classifier_plasma |
... | ... |
@@ -200,7 +200,7 @@ caret_model(classifier_plasma) |
200 | 200 |
The parent classifier must also be set in the test method.
201 | 201 |
```{r} |
202 | 202 |
classifier_plasma_test <- test_classifier(test_obj = test_set, |
203 |
-classifier = classifier_plasma, sce_assay = 'counts', sce_tag_slot = 'plasma', |
|
203 |
+classifier = classifier_plasma, assay = 'counts', tag_slot = 'plasma', |
|
204 | 204 |
parent_classifier = classifier_B) |
205 | 205 |
``` |
206 | 206 |
|