Browse code

merge separate methods for Seurat and SCE signatures into one function

nttvy authored on 29/07/2021 22:23:02
Showing 21 changed files

... ...
@@ -31,6 +31,7 @@ import(methods)
31 31
 import(pROC)
32 32
 import(tools)
33 33
 importFrom(Seurat,GetAssayData)
34
+importFrom(Seurat,Idents)
34 35
 importFrom(SummarizedExperiment,assay)
35 36
 importFrom(SummarizedExperiment,colData)
36 37
 importFrom(stats,predict)
... ...
@@ -23,9 +23,9 @@ setOldClass("train")
23 23
 #' # train a classifier, for ex: B cell
24 24
 #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
25 25
 #' set.seed(123)
26
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
27
-#'                           marker_genes = selected_marker_genes_B, 
28
-#'                           cell_type = "B cells")
26
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
27
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
28
+#' cell_type = "B cells", tag_slot = 'active.ident')
29 29
 #'
30 30
 #' classifier_b
31 31
 #' @export
... ...
@@ -219,8 +219,9 @@ setValidity("scAnnotatR", checkObjectValidity)
219 219
 #' data("tirosh_mel80_example")
220 220
 #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
221 221
 #' set.seed(123)
222
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
223
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
222
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
223
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
224
+#' cell_type = "B cells", tag_slot = 'active.ident')
224 225
 #' classifier_b
225 226
 #' 
226 227
 #' @export
... ...
@@ -251,8 +252,9 @@ setMethod("show", c("object" = "scAnnotatR"), function(object) {
251 252
 #' data("tirosh_mel80_example")
252 253
 #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
253 254
 #' set.seed(123)
254
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
255
-#' cell_type = "B cells", marker_genes = selected_marker_genes_B)
255
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
256
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
257
+#' cell_type = "B cells", tag_slot = 'active.ident')
256 258
 #' cell_type(classifier_b)
257 259
 #' 
258 260
 #' @export
... ...
@@ -273,8 +275,9 @@ cell_type <- function(classifier) {
273 275
 #' data("tirosh_mel80_example")
274 276
 #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
275 277
 #' set.seed(123)
276
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
277
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
278
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
279
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
280
+#' cell_type = "B cells", tag_slot = 'active.ident')
278 281
 #' caret_model(classifier_b)
279 282
 #'  
280 283
 #' @export
... ...
@@ -294,8 +297,9 @@ caret_model <- function(classifier) {
294 297
 #' data("tirosh_mel80_example")
295 298
 #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
296 299
 #' set.seed(123)
297
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
298
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
300
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
301
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
302
+#' cell_type = "B cells", tag_slot = 'active.ident')
299 303
 #' marker_genes(classifier_b)
300 304
 #' 
301 305
 #' @export
... ...
@@ -315,8 +319,9 @@ marker_genes <- function(classifier) {
315 319
 #' data("tirosh_mel80_example")
316 320
 #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
317 321
 #' set.seed(123)
318
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
319
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
322
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
323
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
324
+#' cell_type = "B cells", tag_slot = 'active.ident')
320 325
 #' p_thres(classifier_b)
321 326
 #' 
322 327
 #' @export
... ...
@@ -337,8 +342,9 @@ p_thres <- function(classifier) {
337 342
 #' data("tirosh_mel80_example")
338 343
 #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
339 344
 #' set.seed(123)
340
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
341
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
345
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
346
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
347
+#' cell_type = "B cells", tag_slot = 'active.ident')
342 348
 #' parent(classifier_b)
343 349
 #' 
344 350
 #' @export
... ...
@@ -367,8 +373,9 @@ setGeneric('cell_type<-', function(classifier, value)
367 373
 #' data("tirosh_mel80_example")
368 374
 #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
369 375
 #' set.seed(123)
370
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
371
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
376
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
377
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
378
+#' cell_type = "B cells", tag_slot = 'active.ident')
372 379
 #' cell_type(classifier_b) <- "B cell"
373 380
 #' @rdname cell_type
374 381
 setReplaceMethod('cell_type', c("classifier" = "scAnnotatR"), 
... ...
@@ -402,10 +409,12 @@ setGeneric('p_thres<-', function(classifier, value)
402 409
 #' data("tirosh_mel80_example")
403 410
 #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
404 411
 #' set.seed(123)
405
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
406
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
407
-#' classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, 
408
-#' classifier = classifier_b)
412
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
413
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
414
+#' cell_type = "B cells", tag_slot = 'active.ident')
415
+#' classifier_b_test <- test_classifier(classifier = classifier_b, 
416
+#' test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', 
417
+#' tag_slot = 'active.ident')
409 418
 #' # assign a new threshold probability for prediction
410 419
 #' p_thres(classifier_b) <- 0.4
411 420
 #' @rdname p_thres
... ...
@@ -6,25 +6,39 @@
6 6
 #' 
7 7
 #' @param train_obj object that can be used for training the new model. 
8 8
 #' \code{\link{Seurat}} object or \code{\link{SingleCellExperiment}} object
9
-#' is expected.
9
+#' is supported.
10 10
 #' If the training model has parent, parent_tag_slot may have been indicated. 
11 11
 #' This field would have been filled out automatically 
12 12
 #' if the user has previously run the classify_cells function. 
13 13
 #' If no (predicted) cell type annotation provided, 
14 14
 #' the function can be run if 1- parent_cell or 2- parent_classifier is provided.
15
+#' @param assay name of assay to use in training object. 
16
+#' @param slot type of expression data to use in training object, omitted if 
17
+#' train_obj is \code{\link{SingleCellExperiment}} object.
15 18
 #' @param cell_type string indicating the name of the subtype
16 19
 #' This must exactly match cell tag/label if cell tag/label is a string.
17 20
 #' @param marker_genes list of marker genes used for the new training model
21
+#' @param tag_slot string, name of slot in cell meta data 
22
+#' indicating cell tag/label in the training object.
23
+#' Strings indicating cell types are expected in this slot.
24
+#' For \code{\link{Seurat}} object, default value is "active.ident".  
25
+#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
26
+#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
27
+#' 1/"yes"/T/TRUE: being new cell type.
18 28
 #' @param parent_cell string indicating the name of the parent cell type, 
19 29
 #' if parent cell type classifier has already been saved in model database.
20 30
 #' Adjust path_to_models for exact database.  
31
+#' @param parent_tag_slot string, name of a slot in cell meta data 
32
+#' indicating assigned/predicted cell type. Default is "predicted_cell_type". 
33
+#' This slot would have been filled automatically 
34
+#' if the user has called the classify_cells function.
35
+#' The slot must contain only string values. 
21 36
 #' @param parent_classifier classification model for the parent cell type
22 37
 #' @param path_to_models path to the folder containing the model database. 
23 38
 #' As default, the pretrained models in the package will be used. 
24 39
 #' If user has trained new models, indicate the folder containing the 
25 40
 #' new_models.rda file.
26 41
 #' @param zscore whether gene expression in train_obj is transformed to zscore
27
-#' @param ... arguments passed to other methods
28 42
 #' 
29 43
 #' @return \code{\link{scAnnotatR}} object
30 44
 #'
... ...
@@ -35,34 +49,6 @@
35 49
 #' as parent cell type. For example, when training for B cells, 
36 50
 #' plasma cells must be annotated as B cells in order to be used.
37 51
 #' 
38
-#' @export
39
-setGeneric("train_classifier", 
40
-           function(train_obj, cell_type, marker_genes, 
41
-                    parent_cell = NA_character_, 
42
-                    parent_classifier = NULL, path_to_models = "default", 
43
-                    zscore = TRUE, ...) 
44
-             standardGeneric("train_classifier"))
45
-
46
-#' @inherit train_classifier
47
-#' 
48
-#' @param seurat_tag_slot string, name of slot in cell meta data 
49
-#' indicating cell tag/label in the training object.
50
-#' Strings indicating cell types are expected in this slot.
51
-#' For \code{\link{Seurat}} object, default value is "active.ident".  
52
-#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
53
-#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
54
-#' 1/"yes"/T/TRUE: being new cell type.
55
-#' @param seurat_parent_tag_slot string, name of a slot in cell meta data 
56
-#' indicating assigned/predicted cell type. Default is "predicted_cell_type". 
57
-#' This slot would have been filled automatically 
58
-#' if user have called classify_cells function.
59
-#' The slot must contain only string values. 
60
-#' @param seurat_assay name of assay to use in training object. 
61
-#' Default to 'RNA' assay.
62
-#' @param seurat_slot type of expression data to use in training object. 
63
-#' For \code{\link{Seurat}} object, available types are: "counts", "data" 
64
-#' and "scale.data". Default to "counts", which contains unnormalized data.
65
-#' 
66 52
 #' @examples
67 53
 #' # load small example dataset
68 54
 #' data("tirosh_mel80_example")
... ...
@@ -76,8 +62,9 @@ setGeneric("train_classifier",
76 62
 #' # train the classifier, the "cell_type" argument must match 
77 63
 #' # the cell labels in the data, except upper/lower case
78 64
 #' set.seed(123)
79
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
80
-#' marker_genes = selected_marker_genes_B, cell_type = "b cells")
65
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
66
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
67
+#' cell_type = "b cells", tag_slot = 'active.ident')
81 68
 #' 
82 69
 #' # classify cell types using B cell classifier, 
83 70
 #' # a test classifier process may be used before applying the classifier 
... ...
@@ -96,18 +83,78 @@ setGeneric("train_classifier",
96 83
 #' # for the training process.
97 84
 #' set.seed(123)
98 85
 #' plasma_classifier <- train_classifier(train_obj = tirosh_mel80_example, 
99
-#' cell_type = "Plasma cell", marker_genes = p_marker_genes, 
100
-#' parent_classifier = classifier_b, seurat_tag_slot = 'plasma_cell_tag')
86
+#' assay = 'RNA', slot = 'counts', cell_type = 'Plasma cell', 
87
+#' marker_genes = p_marker_genes, tag_slot = 'plasma_cell_tag',
88
+#' parent_classifier = classifier_b)
101 89
 #' 
102
-#' @importFrom Seurat GetAssayData
90
+#' @export
91
+train_classifier <- function(train_obj, assay, slot = NULL,
92
+                             cell_type, marker_genes, tag_slot, 
93
+                             parent_cell = NA_character_, 
94
+                             parent_tag_slot = 'predicted_cell_type',
95
+                             parent_classifier = NULL, path_to_models = "default", 
96
+                             zscore = TRUE) {
97
+  if (is(train_obj, 'Seurat')) {
98
+    object <- 
99
+      train_classifier_seurat(train_obj, cell_type, marker_genes, 
100
+                              parent_cell, parent_classifier, path_to_models, 
101
+                              zscore, tag_slot, parent_tag_slot, assay, slot)
102
+  } else if (is(train_obj, 'SingleCellExperiment')) {
103
+    object <- 
104
+      train_classifier_sce(train_obj, cell_type, marker_genes, 
105
+                              parent_cell, parent_classifier, path_to_models, 
106
+                              zscore, tag_slot, parent_tag_slot, assay)
107
+  } else {
108
+    stop('Training object of not supported class', call. = FALSE)
109
+  }
110
+  
111
+  return(object)
112
+}
113
+
114
+#' Train cell type classifier, when train_obj is Seurat object
103 115
 #' 
104
-#' @rdname train_classifier
105
-setMethod("train_classifier", c("train_obj" = "Seurat"), 
106
-          function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
107
-                   parent_classifier = NULL, path_to_models = "default", 
108
-                   zscore = TRUE, seurat_tag_slot = "active.ident", 
109
-                   seurat_parent_tag_slot = "predicted_cell_type", 
110
-                   seurat_assay = 'RNA', seurat_slot = 'counts', ...) {
116
+#' @description Train a classifier for a new cell type 
117
+#' If cell type has a parent, only available for \code{\link{scAnnotatR}}
118
+#' object as parent cell classifying model.
119
+#' 
120
+#' @param train_obj Seurat object 
121
+#' @param seurat_assay name of assay to use in training object. 
122
+#' @param seurat_slot type of expression data to use in training object
123
+#' @param cell_type string indicating the name of the subtype
124
+#' This must exactly match cell tag/label if cell tag/label is a string.
125
+#' @param marker_genes list of marker genes used for the new training model
126
+#' @param seurat_tag_slot string, name of slot in cell meta data 
127
+#' indicating cell tag/label in the training object.
128
+#' Strings indicating cell types are expected in this slot.
129
+#' For \code{\link{Seurat}} object, default value is "active.ident".  
130
+#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
131
+#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
132
+#' 1/"yes"/T/TRUE: being new cell type.
133
+#' @param parent_cell string indicated the name of the parent cell type, 
134
+#' if parent cell type classifier has already been saved in model database.
135
+#' Adjust path_to_models for exact database.  
136
+#' @param seurat_parent_tag_slot string, name of a slot in cell meta data 
137
+#' indicating assigned/predicted cell type. Default is "predicted_cell_type". 
138
+#' This slot would have been filled automatically 
139
+#' if the user has called the classify_cells function.
140
+#' The slot must contain only string values. 
141
+#' @param parent_classifier classification model for the parent cell type
142
+#' @param path_to_models path to the folder containing the model database. 
143
+#' As default, the pretrained models in the package will be used. 
144
+#' If user has trained new models, indicate the folder containing the 
145
+#' new_models.rda file.
146
+#' @param zscore whether gene expression in train_obj is transformed to zscore
147
+#' 
148
+#' @return \code{\link{scAnnotatR}} object
149
+#' 
150
+#' @importFrom Seurat GetAssayData Idents
151
+#' 
152
+#' @rdname internal
153
+train_classifier_seurat <- 
154
+  function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
155
+           parent_classifier = NULL, path_to_models = "default", zscore = TRUE,
156
+           seurat_tag_slot, seurat_parent_tag_slot = 'predicted_cell_type', 
157
+           seurat_assay, seurat_slot) {
111 158
   # convert Seurat object to matrix
112 159
   mat = Seurat::GetAssayData(object = train_obj, 
113 160
                              assay = seurat_assay, slot = seurat_slot)
... ...
@@ -126,43 +173,59 @@ setMethod("train_classifier", c("train_obj" = "Seurat"),
126 173
     names(parent_tag) <- colnames(train_obj)
127 174
   } else parent_tag <- NULL
128 175
   
129
-  object <- train_classifier_func(mat, tag, cell_type, marker_genes,
176
+  object <- train_classifier_from_mat(mat, tag, cell_type, marker_genes,
130 177
                                   parent_tag, parent_cell, parent_classifier,
131 178
                                   path_to_models, zscore)
132 179
   return(object)
133
-})
180
+}
134 181
 
135
-#' @inherit train_classifier
182
+#' Train cell type classifier, when train_obj is SCE object
136 183
 #' 
137
-#' @param sce_tag_slot string, name of annotation slot indicating 
138
-#' cell tag/label in the training object.
139
-#' For \code{\link{SingleCellExperiment}} object, default value is "ident".  
184
+#' @description Train a classifier for a new cell type 
185
+#' If cell type has a parent, only available for \code{\link{scAnnotatR}}
186
+#' object as parent cell classifying model.
187
+#' 
188
+#' @param train_obj SCE object 
189
+#' @param sce_assay name of assay to use in training object. 
190
+#' @param cell_type string indicating the name of the subtype
191
+#' This must exactly match cell tag/label if cell tag/label is a string.
192
+#' @param marker_genes list of marker genes used for the new training model
193
+#' @param sce_tag_slot string, name of slot in cell meta data 
194
+#' indicating cell tag/label in the training object.
195
+#' Strings indicating cell types are expected in this slot.
196
+#' This argument has no default value and must be supplied explicitly.
140 197
 #' Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
141 198
 #' or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
142 199
 #' 1/"yes"/T/TRUE: being new cell type.
200
+#' @param parent_cell string indicating the name of the parent cell type, 
201
+#' if parent cell type classifier has already been saved in model database.
202
+#' Adjust path_to_models for exact database.  
143 203
 #' @param sce_parent_tag_slot string, name of a slot in cell meta data 
144
-#' indicating pre-assigned/predicted cell type. 
145
-#' Default field is "predicted_cell_type".
146
-#' This field would have been filled automatically 
147
-#' when user called classify_cells function. 
204
+#' indicating assigned/predicted cell type. Default is "predicted_cell_type". 
205
+#' This slot would have been filled automatically 
206
+#' if the user has called the classify_cells function.
148 207
 #' The slot must contain only string values. 
149
-#' @param sce_assay name of assay to use in training object. 
150
-#' Default to 'logcounts' assay.
208
+#' @param parent_classifier classification model for the parent cell type
209
+#' @param path_to_models path to the folder containing the model database. 
210
+#' As default, the pretrained models in the package will be used. 
211
+#' If user has trained new models, indicate the folder containing the 
212
+#' new_models.rda file.
213
+#' @param zscore whether gene expression in train_obj is transformed to zscore
151 214
 #' 
215
+#' @return \code{\link{scAnnotatR}} object
216
+#'  
152 217
 #' @import SingleCellExperiment
153 218
 #' @importFrom SummarizedExperiment assay
154 219
 #' 
155
-#' @rdname train_classifier
156
-setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"), 
157
-          function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
158
-                   parent_classifier = NULL, path_to_models = "default", 
159
-                   zscore = TRUE, sce_tag_slot = "ident", 
160
-                   sce_parent_tag_slot = "predicted_cell_type", 
161
-                   sce_assay = 'logcounts', ...) {
220
+#' @rdname internal
221
+train_classifier_sce <- 
222
+  function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
223
+           parent_classifier = NULL, path_to_models = "default", zscore = TRUE, 
224
+           sce_tag_slot, sce_parent_tag_slot = "predicted_cell_type", sce_assay) {
162 225
   # solve duplication of cell names
163 226
   colnames(train_obj) <- make.unique(colnames(train_obj), sep = '_')
164 227
   
165
-  # convert Seurat object to matrix
228
+  # convert SCE object to matrix
166 229
   mat = SummarizedExperiment::assay(train_obj, sce_assay)
167 230
   
168 231
   tag = SummarizedExperiment::colData(train_obj)[, sce_tag_slot]
... ...
@@ -173,12 +236,12 @@ setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"),
173 236
     names(parent_tag) <- colnames(train_obj)
174 237
   } else parent_tag <- NULL
175 238
   
176
-  object <- train_classifier_func(mat, tag, cell_type, marker_genes, 
239
+  object <- train_classifier_from_mat(mat, tag, cell_type, marker_genes, 
177 240
                                   parent_tag, parent_cell, parent_classifier,
178 241
                                   path_to_models, zscore)
179 242
   
180 243
   return(object)
181
-})
244
+}
182 245
 
183 246
 #' Train cell type from matrix
184 247
 #' 
... ...
@@ -205,7 +268,7 @@ setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"),
205 268
 #' @return caret trained model
206 269
 #' 
207 270
 #' @rdname internal
208
-train_classifier_func <- function(mat, tag, cell_type, marker_genes, 
271
+train_classifier_from_mat <- function(mat, tag, cell_type, marker_genes, 
209 272
                                   parent_tag, parent_cell, parent_classifier, 
210 273
                                   path_to_models, zscore) {
211 274
   #--- part of parent cell type
... ...
@@ -283,21 +346,34 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes,
283 346
 #' 
284 347
 #' @description Testing process. 
285 348
 #' 
286
-#' @param test_obj xxobject that can be used for testing
287
-#' @param classifier classification model
349
+#' @param test_obj object that can be used for testing
350
+#' @param assay name of assay to use in test_object
351
+#' @param slot type of expression data to use in test_object. 
352
+#' For Seurat object, some available types are: "counts", "data" and "scale.data".
353
+#' Ignore this if test_obj is \code{\link{SingleCellExperiment}} object.
354
+#' @param classifier scAnnotatR classification model
355
+#' @param tag_slot string, name of annotation slot 
356
+#' indicating cell tag/label in the testing object.
357
+#' Strings indicating cell types are expected in this slot. 
358
+#' Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
359
+#' or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
360
+#' 1/"yes"/T/TRUE: being new cell type.
288 361
 #' @param target_cell_type vector indicating other cell types than cell labels 
289 362
 #' that can be considered as the main cell type in classifier, 
290 363
 #' for example, c("plasma cell", "b cell", "b cells", "activating b cell"). 
291 364
 #' Default as NULL.
292 365
 #' @param parent_classifier \code{\link{scAnnotatR}} object
293 366
 #' corresponding to classification model for the parent cell type
367
+#' @param parent_tag_slot string, name of tag slot in cell meta data
368
+#' indicating pre-assigned/predicted parent cell type. 
369
+#' Default field is "predicted_cell_type".
370
+#' The slot must contain only string values. 
294 371
 #' @param path_to_models path to the folder containing the list of models. 
295 372
 #' As default, the pretrained models in the package will be used. 
296 373
 #' If user has trained new models, indicate the folder containing 
297 374
 #' the new_models.rda file.
298 375
 #' @param zscore boolean, whether gene expression is transformed to zscore
299
-#' @param ... arguments passed to other methods
300
-#'
376
+#' 
301 377
 #' @return result of testing process in form of a list, 
302 378
 #' including predicted values, prediction accuracy at a probability threshold, 
303 379
 #' and roc curve information.
... ...
@@ -309,60 +385,99 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes,
309 385
 #' For example, when testing for B cells, plasma cells can be annotated as 
310 386
 #' B cells, or target_cell_type is set c("plasma cells").
311 387
 #' 
388
+#' @examples
389
+#' # load small example dataset
390
+#' data("tirosh_mel80_example")
391
+#' 
392
+#' # train the classifier
393
+#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
394
+#' set.seed(123)
395
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
396
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
397
+#' cell_type = "b cells", tag_slot = 'active.ident')
398
+#' 
399
+#' # test the classifier, target cell type can be in other formats or
400
+#' # alternative cell type that can be considered as the classified cell type 
401
+#' classifier_b_test <- test_classifier(classifier = classifier_b, 
402
+#' test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', 
403
+#' tag_slot = 'active.ident', target_cell_type = c("B cell"))
404
+#' classifier_b_test
405
+#' 
312 406
 #' @export
313
-setGeneric("test_classifier", function(test_obj, classifier, 
314
-                                       target_cell_type = NULL, 
315
-                                       parent_classifier = NULL, 
316
-                                       path_to_models = "default", 
317
-                                       zscore = TRUE, ...) 
407
+setGeneric("test_classifier", 
408
+           function(classifier, test_obj, assay, slot = NULL, tag_slot,
409
+                    target_cell_type = NULL, parent_classifier = NULL,
410
+                    parent_tag_slot = 'predicted_cell_type', 
411
+                    path_to_models = "default", zscore = TRUE) 
318 412
   standardGeneric("test_classifier"))
319 413
 
320 414
 #' @inherit test_classifier
321 415
 #' 
416
+#' @rdname test_classifier
417
+setMethod('test_classifier', c('classifier' = 'scAnnotatR'), 
418
+          function(classifier, test_obj, assay, slot = NULL, tag_slot,
419
+                   target_cell_type = NULL, parent_classifier = NULL,
420
+                   parent_tag_slot = 'predicted_cell_type', 
421
+                   path_to_models = "default", zscore = TRUE) {
422
+  if (is(test_obj, 'Seurat')) {
423
+    return_val <- 
424
+      test_classifier_seurat(test_obj, classifier, target_cell_type, 
425
+                             parent_classifier, path_to_models, zscore, 
426
+                             tag_slot, parent_tag_slot, assay, slot)
427
+  } else if (is(test_obj, 'SingleCellExperiment')) {
428
+    return_val <- 
429
+      test_classifier_sce(test_obj, classifier, target_cell_type, 
430
+                          parent_classifier, path_to_models, zscore, 
431
+                          tag_slot, parent_tag_slot, assay)
432
+  } else {
433
+    stop('Testing object of not supported class', call. = FALSE)
434
+  }
435
+  return(return_val)
436
+})
437
+
438
+#' Testing process for Seurat object
439
+#' 
440
+#' @description Testing process when test object is of type Seurat
441
+#' 
442
+#' @param test_obj Seurat object used for testing
443
+#' @param seurat_assay name of assay to use in test_object
444
+#' @param seurat_slot type of expression data to use in test_object. 
445
+#' For Seurat object, some available types are: "counts", "data" and "scale.data".
446
+#' @param classifier scAnnotatR classification model
322 447
 #' @param seurat_tag_slot string, name of annotation slot 
323 448
 #' indicating cell tag/label in the testing object.
324 449
 #' Strings indicating cell types are expected in this slot. 
325
-#' For \code{\link{Seurat}} object, default value is "active.ident". 
326 450
 #' Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
327 451
 #' or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
328 452
 #' 1/"yes"/T/TRUE: being new cell type.
453
+#' @param target_cell_type vector indicating other cell types than cell labels 
454
+#' that can be considered as the main cell type in classifier, 
455
+#' for example, c("plasma cell", "b cell", "b cells", "activating b cell"). 
456
+#' Default as NULL.
457
+#' @param parent_classifier \code{\link{scAnnotatR}} object
458
+#' corresponding to classification model for the parent cell type
329 459
 #' @param seurat_parent_tag_slot string, name of tag slot in cell meta data
330 460
 #' indicating pre-assigned/predicted parent cell type. 
331 461
 #' Default field is "predicted_cell_type".
332 462
 #' The slot must contain only string values. 
333
-#' @param seurat_assay name of assay to use in 
334
-#' \code{\link{Seurat}} object, defaults to 'RNA' assay.
335
-#' @param seurat_slot type of expression data to use in 
336
-#' \code{\link{Seurat}} object. 
337
-#' Some available types are: "counts", "data" and "scale.data". 
338
-#' Default to "counts", which contains unnormalized data.
339
-#'  
340
-#' @examples
341
-#' # load small example dataset
342
-#' data("tirosh_mel80_example")
343
-#' 
344
-#' # train the classifier
345
-#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
346
-#' set.seed(123)
347
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
348
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
463
+#' @param path_to_models path to the folder containing the list of models. 
464
+#' As default, the pretrained models in the package will be used. 
465
+#' If user has trained new models, indicate the folder containing 
466
+#' the new_models.rda file.
467
+#' @param zscore boolean, whether gene expression is transformed to zscore
349 468
 #' 
350
-#' # test the classifier, target cell type can be in other formats or
351
-#' # alternative cell type that can be considered as the classified cell type 
352
-#' classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, 
353
-#' classifier = classifier_b, target_cell_type = c("B cell"))
354
-#' classifier_b_test
469
+#' @return result of testing process in form of a list, 
470
+#' including predicted values, prediction accuracy at a probability threshold, 
471
+#' and roc curve information.
355 472
 #' 
356 473
 #' @importFrom Seurat GetAssayData
357 474
 #'
358
-#' @rdname test_classifier
359
-setMethod("test_classifier", c("test_obj" = "Seurat", 
360
-                               "classifier" = "scAnnotatR"), 
361
-          function(test_obj, classifier, target_cell_type = NULL, 
362
-                   parent_classifier = NULL, path_to_models = "default", 
363
-                   zscore = TRUE, seurat_tag_slot = "active.ident", 
364
-                   seurat_parent_tag_slot = "predicted_cell_type", 
365
-                   seurat_assay = 'RNA', seurat_slot = 'counts', ...) {
475
+#' @rdname internal
476
+test_classifier_seurat <- 
477
+  function(test_obj, classifier, target_cell_type = NULL, 
478
+           parent_classifier = NULL, path_to_models = "default", zscore = TRUE, 
479
+           seurat_tag_slot, seurat_parent_tag_slot = "predicted_cell_type", 
480
+           seurat_assay, seurat_slot) {
366 481
   . <- fpr <- tpr <- NULL
367 482
   # convert Seurat object to matrix
368 483
   mat = Seurat::GetAssayData(
... ...
@@ -382,39 +497,53 @@ setMethod("test_classifier", c("test_obj" = "Seurat",
382 497
     names(parent_tag) <- colnames(test_obj)
383 498
   } else parent_tag <- NULL
384 499
   
385
-  return_val <- test_classifier_func(mat, tag, classifier, parent_tag,
500
+  return_val <- test_classifier_from_mat(mat, tag, classifier, parent_tag,
386 501
                                      target_cell_type, parent_classifier,
387 502
                                      path_to_models, zscore)
388 503
   return(return_val)
389
-})
504
+}
390 505
 
391
-#' @inherit test_classifier
506
+#' Testing process for SCE object
507
+#' 
508
+#' @description Testing process when test object is of type SCE
392 509
 #' 
510
+#' @param test_obj SCE object used for testing
511
+#' @param sce_assay name of assay to use in test_object
512
+#' @param classifier scAnnotatR classification model
393 513
 #' @param sce_tag_slot string, name of annotation slot 
394 514
 #' indicating cell tag/label in the testing object.
395 515
 #' Strings indicating cell types are expected in this slot. 
396
-#' Default value is "ident".  
397 516
 #' Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
398 517
 #' or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
399 518
 #' 1/"yes"/T/TRUE: being new cell type.
519
+#' @param target_cell_type vector indicating other cell types than cell labels 
520
+#' that can be considered as the main cell type in classifier, 
521
+#' for example, c("plasma cell", "b cell", "b cells", "activating b cell"). 
522
+#' Default as NULL.
523
+#' @param parent_classifier \code{\link{scAnnotatR}} object
524
+#' corresponding to classification model for the parent cell type
400 525
 #' @param sce_parent_tag_slot string, name of tag slot in cell meta data
401 526
 #' indicating pre-assigned/predicted parent cell type. 
402
-#' Default is "predicted_cell_type".
527
+#' Default field is "predicted_cell_type".
403 528
 #' The slot must contain only string values. 
404
-#' @param sce_assay name of assay to use in \code{\link{SingleCellExperiment}}
405
-#' object, defaults to 'logcounts' assay.
406
-#'  
529
+#' @param path_to_models path to the folder containing the list of models. 
530
+#' As default, the pretrained models in the package will be used. 
531
+#' If user has trained new models, indicate the folder containing 
532
+#' the new_models.rda file.
533
+#' @param zscore boolean, whether gene expression is transformed to zscore
534
+#' 
535
+#' @return result of testing process in form of a list, 
536
+#' including predicted values, prediction accuracy at a probability threshold, 
537
+#' and roc curve information.
538
+#' 
407 539
 #' @import SingleCellExperiment
408 540
 #' @importFrom SummarizedExperiment assay
409 541
 #' 
410
-#' @rdname test_classifier
411
-setMethod("test_classifier", c("test_obj" = "SingleCellExperiment", 
412
-                               "classifier" = "scAnnotatR"), 
413
-          function(test_obj, classifier, target_cell_type = NULL, 
414
-                   parent_classifier = NULL, path_to_models = "default", 
415
-                   zscore = TRUE, sce_tag_slot = "ident", 
416
-                   sce_parent_tag_slot = "predicted_cell_type", 
417
-                   sce_assay = 'logcounts', ...) {
542
+#' @rdname internal
543
+test_classifier_sce <- 
544
+  function(test_obj, classifier, target_cell_type = NULL, 
545
+           parent_classifier = NULL, path_to_models = "default", zscore = TRUE, 
546
+           sce_tag_slot, sce_parent_tag_slot = "predicted_cell_type", sce_assay) {
418 547
   # solve duplication of cell names
419 548
   colnames(test_obj) <- make.unique(colnames(test_obj), sep = '_')
420 549
   . <- fpr <- tpr <- NULL
... ...
@@ -430,12 +559,12 @@ setMethod("test_classifier", c("test_obj" = "SingleCellExperiment",
430 559
     names(parent_tag) <- colnames(test_obj)
431 560
   } else parent_tag <- NULL
432 561
   
433
-  return_val <- test_classifier_func(mat, tag, classifier, parent_tag,
562
+  return_val <- test_classifier_from_mat(mat, tag, classifier, parent_tag,
434 563
                                      target_cell_type, parent_classifier,
435 564
                                      path_to_models, zscore)
436 565
   
437 566
   return(return_val)
438
-})
567
+}
439 568
 
440 569
 #' Run testing process from matrix and tag
441 570
 #' 
... ...
@@ -458,7 +587,7 @@ setMethod("test_classifier", c("test_obj" = "SingleCellExperiment",
458 587
 #' @return model performance statistics
459 588
 #' 
460 589
 #' @rdname internal
461
-test_classifier_func <- function(mat, tag, classifier, parent_tag, 
590
+test_classifier_from_mat <- function(mat, tag, classifier, parent_tag, 
462 591
                                  target_cell_type, parent_classifier,
463 592
                                  path_to_models, zscore) {
464 593
   # target_cell_type check
... ...
@@ -522,11 +651,13 @@ test_classifier_func <- function(mat, tag, classifier, parent_tag,
522 651
 #' # train a classifier, for ex: B cell
523 652
 #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
524 653
 #' set.seed(123)
525
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
526
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
654
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
655
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
656
+#' cell_type = "b cells", tag_slot = 'active.ident')
527 657
 #' 
528
-#' classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, 
529
-#' classifier = classifier_b)
658
+#' classifier_b_test <- test_classifier(classifier = classifier_b, 
659
+#' test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', 
660
+#' tag_slot = 'active.ident', target_cell_type = c("B cell"))
530 661
 #' 
531 662
 #' # run plot curve on the test result
532 663
 #' roc_curve <- plot_roc_curve(test_result = classifier_b_test)
... ...
@@ -608,14 +739,16 @@ setGeneric("classify_cells", function(classify_obj, classifiers = NULL,
608 739
 #' 
609 740
 #' # train the classifier
610 741
 #' set.seed(123)
611
-#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
612
-#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
742
+#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
743
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
744
+#' cell_type = "b cells", tag_slot = 'active.ident')
613 745
 #' 
614 746
 #' # do the same thing with other cell types, for example, T cells
615 747
 #' selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
616 748
 #' set.seed(123)
617
-#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example, 
618
-#' marker_genes = selected_marker_genes_T, cell_type = "T cells")
749
+#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
750
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, 
751
+#' cell_type = "T cells", tag_slot = 'active.ident')
619 752
 #' 
620 753
 #' # create a list of classifiers
621 754
 #' classifier_ls <- list(classifier_b, classifier_t)
... ...
@@ -168,17 +168,8 @@ select_marker_genes <- function(mat, marker_genes) {
168 168
 #' 
169 169
 #' @return list of adjusted tag
170 170
 #' @rdname internal
171
-setGeneric("check_parent_child_coherence", 
172
-           function(mat, tag, pos_parent, parent_cell, cell_type, 
173
-                    target_cell_type) 
174
-             standardGeneric("check_parent_child_coherence"))
175
-
176
-#' @inherit check_parent_child_coherence
177
-#' 
178
-#' @rdname internal
179
-setMethod("check_parent_child_coherence", c("mat" = "dgCMatrix", 'tag' = 'vector'), 
180
-          function(mat, tag, pos_parent, parent_cell, cell_type, 
181
-                   target_cell_type) {
171
+check_parent_child_coherence <- function(mat, tag, pos_parent, parent_cell, 
172
+                                         cell_type, target_cell_type) {
182 173
   pos.val <- c(1, "yes", TRUE)
183 174
   
184 175
   # prepare (sub) cell type tag  
... ...
@@ -205,7 +196,7 @@ setMethod("check_parent_child_coherence", c("mat" = "dgCMatrix", 'tag' = 'vector
205 196
   #SummarizedExperiment::colData(obj)[, tag_slot] <- new.tag_slot
206 197
   
207 198
   return(new_tag)
208
-})
199
+}
209 200
 
210 201
 #' Filter cells from ambiguous chars and non applicable cells
211 202
 #' Ambiguous characters include: "/", ",", "-", "+", ".", "and", 
... ...
@@ -216,33 +207,26 @@ setMethod("check_parent_child_coherence", c("mat" = "dgCMatrix", 'tag' = 'vector
216 207
 #' 
217 208
 #' @return filtered matrix and corresponding tag
218 209
 #' @rdname internal
219
-setGeneric("filter_cells", function(mat, tag) 
220
-  standardGeneric("filter_cells"))
221
-
222
-#' @inherit filter_cells
223
-#' 
224
-#' @rdname internal
225
-setMethod("filter_cells", c("mat" = "dgCMatrix", "tag" = "vector"), 
226
-          function(mat, tag) {
227
-            # define characters usually included in ambiguous cell types
228
-            # this is to avoid considering ambiguous cell types as negative cell_type
229
-            ambiguous.chars <- c("/", ",", " -", " [+]", "[.]", " and ", 
230
-                                 " or ", "_or_", "-or-", "[(]" ,"[)]", "ambiguous")
231
-            
232
-            # only eliminate cell labels containing cell_type and ambiguous.chars
233
-            ambiguous <- grepl(paste(ambiguous.chars, collapse="|"), tag)
234
-            n.applicable <- (grepl("not applicable", tag) | is.na(tag))
235
-            
236
-            if (any(ambiguous))
237
-              warning('Cell types containing "/", ",", "-", "+", ".", "and", "or", "(", ")", and "ambiguous" are considered as ambiguous. They are removed from training and testing.\n', 
238
-                      call. = FALSE, immediate. = TRUE)
239
-            #obj <- obj[, !(ambiguous | n.applicable)]
240
-            mat <- mat[, !(ambiguous | n.applicable), drop = FALSE]
241
-            tag <- tag[!(ambiguous | n.applicable)]
242
-            
243
-            filtered <- list('mat' = mat, 'tag' = tag)
244
-            return(filtered)
245
-          })
210
+filter_cells <- function(mat, tag) {
211
+  # define characters usually included in ambiguous cell types
212
+  # this is to avoid considering ambiguous cell types as negative cell_type
213
+  ambiguous.chars <- c("/", ",", " -", " [+]", "[.]", " and ", 
214
+                       " or ", "_or_", "-or-", "[(]" ,"[)]", "ambiguous")
215
+  
216
+  # only eliminate cell labels containing cell_type and ambiguous.chars
217
+  ambiguous <- grepl(paste(ambiguous.chars, collapse="|"), tag)
218
+  n.applicable <- (grepl("not applicable", tag) | is.na(tag))
219
+  
220
+  if (any(ambiguous))
221
+    warning('Cell types containing "/", ",", "-", "+", ".", "and", "or", "(", ")", and "ambiguous" are considered as ambiguous. They are removed from training and testing.\n', 
222
+            call. = FALSE, immediate. = TRUE)
223
+  #obj <- obj[, !(ambiguous | n.applicable)]
224
+  mat <- mat[, !(ambiguous | n.applicable), drop = FALSE]
225
+  tag <- tag[!(ambiguous | n.applicable)]
226
+  
227
+  filtered <- list('mat' = mat, 'tag' = tag)
228
+  return(filtered)
229
+}
246 230
 
247 231
 #' Construct tag vector
248 232
 #' 
... ...
@@ -254,25 +238,17 @@ setMethod("filter_cells", c("mat" = "dgCMatrix", "tag" = "vector"),
254 238
 #' @return a binary vector for cell tag
255 239
 #' 
256 240
 #' @rdname internal
257
-setGeneric("construct_tag_vect", 
258
-           function(tag, cell_type) 
259
-             standardGeneric("construct_tag_vect"))
260
-
261
-#' @inherit construct_tag_vect
262
-#' 
263
-#' @rdname internal
264
-setMethod("construct_tag_vect", c("tag" = "vector"), 
265
-          function(tag, cell_type) {
266
-            pos.val <- c(1, "yes", TRUE)
267
-            
268
-            # x <- SummarizedExperiment::colData(obj)[, tag_slot] 
269
-            test <- (tag %in% pos.val) | (tolower(tag) %in% tolower(cell_type))
270
-            new_tag <- ifelse(test, "yes", "no")
271
-            
272
-            named_tag = setNames(new_tag, names(tag))
273
-            
274
-            return(named_tag)
275
-          })
241
+construct_tag_vect <- function(tag, cell_type) {
242
+  pos.val <- c(1, "yes", TRUE)
243
+  
244
+  # x <- SummarizedExperiment::colData(obj)[, tag_slot] 
245
+  test <- (tag %in% pos.val) | (tolower(tag) %in% tolower(cell_type))
246
+  new_tag <- ifelse(test, "yes", "no")
247
+  
248
+  named_tag = setNames(new_tag, names(tag))
249
+  
250
+  return(named_tag)
251
+}
276 252
 
277 253
 #' Process parent classifier
278 254
 #' 
... ...
@@ -292,17 +268,8 @@ setMethod("construct_tag_vect", c("tag" = "vector"),
292 268
 #' @import dplyr
293 269
 #' 
294 270
 #' @rdname internal
295
-setGeneric("process_parent_classifier", 
296
-           function(mat, parent_tag, parent_cell_type, parent_classifier, 
297
-                    path_to_models, zscore = TRUE) 
298
-             standardGeneric("process_parent_classifier"))
299
-
300
-#' @inherit process_parent_classifier
301
-#' 
302
-#' @rdname internal
303
-setMethod("process_parent_classifier", c("mat" = "dgCMatrix"), 
304
-          function(mat, parent_tag, parent_cell_type, parent_classifier, 
305
-                   path_to_models, zscore = TRUE) {
271
+process_parent_classifier <- function(mat, parent_tag, parent_cell_type, 
272
+                                      parent_classifier, path_to_models, zscore) {
306 273
     pos_parent <- parent.classifier <- . <- model_list <- NULL
307 274
     
308 275
     if (is.na(parent_cell_type) && !is.null(parent_classifier))
... ...
@@ -368,7 +335,7 @@ setMethod("process_parent_classifier", c("mat" = "dgCMatrix"),
368 335
     return_val <- list('pos_parent' = pos_parent, 'parent_cell'= parent_cell_type,
369 336
                        'parent.classifier' = parent.classifier, 'model_list' = model_list)
370 337
     return(return_val)
371
-})
338
+}
372 339
 
373 340
 #' Make prediction
374 341
 #'
... ...
@@ -22,8 +22,9 @@
22 22
 #' # train classifier
23 23
 #' selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
24 24
 #' set.seed(123)
25
-#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example, 
26
-#' marker_genes = selected_marker_genes_T, cell_type = "t cells")
25
+#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
26
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, 
27
+#' cell_type = "t cells", tag_slot = 'active.ident')
27 28
 #' 
28 29
 #' # save the trained classifier to system 
29 30
 #' # test classifier can be used before this step
... ...
@@ -150,8 +151,9 @@ plant_tree <- function(path_to_models = "default") {
150 151
 #' # train a classifier
151 152
 #' set.seed(123)
152 153
 #' selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
153
-#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example, 
154
-#' marker_genes = selected_marker_genes_T, cell_type = "t cells")
154
+#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
155
+#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, 
156
+#' cell_type = "t cells", tag_slot = 'active.ident')
155 157
 #' 
156 158
 #' # save a classifier to system
157 159
 #' save_new_model(new_model = classifier_t, path_to_models = tempdir())
... ...
@@ -20,8 +20,9 @@ Returns the caret model of the \code{\link{scAnnotatR}} object
20 20
 data("tirosh_mel80_example")
21 21
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
22 22
 set.seed(123)
23
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
24
-marker_genes = selected_marker_genes_B, cell_type = "B cells")
23
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
24
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
25
+cell_type = "B cells", tag_slot = 'active.ident')
25 26
 caret_model(classifier_b)
26 27
  
27 28
 }
... ...
@@ -27,14 +27,16 @@ Returns the cell type for the given classifier.
27 27
 data("tirosh_mel80_example")
28 28
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
29 29
 set.seed(123)
30
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
31
-cell_type = "B cells", marker_genes = selected_marker_genes_B)
30
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
31
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
32
+cell_type = "B cells", tag_slot = 'active.ident')
32 33
 cell_type(classifier_b)
33 34
 
34 35
 data("tirosh_mel80_example")
35 36
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
36 37
 set.seed(123)
37
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
38
-marker_genes = selected_marker_genes_B, cell_type = "B cells")
38
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
39
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
40
+cell_type = "B cells", tag_slot = 'active.ident')
39 41
 cell_type(classifier_b) <- "B cell"
40 42
 }
... ...
@@ -107,14 +107,16 @@ selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
107 107
 
108 108
 # train the classifier
109 109
 set.seed(123)
110
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
111
-marker_genes = selected_marker_genes_B, cell_type = "B cells")
110
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
111
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
112
+cell_type = "b cells", tag_slot = 'active.ident')
112 113
 
113 114
 # do the same thing with other cell types, for example, T cells
114 115
 selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
115 116
 set.seed(123)
116
-classifier_t <- train_classifier(train_obj = tirosh_mel80_example, 
117
-marker_genes = selected_marker_genes_T, cell_type = "T cells")
117
+classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
118
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, 
119
+cell_type = "T cells", tag_slot = 'active.ident')
118 120
 
119 121
 # create a list of classifiers
120 122
 classifier_ls <- list(classifier_b, classifier_t)
... ...
@@ -27,8 +27,9 @@ data("tirosh_mel80_example")
27 27
 # train a classifier
28 28
 set.seed(123)
29 29
 selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
30
-classifier_t <- train_classifier(train_obj = tirosh_mel80_example, 
31
-marker_genes = selected_marker_genes_T, cell_type = "t cells")
30
+classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
31
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, 
32
+cell_type = "t cells", tag_slot = 'active.ident')
32 33
 
33 34
 # save a classifier to system
34 35
 save_new_model(new_model = classifier_t, path_to_models = tempdir())
... ...
@@ -13,20 +13,20 @@
13 13
 \alias{caret_model<-,scAnnotatR-method}
14 14
 \alias{marker_genes<-}
15 15
 \alias{marker_genes<-,scAnnotatR-method}
16
-\alias{train_classifier_func}
17
-\alias{test_classifier_func}
16
+\alias{train_classifier_seurat}
17
+\alias{train_classifier_sce}
18
+\alias{train_classifier_from_mat}
19
+\alias{test_classifier_seurat}
20
+\alias{test_classifier_sce}
21
+\alias{test_classifier_from_mat}
18 22
 \alias{balance_dataset}
19 23
 \alias{train_func}
20 24
 \alias{transform_to_zscore}
21 25
 \alias{select_marker_genes}
22 26
 \alias{check_parent_child_coherence}
23
-\alias{check_parent_child_coherence,dgCMatrix,vector-method}
24 27
 \alias{filter_cells}
25
-\alias{filter_cells,dgCMatrix,vector-method}
26 28
 \alias{construct_tag_vect}
27
-\alias{construct_tag_vect,vector-method}
28 29
 \alias{process_parent_classifier}
29
-\alias{process_parent_classifier,dgCMatrix-method}
30 30
 \alias{make_prediction}
31 31
 \alias{simplify_prediction}
32 32
 \alias{verify_parent}
... ...
@@ -60,7 +60,34 @@ marker_genes(classifier) <- value
60 60
 
61 61
 \S4method{marker_genes}{scAnnotatR}(classifier) <- value
62 62
 
63
-train_classifier_func(
63
+train_classifier_seurat(
64
+  train_obj,
65
+  cell_type,
66
+  marker_genes,
67
+  parent_cell = NA_character_,
68
+  parent_classifier = NULL,
69
+  path_to_models = "default",
70
+  zscore = TRUE,
71
+  seurat_tag_slot,
72
+  seurat_parent_tag_slot = "predicted_cell_type",
73
+  seurat_assay,
74
+  seurat_slot
75
+)
76
+
77
+train_classifier_sce(
78
+  train_obj,
79
+  cell_type,
80
+  marker_genes,
81
+  parent_cell = NA_character_,
82
+  parent_classifier = NULL,
83
+  path_to_models = "default",
84
+  zscore = TRUE,
85
+  sce_tag_slot,
86
+  sce_parent_tag_slot = "predicted_cell_type",
87
+  sce_assay
88
+)
89
+
90
+train_classifier_from_mat(
64 91
   mat,
65 92
   tag,
66 93
   cell_type,
... ...
@@ -72,7 +99,32 @@ train_classifier_func(
72 99
   zscore
73 100
 )
74 101
 
75
-test_classifier_func(
102
+test_classifier_seurat(
103
+  test_obj,
104
+  classifier,
105
+  target_cell_type = NULL,
106
+  parent_classifier = NULL,
107
+  path_to_models = "default",
108
+  zscore = TRUE,
109
+  seurat_tag_slot,
110
+  seurat_parent_tag_slot = "predicted_cell_type",
111
+  seurat_assay,
112
+  seurat_slot
113
+)
114
+
115
+test_classifier_sce(
116
+  test_obj,
117
+  classifier,
118
+  target_cell_type = NULL,
119
+  parent_classifier = NULL,
120
+  path_to_models = "default",
121
+  zscore = TRUE,
122
+  sce_tag_slot,
123
+  sce_parent_tag_slot = "predicted_cell_type",
124
+  sce_assay
125
+)
126
+
127
+test_classifier_from_mat(
76 128
   mat,
77 129
   tag,
78 130
   classifier,
... ...
@@ -100,39 +152,17 @@ check_parent_child_coherence(
100 152
   target_cell_type
101 153
 )
102 154
 
103
-\S4method{check_parent_child_coherence}{dgCMatrix,vector}(
104
-  mat,
105
-  tag,
106
-  pos_parent,
107
-  parent_cell,
108
-  cell_type,
109
-  target_cell_type
110
-)
111
-
112 155
 filter_cells(mat, tag)
113 156
 
114
-\S4method{filter_cells}{dgCMatrix,vector}(mat, tag)
115
-
116 157
 construct_tag_vect(tag, cell_type)
117 158
 
118
-\S4method{construct_tag_vect}{vector}(tag, cell_type)
119
-
120 159
 process_parent_classifier(
121 160
   mat,
122 161
   parent_tag,
123 162
   parent_cell_type,
124 163
   parent_classifier,
125 164
   path_to_models,
126
-  zscore = TRUE
127
-)
128
-
129
-\S4method{process_parent_classifier}{dgCMatrix}(
130
-  mat,
131
-  parent_tag,
132
-  parent_cell_type,
133
-  parent_classifier,
134
-  path_to_models,
135
-  zscore = TRUE
165
+  zscore
136 166
 )
137 167
 
138 168
 make_prediction(mat, classifier, pred_cells, ignore_ambiguous_result = TRUE)
... ...
@@ -166,12 +196,7 @@ download_data_file(verbose = FALSE)
166 196
 
167 197
 \item{value}{the new classifier}
168 198
 
169
-\item{mat}{expression matrix}
170
-
171
-\item{tag}{tag of data}
172
-
173
-\item{parent_tag}{vector, named list indicating pre-assigned/predicted 
174
-parent cell type}
199
+\item{train_obj}{SCE object}
175 200
 
176 201
 \item{parent_cell}{name of parent cell type}
177 202
 
... ...
@@ -183,6 +208,46 @@ to classification model for the parent cell type}
183 208
 \item{zscore}{boolean indicating the transformation of gene expression 
184 209
 in object to zscore or not}
185 210
 
211
+\item{seurat_tag_slot}{string, name of annotation slot 
212
+indicating cell tag/label in the testing object.
213
+Strings indicating cell types are expected in this slot. 
214
+Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
215
+or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
216
+1/"yes"/T/TRUE: being new cell type.}
217
+
218
+\item{seurat_parent_tag_slot}{string, name of tag slot in cell meta data
219
+indicating pre-assigned/predicted parent cell type. 
220
+Default field is "predicted_cell_type".
221
+The slot must contain only string values.}
222
+
223
+\item{seurat_assay}{name of assay to use in test_object}
224
+
225
+\item{seurat_slot}{type of expression data to use in test_object. 
226
+For Seurat object, some available types are: "counts", "data" and "scale.data".}
227
+
228
+\item{sce_tag_slot}{string, name of annotation slot 
229
+indicating cell tag/label in the testing object.
230
+Strings indicating cell types are expected in this slot. 
231
+Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
232
+or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
233
+1/"yes"/T/TRUE: being new cell type.}
234
+
235
+\item{sce_parent_tag_slot}{string, name of tag slot in cell meta data
236
+indicating pre-assigned/predicted parent cell type. 
237
+Default field is "predicted_cell_type".
238
+The slot must contain only string values.}
239
+
240
+\item{sce_assay}{name of assay to use in test_object}
241
+
242
+\item{mat}{expression matrix}
243
+
244
+\item{tag}{tag of data}
245
+
246
+\item{parent_tag}{vector, named list indicating pre-assigned/predicted 
247
+parent cell type}
248
+
249
+\item{test_obj}{SCE object used for testing}
250
+
186 251
 \item{target_cell_type}{alternative cell types (in case of testing classifier)}
187 252
 
188 253
 \item{pos_parent}{a vector indicating parent classifier prediction}
... ...
@@ -230,8 +295,20 @@ the classifier with the new marker genes
230 295
 
231 296
 scAnnotatR object with the new marker genes.
232 297
 
298
+\code{\link{scAnnotatR}} object
299
+
300
+\code{\link{scAnnotatR}} object
301
+
233 302
 caret trained model
234 303
 
304
+result of testing process in form of a list, 
305
+including predicted values, prediction accuracy at a probability threshold, 
306
+and roc curve information.
307
+
308
+result of testing process in form of a list, 
309
+including predicted values, prediction accuracy at a probability threshold, 
310
+and roc curve information.
311
+
235 312
 model performance statistics
236 313
 
237 314
 a list of balanced count matrix
... ...
@@ -266,10 +343,22 @@ path to the downloaded file in cache
266 343
 \description{
267 344
 Check if a scAnnotatR object is valid
268 345
 
346
+Train a classifier for a new cell type 
347
+If cell type has a parent, only available for \code{\link{scAnnotatR}}
348
+object as parent cell classifying model.
349
+
350
+Train a classifier for a new cell type 
351
+If cell type has a parent, only available for \code{\link{scAnnotatR}}
352
+object as parent cell classifying model.
353
+
269 354
 Train a classifier for a new cell type from expression matrix
270 355
 and tag 
271 356
 If cell type has a parent, only available for \code{\link{scAnnotatR}}
272 357
 object as parent cell classifying model.
273 358
 
359
+Testing process when test object is of type Seurat
360
+
361
+Testing process when test object is of type SCE
362
+
274 363
 Testing process from matrix and tag
275 364
 }
... ...
@@ -19,8 +19,9 @@ Returns the set of marker genes for the given classifier.
19 19
 data("tirosh_mel80_example")
20 20
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
21 21
 set.seed(123)
22
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
23
-marker_genes = selected_marker_genes_B, cell_type = "B cells")
22
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
23
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
24
+cell_type = "B cells", tag_slot = 'active.ident')
24 25
 marker_genes(classifier_b)
25 26
 
26 27
 }
... ...
@@ -27,17 +27,20 @@ Returns the probability threshold for the given classifier.
27 27
 data("tirosh_mel80_example")
28 28
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
29 29
 set.seed(123)
30
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
31
-marker_genes = selected_marker_genes_B, cell_type = "B cells")
30
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
31
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
32
+cell_type = "B cells", tag_slot = 'active.ident')
32 33
 p_thres(classifier_b)
33 34
 
34 35
 data("tirosh_mel80_example")
35 36
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
36 37
 set.seed(123)
37
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
38
-marker_genes = selected_marker_genes_B, cell_type = "B cells")
39
-classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, 
40
-classifier = classifier_b)
38
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
39
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
40
+cell_type = "B cells", tag_slot = 'active.ident')
41
+classifier_b_test <- test_classifier(classifier = classifier_b, 
42
+test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', 
43
+tag_slot = 'active.ident')
41 44
 # assign a new threshold probability for prediction
42 45
 p_thres(classifier_b) <- 0.4
43 46
 }
... ...
@@ -19,8 +19,9 @@ Returns the parent of the cell type corresponding to the given classifier.
19 19
 data("tirosh_mel80_example")
20 20
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
21 21
 set.seed(123)
22
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
23
-marker_genes = selected_marker_genes_B, cell_type = "B cells")
22
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
23
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
24
+cell_type = "B cells", tag_slot = 'active.ident')
24 25
 parent(classifier_b)
25 26
 
26 27
 }
... ...
@@ -22,11 +22,13 @@ data("tirosh_mel80_example")
22 22
 # train a classifier, for ex: B cell
23 23
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
24 24
 set.seed(123)
25
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
26
-marker_genes = selected_marker_genes_B, cell_type = "B cells")
25
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
26
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
27
+cell_type = "b cells", tag_slot = 'active.ident')
27 28
 
28
-classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, 
29
-classifier = classifier_b)
29
+classifier_b_test <- test_classifier(classifier = classifier_b, 
30
+test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', 
31
+tag_slot = 'active.ident', target_cell_type = c("B cell"))
30 32
 
31 33
 # run plot curve on the test result
32 34
 roc_curve <- plot_roc_curve(test_result = classifier_b_test)
... ...
@@ -33,8 +33,9 @@ data("tirosh_mel80_example")
33 33
 # train classifier
34 34
 selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
35 35
 set.seed(123)
36
-classifier_t <- train_classifier(train_obj = tirosh_mel80_example, 
37
-marker_genes = selected_marker_genes_T, cell_type = "t cells")
36
+classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
37
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T, 
38
+cell_type = "t cells", tag_slot = 'active.ident')
38 39
 
39 40
 # save the trained classifier to system 
40 41
 # test classifier can be used before this step
... ...
@@ -50,9 +50,9 @@ data("tirosh_mel80_example")
50 50
 # train a classifier, for ex: B cell
51 51
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
52 52
 set.seed(123)
53
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
54
-                          marker_genes = selected_marker_genes_B, 
55
-                          cell_type = "B cells")
53
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
54
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
55
+cell_type = "B cells", tag_slot = 'active.ident')
56 56
 
57 57
 classifier_b
58 58
 }
... ...
@@ -19,8 +19,9 @@ Show object
19 19
 data("tirosh_mel80_example")
20 20
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
21 21
 set.seed(123)
22
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
23
-marker_genes = selected_marker_genes_B, cell_type = "B cells")
22
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
23
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
24
+cell_type = "B cells", tag_slot = 'active.ident')
24 25
 classifier_b
25 26
 
26 27
 }
... ...
@@ -2,51 +2,52 @@
2 2
 % Please edit documentation in R/classifier.R
3 3
 \name{test_classifier}
4 4
 \alias{test_classifier}
5
-\alias{test_classifier,Seurat,scAnnotatR-method}
6
-\alias{test_classifier,SingleCellExperiment,scAnnotatR-method}
5
+\alias{test_classifier,scAnnotatR-method}
7 6
 \title{Testing process.}
8 7
 \usage{
9 8
 test_classifier(
10
-  test_obj,
11 9
   classifier,
12
-  target_cell_type = NULL,
13
-  parent_classifier = NULL,
14
-  path_to_models = "default",
15
-  zscore = TRUE,
16
-  ...
17
-)
18
-
19
-\S4method{test_classifier}{Seurat,scAnnotatR}(
20 10
   test_obj,
21
-  classifier,
11
+  assay,
12
+  slot = NULL,
13
+  tag_slot,
22 14
   target_cell_type = NULL,
23 15
   parent_classifier = NULL,
16
+  parent_tag_slot = "predicted_cell_type",
24 17
   path_to_models = "default",
25
-  zscore = TRUE,
26
-  seurat_tag_slot = "active.ident",
27
-  seurat_parent_tag_slot = "predicted_cell_type",
28
-  seurat_assay = "RNA",
29
-  seurat_slot = "counts",
30
-  ...
18
+  zscore = TRUE
31 19
 )
32 20
 
33
-\S4method{test_classifier}{SingleCellExperiment,scAnnotatR}(
34
-  test_obj,
21
+\S4method{test_classifier}{scAnnotatR}(
35 22
   classifier,
23
+  test_obj,
24
+  assay,
25
+  slot = NULL,
26
+  tag_slot,
36 27
   target_cell_type = NULL,
37 28
   parent_classifier = NULL,
29
+  parent_tag_slot = "predicted_cell_type",
38 30
   path_to_models = "default",
39
-  zscore = TRUE,
40
-  sce_tag_slot = "ident",
41
-  sce_parent_tag_slot = "predicted_cell_type",
42
-  sce_assay = "logcounts",
43
-  ...
31
+  zscore = TRUE
44 32
 )
45 33
 }
46 34
 \arguments{
47
-\item{test_obj}{xxobject that can be used for testing}
35
+\item{classifier}{scAnnotatR classification model}
36
+
37
+\item{test_obj}{object that can be used for testing}
38
+
39
+\item{assay}{name of assay to use in test_obj}
48 40
 
49
-\item{classifier}{classification model}
41
+\item{slot}{type of expression data to use in test_obj. 
42
+For Seurat object, some available types are: "counts", "data" and "scale.data".
43
+Ignore this if test_obj is \code{\link{SingleCellExperiment}} object.}
44
+
45
+\item{tag_slot}{string, name of annotation slot 
46
+indicating cell tag/label in the testing object.
47
+Strings indicating cell types are expected in this slot. 
48
+Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
49
+or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
50
+1/"yes"/T/TRUE: being new cell type.}
50 51
 
51 52
 \item{target_cell_type}{vector indicating other cell types than cell labels 
52 53
 that can be considered as the main cell type in classifier, 
... ...
@@ -56,51 +57,17 @@ Default as NULL.}
56 57
 \item{parent_classifier}{\code{\link{scAnnotatR}} object
57 58
 corresponding to classification model for the parent cell type}
58 59
 
60
+\item{parent_tag_slot}{string, name of tag slot in cell meta data
61
+indicating pre-assigned/predicted parent cell type. 
62
+Default field is "predicted_cell_type".
63
+The slot must contain only string values.}
64
+
59 65
 \item{path_to_models}{path to the folder containing the list of models. 
60 66
 As default, the pretrained models in the package will be used. 
61 67
 If user has trained new models, indicate the folder containing 
62 68
 the new_models.rda file.}
63 69
 
64 70
 \item{zscore}{boolean, whether gene expression is transformed to zscore}
65
-
66
-\item{...}{arguments passed to other methods}
67
-
68
-\item{seurat_tag_slot}{string, name of annotation slot 
69
-indicating cell tag/label in the testing object.
70
-Strings indicating cell types are expected in this slot. 
71
-For \code{\link{Seurat}} object, default value is "active.ident". 
72
-Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
73
-or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
74
-1/"yes"/T/TRUE: being new cell type.}
75
-
76
-\item{seurat_parent_tag_slot}{string, name of tag slot in cell meta data
77
-indicating pre-assigned/predicted parent cell type. 
78
-Default field is "predicted_cell_type".
79
-The slot must contain only string values.}
80
-
81
-\item{seurat_assay}{name of assay to use in 
82
-\code{\link{Seurat}} object, defaults to 'RNA' assay.}
83
-
84
-\item{seurat_slot}{type of expression data to use in 
85
-\code{\link{Seurat}} object. 
86
-Some available types are: "counts", "data" and "scale.data". 
87
-Default to "counts", which contains unnormalized data.}
88
-
89
-\item{sce_tag_slot}{string, name of annotation slot 
90
-indicating cell tag/label in the testing object.
91
-Strings indicating cell types are expected in this slot. 
92
-Default value is "ident".  
93
-Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
94
-or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
95
-1/"yes"/T/TRUE: being new cell type.}
96
-
97
-\item{sce_parent_tag_slot}{string, name of tag slot in cell meta data
98
-indicating pre-assigned/predicted parent cell type. 
99
-Default is "predicted_cell_type".
100
-The slot must contain only string values.}
101
-
102
-\item{sce_assay}{name of assay to use in \code{\link{SingleCellExperiment}}
103
-object, defaults to 'logcounts' assay.}
104 71
 }
105 72
 \value{
106 73
 result of testing process in form of a list, 
... ...
@@ -125,13 +92,15 @@ data("tirosh_mel80_example")
125 92
 # train the classifier
126 93
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
127 94
 set.seed(123)
128
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
129
-marker_genes = selected_marker_genes_B, cell_type = "B cells")
95
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
96
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
97
+cell_type = "b cells", tag_slot = 'active.ident')
130 98
 
131 99
 # test the classifier, target cell type can be in other formats or
132 100
 # alternative cell type that can be considered as the classified cell type 
133
-classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example, 
134
-classifier = classifier_b, target_cell_type = c("B cell"))
101
+classifier_b_test <- test_classifier(classifier = classifier_b, 
102
+test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts', 
103
+tag_slot = 'active.ident', target_cell_type = c("B cell"))
135 104
 classifier_b_test
136 105
 
137 106
 }
... ...
@@ -2,81 +2,43 @@
2 2
 % Please edit documentation in R/classifier.R
3 3
 \name{train_classifier}
4 4
 \alias{train_classifier}
5
-\alias{train_classifier,Seurat-method}
6
-\alias{train_classifier,SingleCellExperiment-method}
7 5
 \title{Train cell type classifier}
8 6
 \usage{
9 7
 train_classifier(
10 8
   train_obj,
9
+  assay,
10
+  slot = NULL,
11 11
   cell_type,
12 12
   marker_genes,
13
+  tag_slot,
13 14
   parent_cell = NA_character_,
15
+  parent_tag_slot = "predicted_cell_type",
14 16
   parent_classifier = NULL,
15 17
   path_to_models = "default",
16
-  zscore = TRUE,
17
-  ...
18
-)
19
-
20
-\S4method{train_classifier}{Seurat}(
21
-  train_obj,
22
-  cell_type,
23
-  marker_genes,
24
-  parent_cell = NA_character_,
25
-  parent_classifier = NULL,
26
-  path_to_models = "default",
27
-  zscore = TRUE,
28
-  seurat_tag_slot = "active.ident",
29
-  seurat_parent_tag_slot = "predicted_cell_type",
30
-  seurat_assay = "RNA",
31
-  seurat_slot = "counts",
32
-  ...
33
-)
34
-
35
-\S4method{train_classifier}{SingleCellExperiment}(
36
-  train_obj,
37
-  cell_type,
38
-  marker_genes,
39
-  parent_cell = NA_character_,
40
-  parent_classifier = NULL,
41
-  path_to_models = "default",
42
-  zscore = TRUE,
43
-  sce_tag_slot = "ident",
44
-  sce_parent_tag_slot = "predicted_cell_type",
45
-  sce_assay = "logcounts",
46
-  ...
18
+  zscore = TRUE
47 19
 )
48 20
 }
49 21
 \arguments{
50 22
 \item{train_obj}{object that can be used for training the new model. 
51 23
 \code{\link{Seurat}} object or \code{\link{SingleCellExperiment}} object
52
-is expected.
24
+is supported.
53 25
 If the training model has parent, parent_tag_slot may have been indicated. 
54 26
 This field would have been filled out automatically 
55 27
 if user previously ran the classify_cells function. 
56 28
 If no (predicted) cell type annotation provided, 
57 29
 the function can be run if 1- parent_cell or 2- parent_classifier is provided.}
58 30
 
31
+\item{assay}{name of assay to use in training object.}
32
+
33
+\item{slot}{type of expression data to use in training object, omitted if 
34
+train_obj is \code{\link{SingleCellExperiment}} object.}
35
+
59 36
 \item{cell_type}{string indicating the name of the subtype
60 37
 This must exactly match cell tag/label if cell tag/label is a string.}
61 38
 
62 39
 \item{marker_genes}{list of marker genes used for the new training model}
63 40
 
64
-\item{parent_cell}{string indicated the name of the parent cell type, 
65
-if parent cell type classifier has already been saved in model database.
66
-Adjust path_to_models for exact database.}
67
-
68
-\item{parent_classifier}{classification model for the parent cell type}
69
-
70
-\item{path_to_models}{path to the folder containing the model database. 
71
-As default, the pretrained models in the package will be used. 
72
-If user has trained new models, indicate the folder containing the 
73
-new_models.rda file.}
74
-
75
-\item{zscore}{whether gene expression in train_obj is transformed to zscore}
76
-
77
-\item{...}{arguments passed to other methods}
78
-
79
-\item{seurat_tag_slot}{string, name of slot in cell meta data 
41
+\item{tag_slot}{string, name of slot in cell meta data 
80 42
 indicating cell tag/label in the training object.
81 43
 Strings indicating cell types are expected in this slot.
82 44
 For \code{\link{Seurat}} object, a typical value is "active.ident".  
... ...
@@ -84,35 +46,24 @@ Expected values are string (A-Z, a-z, 0-9, no special character accepted)
84 46
 or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
85 47
 1/"yes"/T/TRUE: being new cell type.}
86 48
 
87
-\item{seurat_parent_tag_slot}{string, name of a slot in cell meta data 
49
+\item{parent_cell}{string indicating the name of the parent cell type, 
50
+if parent cell type classifier has already been saved in model database.
51
+Adjust path_to_models for exact database.}
52
+
53
+\item{parent_tag_slot}{string, name of a slot in cell meta data 
88 54
 indicating assigned/predicted cell type. Default is "predicted_cell_type". 
89 55
 This slot would have been filled automatically 
90 56
 if the user has called the classify_cells function.
91 57
 The slot must contain only string values.}
92 58
 
93
-\item{seurat_assay}{name of assay to use in training object. 
94
-Default to 'RNA' assay.}
95
-
96
-\item{seurat_slot}{type of expression data to use in training object. 
97
-For \code{\link{Seurat}} object, available types are: "counts", "data" 
98
-and "scale.data". Default to "counts", which contains unnormalized data.}
99
-
100
-\item{sce_tag_slot}{string, name of annotation slot indicating 
101
-cell tag/label in the training object.
102
-For \code{\link{SingleCellExperiment}} object, default value is "ident".  
103
-Expected values are string (A-Z, a-z, 0-9, no special character accepted) 
104
-or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
105
-1/"yes"/T/TRUE: being new cell type.}
59
+\item{parent_classifier}{classification model for the parent cell type}
106 60
 
107
-\item{sce_parent_tag_slot}{string, name of a slot in cell meta data 
108
-indicating pre-assigned/predicted cell type. 
109
-Default field is "predicted_cell_type".
110
-This field would have been filled automatically 
111
-when user called classify_cells function. 
112
-The slot must contain only string values.}
61
+\item{path_to_models}{path to the folder containing the model database. 
62
+As default, the pretrained models in the package will be used. 
63
+If user has trained new models, indicate the folder containing the 
64
+new_models.rda file.}
113 65
 
114
-\item{sce_assay}{name of assay to use in training object. 
115
-Default to 'logcounts' assay.}
66
+\item{zscore}{whether gene expression in train_obj is transformed to zscore}
116 67
 }
117 68
 \value{
118 69
 \code{\link{scAnnotatR}} object
... ...
@@ -143,8 +94,9 @@ selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
143 94
 # train the classifier, the "cell_type" argument must match 
144 95
 # the cell labels in the data, except upper/lower case
145 96
 set.seed(123)
146
-classifier_b <- train_classifier(train_obj = tirosh_mel80_example, 
147
-marker_genes = selected_marker_genes_B, cell_type = "b cells")
97
+classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
98
+assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
99
+cell_type = "b cells", tag_slot = 'active.ident')
148 100
 
149 101
 # classify cell types using B cell classifier, 
150 102
 # a test classifier process may be used before applying the classifier 
... ...
@@ -163,7 +115,8 @@ p_marker_genes = c("SDC1", "CD19", "CD79A")
163 115
 # for the training process.
164 116
 set.seed(123)
165 117
 plasma_classifier <- train_classifier(train_obj = tirosh_mel80_example, 
166
-cell_type = "Plasma cell", marker_genes = p_marker_genes, 
167
-parent_classifier = classifier_b, seurat_tag_slot = 'plasma_cell_tag')
118
+assay = 'RNA', slot = 'counts', cell_type = 'Plasma cell', 
119
+marker_genes = p_marker_genes, tag_slot = 'plasma_cell_tag',
120
+parent_classifier = classifier_b)
168 121
 
169 122
 }
... ...
@@ -148,8 +148,8 @@ times for one model, users can use `set.seed`.
148 148
 ```{r}
149 149
 set.seed(123)
150 150
 classifier_B <- train_classifier(train_obj = train_set, cell_type = "B cells", 
151
-                          marker_genes = selected_marker_genes_B,
152
-                          sce_assay = 'counts', sce_tag_slot = 'B_cell')
151
+                                 marker_genes = selected_marker_genes_B,
152
+                                 assay = 'counts', tag_slot = 'B_cell')
153 153
 ```
154 154
 ```{r}
155 155
 classifier_B
... ...
@@ -169,8 +169,8 @@ The `test_classifier` model automatically tests a classifier's performance
169 169
 against another dataset. Here, we used the `test_set` created before:
170 170
 
171 171
 ```{r}
172
-classifier_B_test <- test_classifier(test_obj = test_set, classifier = classifier_B, 
173
-                              sce_assay = 'counts', sce_tag_slot = 'B_cell')
172
+classifier_B_test <- test_classifier(classifier = classifier_B, test_obj = test_set,  
173
+                                     assay = 'counts', tag_slot = 'B_cell')
174 174
 ```
175 175
 
176 176
 ### Interpreting test model result
... ...
@@ -178,7 +178,7 @@ Train the child classifier:
178 178
 set.seed(123)
179 179
 classifier_plasma <- train_classifier(train_obj = train_set, 
180 180
 marker_genes = selected_marker_genes_plasma, cell_type = "Plasma cells", 
181
-sce_assay = 'counts', sce_tag_slot = 'plasma', parent_classifier = classifier_B)
181
+assay = 'counts', tag_slot = 'plasma', parent_classifier = classifier_B)
182 182
 ```
183 183
 If the B cells classifier has not been loaded to the current working space, 
184 184
 an equivalent training process should be:
... ...
@@ -186,7 +186,7 @@ an equivalent training process should be:
186 186
 set.seed(123)
187 187
 classifier_plasma <- train_classifier(train_obj = train_set, 
188 188
 marker_genes = selected_marker_genes_plasma, cell_type = "Plasma cells", 
189
-sce_assay = 'counts', sce_tag_slot = 'plasma', parent_cell = 'B cells')
189
+assay = 'counts', tag_slot = 'plasma', parent_cell = 'B cells')
190 190
 ```
191 191
 ```{r}
192 192
 classifier_plasma
... ...
@@ -200,7 +200,7 @@ caret_model(classifier_plasma)
200 200
 The parent classifier must be also set in test method.
201 201
 ```{r}
202 202
 classifier_plasma_test <- test_classifier(test_obj = test_set, 
203
-classifier = classifier_plasma, sce_assay = 'counts', sce_tag_slot = 'plasma', 
203
+classifier = classifier_plasma, assay = 'counts', tag_slot = 'plasma', 
204 204
 parent_classifier = classifier_B)
205 205
 ```
206 206