Browse code

fix bugs + adapt vignettes to scRNAseq dataset

Former-commit-id: fa2385410e4cefdc694545daa48892d15ffe8921

nttvy authored on 14/11/2020 11:50:55
Showing 8 changed files

... ...
@@ -24,7 +24,7 @@ Imports:
24 24
     utils
25 25
 Suggests: 
26 26
     knitr,
27
-    SingleCellClassR.data,
27
+    scRNAseq,
28 28
     testthat
29 29
 VignetteBuilder: knitr
30 30
 Depends: R (>= 4.0), Seurat, SingleCellExperiment, SummarizedExperiment
... ...
@@ -170,7 +170,8 @@ setMethod("train_classifier", c("train_obj" = "Seurat"),
170 170
   clf$trainingData <- clf$resample <- clf$resampledCM <- NULL 
171 171
   p_thres <- 0.5
172 172
   
173
-  object <- SingleCellClassR(cell_type, clf, features, p_thres, NA_character_)
173
+  object <- SingleCellClassR(cell_type, clf, labels(clf$terms), p_thres, 
174
+                             NA_character_)
174 175
   
175 176
   # only assign parent if pretrained model for parent cell type is avai
176 177
   if ((!is.null(parent_process$parent.clf) 
... ...
@@ -266,7 +267,7 @@ setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"),
266 267
   clf <- train_func(balance_ds$mat, balance_ds$tag)
267 268
   
268 269
   p_thres <- 0.5
269
-  object <- SingleCellClassR(cell_type, clf, features, p_thres, 
270
+  object <- SingleCellClassR(cell_type, clf, labels(clf$terms), p_thres, 
270 271
                              NA_character_)
271 272
   
272 273
   # only assign parent if pretrained model for parent cell type is avai
... ...
@@ -665,7 +666,7 @@ setMethod("classify_cells", c("classify_obj" = "Seurat"),
665 666
   
666 667
   if (any(pred_cells != "")) {
667 668
     pred_cells <- gsub("/$", "", pred_cells)
668
-  
669
+    
669 670
     # ignore ambiguous results
670 671
     if (ignore_ambiguous_result == TRUE) {
671 672
       pred_cells <- unlist(lapply(pred_cells, 
... ...
@@ -728,12 +729,16 @@ setMethod("classify_cells", c("classify_obj" = "SingleCellExperiment"),
728 729
     prediction <- make_prediction(filtered_mat, classifier, pred_cells, ignore_ambiguous_result)
729 730
     pred <- prediction$pred
730 731
     pred_cells <- prediction$pred_cells
731
-    
732 732
     # add prediction to meta data: 2 cols: p, class 
733 733
     for (colname in colnames(pred)) {
734
-      classify_obj[[colname]] <- pred[, colname, drop = FALSE]
734
+      classify_obj[[colname]] <- unlist(lapply(colnames(classify_obj), 
735
+                                               function(x) 
736
+                                                 if (x %in% rownames(pred)) {pred[x, colname]} 
737
+                                               else {NA}))
738
+      #pred[, colname, drop = FALSE, ]
735 739
     }
736 740
   }
741
+  
737 742
   if (any(pred_cells != "")) {
738 743
     pred_cells <- gsub("/$", "", pred_cells)
739 744
     
... ...
@@ -26,7 +26,7 @@
26 26
 #' @description Pretrained classifier obtained by training and testing on 
27 27
 #' The Tabula Muris Consortium.
28 28
 #' @docType data
29
-#' @usage default_models_n
29
+#' @usage default_models_m
30 30
 #' @format a list of \code{\link{SingleCellClassR}} objects
31 31
 #' @author Vy Nguyen, November 2020
32 32
 #' @keywords datasets
... ...
@@ -615,17 +615,16 @@ make_prediction <- function(mat, classifier, pred_cells, ignore_ambiguous_result
615 615
       if(x[1] >= classifier@p_thres) {"yes"} else {"no"}))
616 616
   rownames(pred) <- rownames(mat)
617 617
   
618
-  # append a sumary to whole predicted cell type
618
+  # append a summary to whole predicted cell type
619 619
   pred_cells <- unlist(lapply(cells,
620 620
   function(i)
621 621
     if (i %in% rownames(pred) && pred[i, "class"] == "yes") {
622 622
       if (ignore_ambiguous_result == TRUE && !is.na(classifier@parent) &&
623 623
           gsub("/", "", pred_cells[i]) == classifier@parent)
624
-      { classifier@cell_type }
624
+      { paste0(classifier@cell_type, "/") }
625 625
       else { paste0(pred_cells[i], classifier@cell_type, "/") }
626 626
     }
627 627
     else { pred_cells[i] }))
628
-  
629 628
   names(pred_cells) <- cells
630 629
 
631 630
   # remove no column and rename yes column to p
... ...
@@ -706,12 +705,13 @@ verify_parent <- function(mat, classifier, meta.data) {
706 705
   # parent clf, if avai, always has to be applied before children clf.
707 706
   parent_slot <- paste0(c(unlist(strsplit(classifier@parent, split = " ")), "class"), collapse = "_")
708 707
   if (parent_slot %in% colnames(meta.data)) {
709
-    parent_pred <- meta.data[, parent_slot, drop = FALSE]
710
-    pos_parent <- rownames(parent_pred[parent_pred == 'yes', , drop=FALSE]) 
708
+    parent_pred <- meta.data[, parent_slot]
709
+    pos_parent <- colnames(mat)[parent_pred == 'yes'] 
711 710
   } else {
712 711
     warning(paste0('Parent classifier of ', classifier@cell_type, 'cannot be applied.\n 
713 712
                    Please list/save parent classifier before child(ren) classifier.\n
714
-                   Skip applying classification models for ', classifier@cell_type, ' and its parent cell type.\n'), call. = FALSE, immediate. = TRUE)
713
+                   Skip applying classification models for ', classifier@cell_type, 
714
+                   ' and its parent cell type.\n'), call. = FALSE, immediate. = TRUE)
715 715
   }
716 716
   
717 717
   if (!is.null(pos_parent)) {
... ...
@@ -1 +1 @@
1
-c03fea4600edc3605663fbcfda019841a4dc9df3
2 1
\ No newline at end of file
2
+17113b58d0b1fd5d2f5297d30e533276223aca7a
3 3
\ No newline at end of file
... ...
@@ -8,7 +8,7 @@
8 8
 a list of \code{\link{SingleCellClassR}} objects
9 9
 }
10 10
 \usage{
11
-default_models_n
11
+default_models_m
12 12
 }
13 13
 \description{
14 14
 Pretrained classifier obtained by training and testing on 
... ...
@@ -29,41 +29,67 @@ any other cell type.
29 29
 
30 30
 ## Preparing train object and test object
31 31
 
32
-The workflow starts from a couple of Seurat objects where cells have been 
32
+The workflow starts from a couple of objects where cells have been 
33 33
 assigned to be different cell types. To do this, users may have annotated 
34
-scRNA-seq data (by a FACS-sorting process, for example), create a Seurat 
35
-object based on the sequencing data and assign the predetermined cell types 
36
-as Seurat meta data. If the scRNA-seq data has not been annotated yet, 
37
-another possible approach is to follow the basic Seurat workflow until 
38
-assigning cell type identity to clusters.
34
+scRNA-seq data (by a FACS-sorting process, for example), create a Seurat/
35
+SingleCellExperiment (SCE) object based on the sequencing data and assign the 
36
+predetermined cell types as cell meta data. If the scRNA-seq data has not 
37
+been annotated yet, another possible approach is to follow the basic 
38
+workflow (Seurat, for example) until assigning cell type identity to clusters.
39
+
40
+In this vignette, we use the human lung dataset from Zilionis et al., 2019,
41
+which is available in the scRNAseq (2.4.0) library. The dataset is stored as a
42
+SCE object.
39 43
 
40 44
 To start the training workflow, we first load the neccessary libraries. 
41 45
 ```{r}
42 46
 library(SingleCellClassR)
43
-library(SingleCellClassR.data)
47
+library(scRNAseq)
44 48
 ```
45 49
 
46
-One Seurat object will be used as train object, while the other is the test 
47
-object. In this example, we used Sade-Feldman dataset to create the train 
48
-object.
50
+Load the dataset:
49 51
 
50 52
 ```{r}
51
-data("feldman_seurat")
52
-feldman_seurat
53
+zilionis <- ZilionisLungData()
53 54
 ```
54 55
 
55
-We load Jerby-Arnon dataset for the testing object.
56
+We cut this dataset into two parts, one for the training and the other for the testing.
56 57
 ```{r}
57
-data("jerby_seurat")
58
-jerby_seurat
58
+pivot = ncol(zilionis)%/%2
59
+train_set <- zilionis[, 1:pivot]
60
+test_set <- zilionis[, (1+pivot):ncol(zilionis)]
59 61
 ```
60
-In our example, the cell type meta data is indicated as the active 
61
-identification of the Seurat object (in both train object and test 
62
-object). If cell type is stored in another slot of object meta data, 
62
+
63
+In this dataset, the cell type meta data is stored in the *Most likely LM22 cell type*
64
+slot of the SingleCellExperiment object (in both train object and test object). 
65
+If cell type is stored in another slot of object meta data, 
63 66
 the slot/tag slot name must be then provided as a parameter in the 
64 67
 train and test method. 
65 68
 ```{r}
66
-head(Idents(feldman_seurat))
69
+unique(train_set$`Most likely LM22 cell type`)
70
+```
71
+```{r}
72
+unique(test_set$`Most likely LM22 cell type`)
73
+```
74
+
75
+We want to train a classifier for B cells and their phenotypes. Considering memory B cells,
76
+naive B cells and plasma cells as B cell phenotypes, we convert all those cells to a uniform 
77
+cell label, ie. B cells. All non B cells are converted into 'others'.
78
+
79
+```{r}
80
+# change cell label
81
+train_set$B_cell <- unlist(lapply(train_set$`Most likely LM22 cell type`,
82
+                                  function(x) if (is.na(x)) {'ambiguous'} else if (x %in% c('Plasma cells', 'B cells memory', 'B cells naive')) {'B cells'} else {'others'}))
83
+
84
+test_set$B_cell <- unlist(lapply(test_set$`Most likely LM22 cell type`,
85
+                                 function(x) if (is.na(x)) {'ambiguous'} else if (x %in% c('Plasma cells', 'B cells memory', 'B cells naive')) {'B cells'} else {'others'}))
86
+```
87
+
88
+We observe that there are cells marked NAs. Those can be understood as 1/different from all indicated cell types or 2/any unknown cell types. Here we consider the second case, ie. we don't know whether they are positive or negative to B cells. To avoid the affect of those NAs cells, we can assign them as 'ambiguous'. All cells tagged 'ambiguous' will be ignored by SingleCellClassR from training and testing. 
89
+
90
+We may want to check the number of cells in each category:
91
+```{r}
92
+table(train_set$B_cell)
67 93
 ```
68 94
 
69 95
 ## Defining set of features
... ...
@@ -83,13 +109,13 @@ selected_features_B <- c("CD19", "MS4A1", "SDC1", "CD79A", "CD79B",
83 109
 ## Train model
84 110
 
85 111
 When the model is being trained, three most important information must be 
86
-provided are: the Seurat object used for training, the set of applied features
112
+provided are: the Seurat/SCE object used for training, the set of applied features
87 113
 and the cell type defining the trained model. 
88 114
 
89 115
 Cell type corresponding to the trained model must exist among identities 
90 116
 assigned to cells in the trained Seurat object. Remember if cell types 
91 117
 are not indicated as active identification of the trained object, name 
92
-of the tag slot in object meta data must be provided to the tag_slot parameter. 
118
+of the tag slot in object meta data must be provided to the sce_tag_slot parameter. 
93 119
 
94 120
 When training on a imbalanced dataset, the trained model may bias toward the 
95 121
 majority group and ignore the presence of the minority group. To avoid this, 
... ...
@@ -99,8 +125,8 @@ from the majority group. To use the same set of cells while training multiple
99 125
 times for one model, users can use set.seed. 
100 126
 ```{r}
101 127
 set.seed(123)
102
-clf_B <- train_classifier(train_obj = feldman_seurat, 
103
-features = selected_features_B, cell_type = "B cells")
128
+clf_B <- train_classifier(train_obj = train_set, cell_type = "B cells", features = selected_features_B,
129
+                          sce_assay = 'counts', sce_tag_slot = 'B_cell')
104 130
 ```
105 131
 ```{r}
106 132
 clf_B
... ...
@@ -115,7 +141,8 @@ clf(clf_B)
115 141
 ## Test model
116 142
 
117 143
 ```{r}
118
-clf_B_test <- test_classifier(test_obj = jerby_seurat, classifier = clf_B)
144
+clf_B_test <- test_classifier(test_obj = test_set, classifier = clf_B, 
145
+                              sce_assay = 'counts', sce_tag_slot = 'B_cell')
119 146
 ```
120 147
 
121 148
 ### Interpreting test model result
... ...
@@ -138,7 +165,7 @@ With the same classification model, the sensitivity and the specification of cla
138 165
 clf_B_test$overall_roc
139 166
 ```
140 167
 
141
-In this example of B cell classifier, the current threshold is at 0.5. The sensitivity is 0.9932203, and the specificity is 0.9890493 (FPR = 0.010950643). The higher sensitivity (0.9943503) can be reached if we set the p_thres at 0.4. However, we will have lower specificity (FPR = 0.013966037), which means that we misclassify more stranger cells as B cells. In contradiction, we may not retrieve all actual B cells with higher p_thres (0.6, for example).
168
+In this example of B cell classifier, the current threshold is at 0.5. The higher sensitivity can be reached if we set the p_thres at 0.4. However, we will then have lower specificity, which means that we misclassify more stranger cells as B cells. In contradiction, we may not retrieve all actual B cells with higher p_thres (0.6, for example).
142 169
 
143 170
 There is of course a certain trade-off between the sensitivity and the specificity of the model. Depending on the need of the project or the user-own preference, a probability threshold giving higher sensitivity or higher specificity can be chosen. In our perspective, p_thres at 0.5 is a good choice for the current B cell model.
144 171
 
... ...
@@ -50,33 +50,74 @@ clf_B <- default_models[['B cells']]
50 50
 clf_B
51 51
 ```
52 52
 ## Preparing train object and test object
53
+
53 54
 Same as training for basic models, training for a child model also requires 
54
-a train (Seurat) object and a test (Seurat) object. All Seurat objects must 
55
+a train (Seurat/SCE) object and a test (Seurat/SCE) object. All objects must 
55 56
 have a slot in meta data indicating the type of cells. Tag slot indicating 
56 57
 parent cell type can also be provided. In this case, parent cell type tag 
57 58
 will further be tested for coherence with the provided parent classifier.
59
+
60
+Cell tagged as child cell type but incoherent to parent cell type will be 
61
+removed from training and testing for the child cell type classifier. 
62
+
63
+In this vignette, we use the human lung dataset from Zilionis et al., 2019,
64
+which is available in the scRNAseq (2.4.0) library. The dataset is stored as a
65
+SCE object.
66
+
67
+To start the training workflow, we first load the neccessary libraries. 
58 68
 ```{r}
59
-library(SingleCellClassR.data)
60
-data("feldman_seurat")
69
+library(scRNAseq)
61 70
 ```
71
+
72
+Load the dataset:
73
+
62 74
 ```{r}
63
-# view train data
64
-feldman_seurat
75
+zilionis <- ZilionisLungData()
65 76
 ```
77
+
78
+We cut this dataset into two parts, one for the training and the other for the testing.
66 79
 ```{r}
67
-# tag slot indicating parent cell type, B cells
68
-head(Idents(feldman_seurat))
80
+pivot = ncol(zilionis)%/%2
81
+train_set <- zilionis[, 1:pivot]
82
+test_set <- zilionis[, (1+pivot):ncol(zilionis)]
69 83
 ```
84
+
85
+In this dataset, the cell type meta data is stored in the *Most likely LM22 cell type*
86
+slot of the SCE object (in both train object and test object). 
87
+If cell type is stored in another slot of object meta data, 
88
+the slot/tag slot name must be then provided as a parameter in the 
89
+train and test method. 
90
+
70 91
 ```{r}
71
-# tag slot indicating cell subset, plasma cells
72
-head(feldman_seurat[['Plasma_cells']])
92
+table(train_set$`Most likely LM22 cell type`)
73 93
 ```
74
-A second object for testing:
75 94
 ```{r}
76
-data("jerby_seurat")
95
+table(test_set$`Most likely LM22 cell type`)
96
+```
97
+
98
+Unlike the example of the training basic model, we will remove all NAs cells in order to reduce computationalc complexity.
77 99
 
78
-# view test data
79
-jerby_seurat
100
+```{r}
101
+# remove NAs cells
102
+train_set <- train_set[, !is.na(train_set$`Most likely LM22 cell type`)]
103
+test_set <- test_set[, !is.na(test_set$`Most likely LM22 cell type`)]
104
+```
105
+
106
+```{r}
107
+# convert cell label: 
108
+# 1 - positive to plasma cells, 
109
+# 0 - negative to plasma cells
110
+train_set$plasma <- unlist(lapply(train_set$`Most likely LM22 cell type`,
111
+                                  function(x) if (x == 'Plasma cells') {1} else {0}))
112
+
113
+test_set$plasma <- unlist(lapply(test_set$`Most likely LM22 cell type`,
114
+                                 function(x) if (x == 'Plasma cells') {1} else {0}))
115
+```
116
+
117
+We may want to check the number of cells in each category:
118
+```{r}
119
+table(train_set$plasma)
120
+# 1: plasma cells, 0: not plasma cells
80 121
 ```
81 122
 
82 123
 ## Defining set of features
... ...
@@ -109,17 +150,17 @@ path.to.models = '.'*
109 150
 Train the child classifier:
110 151
 ```{r}
111 152
 set.seed(123)
112
-clf_plasma <- train_classifier(train_obj = feldman_seurat, 
153
+clf_plasma <- train_classifier(train_obj = train_set, 
113 154
 features = selected_features_plasma, cell_type = "Plasma cells", 
114
-seurat_tag_slot = 'Plasma_cells', parent_clf = clf_B)
155
+sce_assay = 'counts', sce_tag_slot = 'plasma', parent_clf = clf_B)
115 156
 ```
116 157
 If B cells classifier has not been loaded to current working space, 
117 158
 an equivalent training process should be:
118 159
 ```{r}
119 160
 set.seed(123)
120
-clf_plasma <- train_classifier(train_obj = feldman_seurat, 
161
+clf_plasma <- train_classifier(train_obj = train_set, 
121 162
 features = selected_features_plasma, cell_type = "Plasma cells", 
122
-seurat_tag_slot = 'Plasma_cells', parent_cell = 'B cells')
163
+sce_assay = 'counts', sce_tag_slot = 'plasma', parent_cell = 'B cells')
123 164
 ```
124 165
 ```{r}
125 166
 clf_plasma
... ...
@@ -132,35 +173,17 @@ clf(clf_plasma)
132 173
 
133 174
 Parent classifier must be also indicated in test method.
134 175
 ```{r}
135
-clf_plasma_test <- test_classifier(test_obj = jerby_seurat, 
136
-classifier = clf_plasma, seurat_tag_slot = 'Plasma_cells', parent_clf = clf_B)
176
+clf_plasma_test <- test_classifier(test_obj = test_set, 
177
+classifier = clf_plasma, sce_assay = 'counts', sce_tag_slot = 'plasma', 
178
+parent_clf = clf_B)
137 179
 ```
138 180
 
139 181
 ### Interpreting test model result
182
+The test result obtained from a child model can be interpreted in the same way
183
+as we do with the model for basic cell types. We can change the prediction 
184
+probability threshold according to the research project or personal preference 
185
+and plot a roc curve.
140 186
 
141
-Same as testing a basic classification models, testing a child classifier also returns an object, which is a list of: *test_tag*, *pred*, *acc*, *auc*, *overall_roc*
142
-  
143
-The *overall_roc* is a summary of True Positive Rate (sensitivity) and False Positive Rate (1 - specificity) obtained by the trained model according to different thresholds:
144
-
145
-```{r}
146
-clf_plasma_test$overall_roc
147
-```
148
-
149
-We see that with the same TPR (*= 1.0*), the FPR increases if *p_thres* increases to *0.7*. Therefore, this is the better result as compare to the result produced by *p_thres = 0.5*. We change the prediction probability threshold as:
150
-
151
-```{r}
152
-p_thres(clf_plasma) <- 0.7
153
-```
154
-
155
-Hence the prediction result was changed as:
156
-```{r}
157
-clf_plasma_test <- test_classifier(test_obj = jerby_seurat, 
158
-classifier = clf_plasma, seurat_tag_slot = 'Plasma_cells', parent_clf = clf_B)
159
-```
160
-
161
-Now the specificity increased from *0.963759909399774* to *0.994337485843715* and with the same number of actual plasma cells (*65*), the number of predicted cells decreased from *97* to *70*.
162
-
163
-The ROC curve remains the same with the same AUC score:
164 187
 ```{r}
165 188
 print(clf_plasma_test$auc)
166 189
 roc_curve <- plot_roc_curve(test_result = clf_plasma_test)
... ...
@@ -172,11 +195,13 @@ plot(roc_curve)
172 195
 In order to save child classifier, parent classifier must have existed in the 
173 196
 classifier database, either in the package default database or in user-defined 
174 197
 database.
198
+
175 199
 ```{r}
176 200
 # see list of available model in package
177 201
 data("default_models")
178 202
 names(default_models)
179 203
 ```
204
+
180 205
 In our package, default models include already models classifying plasma cells.
181 206
 Therefore, we will save this model to a new local database specified by the 
182 207
 *path.to.models* parameter. If you start with a fresh new local database, 
... ...
@@ -195,61 +220,39 @@ save_new_model(new_model = clf_plasma, path.to.models = getwd(),
195 220
 
196 221
 When we save the B cells' classifier and the plasma cells' classifier, a local database is newly created. We can use this new database to classify cells in a Seurat or SingleCellExperiment object.
197 222
 
198
-Let's try to classify Jerby-Arnon dataset:
223
+Let's try to classify cells in the test set:
199 224
 ```{r}
200
-classified_jerby <- classify_cells(classify_obj = jerby_seurat, 
201
-                                   cell_types = 'all', path_to_models = getwd())
225
+classified <- classify_cells(classify_obj = test_set, sce_assay = 'counts', 
226
+                             cell_types = 'all', path_to_models = getwd())
202 227
 ```
203 228
 
204 229
 Using the *classify_cells()* function, we have to indicate exactly the repository containing the database that the models has recently been saved to. In the previous section, we saved our new models to the current working directory. 
205 230
 
206
-In the *classified_jerby* object, the classification process added new columns to the cell meta data, including the *predicted_cell_type* and *most_probable_cell_type* columns. Let's take a look at the original plasma cells tag:
207
-
208
-```{r}
209
-# get a summary of the plasma cells tag
210
-table(classified_jerby[['Plasma_cells']][,1])
211
-# 1 corresponds to cells positive to B cells and 0 corresponds to cells negative to B cells
212
-```
213
-65 cells (labeled 1) are plasma cells and 7121 cells are not plasma cells.
231
+In the *classified* object, the classification process added new columns to the cell meta data, including the *predicted_cell_type* and *most_probable_cell_type* columns. 
214 232
 
215 233
 If we use the full prediction to compare with actual plasma tag, we obtain this result: 
216 234
 ```{r}
217 235
 # compare the prediction with actual cell tag
218
-table(classified_jerby[['predicted_cell_type']][,1], classified_jerby[['Plasma_cells']][,1])
236
+table(classified$predicted_cell_type, classified$plasma)
237
+# plasma cell is child cell type of B cell
238
+# so of course, all predicted plasma cells are predicted B cells 
219 239
 ```
220 240
 
221
-We find that 65 actual plasma cells were assigned as B cells and as plasma cells at the same time. This is a reasonable result because in this example, we consider all plasma cells are B cells.
222
-
223
-However, comparing the actual tag with the most probable prediction, we obtain: 
241
+When comparing the actual tag with the most probable prediction, we obtain: 
224 242
 ```{r}
225 243
 # compare the prediction with actual cell tag
226
-table(classified_jerby[['most_probable_cell_type']][,1], classified_jerby[['Plasma_cells']][,1])
244
+table(classified$most_probable_cell_type, classified$plasma)
227 245
 ```
228 246
 
229
-This is a quite surprise result. Among the 65 actual plasma cells, only 1 was identified but 64 others were identified as B cells. In contradiction, the testing process of *clf_plasma* proved that this model could detect all plasma cells in the same dataset. Why this can happen? This contradictory can be explained.
230
-
231
-The *predicted_cell_type* takes all predictions having the probabilities satisfying the corresponding probability thresholds. In this case, SingleCellClassR takes all predicted B cells having probability from 0.5 and all predicted plasma cells having probability from 0.7. Meanwhile, the *most_probable_cell_type* takes only the cell type which gives highest prediction probability. Here, 64 predicted plasma cells were also predicted as B cells. However, they have a higher probability to be B cells than to be plasma cells. Therefore, they were classified as B cells in the *most_probable_cell_type*.
247
+The number of identified plasma cells is different in the *predicted_cell_type* slot and in the *most_probable_cell_type*. This is because the *predicted_cell_type* takes all predictions having the probabilities satisfying the corresponding probability thresholds. Meanwhile, the *most_probable_cell_type* takes only the cell type which gives highest prediction probability. 
232 248
 
233 249
 To have all plasma cells specified as plasma cells, we can set the *ignore_ambiguous_result* to TRUE. This option will actually hide all ambiguous prediction in case we have more distinct cell types. In the parent-chid(ren) relationship of cell types, the more specified cell types/phenotypes will be reported. Of course, we don't obtain the *most_probable_cell_type* in cell meta data.
234 250
 
235 251
 ```{r}
236
-classified_jerby <- classify_cells(classify_obj = jerby_seurat, 
237
-                                   cell_types = 'all', path_to_models = getwd(),
238
-                                   ignore_ambiguous_result = TRUE)
239
-table(classified_jerby[['predicted_cell_type']][,1], classified_jerby[['Plasma_cells']][,1])
240
-```
241
-
242
-To check the B cell classification with actual B cells, we can do as follows:
243
-
244
-```{r}
245
-classified_jerby <- classify_cells(classify_obj = jerby_seurat, 
246
-                                   cell_types = 'all', path_to_models = getwd())
247
-# convert B cells label to binaries to ignore details of other cell types
248
-label_B <- Idents(classified_jerby)
249
-label_B <- unlist(lapply(label_B, function(x) if (x == 'B cells') {1} else 0))
250
-
251
-# compare prediction with cell label
252
-table(classified_jerby[['most_probable_cell_type']][,1], label_B)
252
+classified <- classify_cells(classify_obj = test_set, sce_assay = 'counts',
253
+                             cell_types = 'all', path_to_models = getwd(),
254
+                             ignore_ambiguous_result = TRUE)
255
+table(classified$predicted_cell_type, classified$plasma)
253 256
 ```
254 257
 
255 258
 ## Session Info