Browse code

solve issue caused by hyphen (-) in feature symbols

Former-commit-id: ed6465c683ccfb56e96c7a980b59413817c3d98f

nttvy authored on 24/03/2021 19:58:03
Showing 3 changed files

... ...
@@ -440,6 +440,7 @@ parent <- function(classifier) {
440 440
     
441 441
     # set new features
442 442
     new_features <- labels(value$terms)
443
+    new_features <- gsub('_', '-', new_features) # convert underscore to hyphen if exists
443 444
     features(classifier) <- new_features
444 445
   } else {
445 446
     stop("Can only assign new classifier for a cell type that has no parent.
... ...
@@ -155,6 +155,9 @@ setMethod("train_classifier", c("train_obj" = "Seurat"),
155 155
   # transform list to factor
156 156
   train_tag <- factor(train_tag, levels = c('yes', 'no'))
157 157
   
158
+  # convert hyphen (-) by underscore (_)
159
+  colnames(mat) <- gsub('-', '_', colnames(mat))
160
+  
158 161
   # train
159 162
   clf <- train_func(mat, train_tag)
160 163
   
... ...
@@ -162,7 +165,9 @@ setMethod("train_classifier", c("train_obj" = "Seurat"),
162 165
   clf$resampledCM <- NULL 
163 166
   p_thres <- 0.5
164 167
   
165
-  object <- scClassifR(cell_type, clf, labels(clf$terms), p_thres, 
168
+  features <- labels(clf$terms)
169
+  features <- gsub('_', '-', features) # convert back underscore to hyphen
170
+  object <- scClassifR(cell_type, clf, features, p_thres, 
166 171
                              NA_character_)
167 172
   
168 173
   # only assign parent if pretrained model for parent cell type is avai
... ...
@@ -256,15 +261,19 @@ setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"),
256 261
   # transform list to factor
257 262
   train_tag <- factor(train_tag, levels = c('yes', 'no'))
258 263
   
264
+  # convert hyphen (-) by underscore (_)
265
+  colnames(mat) <- gsub('-', '_', colnames(mat))
266
+  
259 267
   # train
260 268
   clf <- train_func(mat, train_tag)
261 269
   
262
-  
263 270
   # remove this info to reduce memory
264 271
   clf$resampledCM <- NULL 
265 272
   p_thres <- 0.5
266 273
   
267
-  object <- scClassifR(cell_type, clf, labels(clf$terms), p_thres, 
274
+  features <- labels(clf$terms)
275
+  features <- gsub('_', '-', features) # convert back underscore to hyphen
276
+  object <- scClassifR(cell_type, clf, features, p_thres, 
268 277
                              NA_character_)
269 278
   
270 279
   # only assign parent if pretrained model for parent cell type is avai
... ...
@@ -497,6 +497,9 @@ setMethod("process_parent_clf", c("obj" = "Seurat"),
497 497
         filtered_mat <- transform_to_zscore(filtered_mat)
498 498
       }
499 499
       
500
+      # to avoid problem triggered by '-' in gene names
501
+      colnames(filtered_mat) <- gsub('-', '_', colnames(filtered_mat))
502
+      
500 503
       # predict
501 504
       pred = stats::predict(clf(parent.clf), filtered_mat, type = "prob") %>% 
502 505
            dplyr::mutate('class' = apply(., 1, 
... ...
@@ -581,6 +584,9 @@ setMethod("process_parent_clf", c("obj" = "SingleCellExperiment"),
581 584
         filtered_mat <- transform_to_zscore(filtered_mat)
582 585
       }
583 586
       
587
+      # to avoid problem triggered by '-' in gene names
588
+      colnames(filtered_mat) <- gsub('-', '_', colnames(filtered_mat))
589
+      
584 590
       # predict
585 591
       pred = stats::predict(clf(parent.clf), filtered_mat, type = "prob") %>% 
586 592
         dplyr::mutate('class' = apply(., 1, 
... ...
@@ -626,6 +632,9 @@ make_prediction <- function(mat, classifier, pred_cells,
626 632
   . <- NULL
627 633
   cells <- names(pred_cells)
628 634
   
635
+  # to avoid problem triggered by '-' in gene names
636
+  colnames(mat) <- gsub('-', '_', colnames(mat))
637
+  
629 638
   # predict
630 639
   pred = stats::predict(clf(classifier), mat, type = "prob") %>%
631 640
     dplyr::mutate('class' = apply(., 1, function(x)
... ...
@@ -773,6 +782,10 @@ verify_parent <- function(mat, classifier, meta.data) {
773 782
 test_performance <- function(mat, classifier, tag) {
774 783
   overall.roc <- . <- NULL
775 784
   
785
+  # to avoid problem triggered by '-' in gene names
786
+  colnames(mat) <- gsub('-', '_', colnames(mat))
787
+  #labels(clf(classifier)$terms) <- gsub('-', '_', labels(clf(classifier)$terms))
788
+    
776 789
   tag <- unlist(lapply(tag, function(x) if (x == 'yes') {1} else {0}))
777 790
   
778 791
   iter <- unique(sort(c(p_thres(classifier), seq(0.1, 0.9, by = 0.1))))