Browse code

add warning when ambiguous cell types are found

Former-commit-id: 75b5a2865c4bcbfa231b4ebf4a5c49e805c8b5c4

nttvy authored on 30/05/2021 18:17:41
Showing 1 changed files

... ...
@@ -272,6 +272,8 @@ setMethod("check_parent_child_coherence", c("obj" = "SingleCellExperiment"),
272 272
 })
273 273
 
274 274
 #' Filter out cells with ambiguous cell type labels and non-applicable cells
275
+#' Ambiguous characters include: "/", ",", "-", "+", ".", "and", 
276
+#' "or", "(", ")", "ambiguous"
275 277
 #' 
276 278
 #' @param obj object
277 279
 #' @param tag_slot slot in cell meta data indicating cell type
... ...
@@ -290,8 +292,8 @@ setGeneric("filter_cells", function(obj, tag_slot)
290 292
 setMethod("filter_cells", c("obj" = "Seurat"), function(obj, tag_slot) {
291 293
   # define characters usually included in ambiguous cell types
292 294
   # this is to avoid considering ambiguous cell types as negative cell_type
293
-  ambiguous.chars <- c("/", ",", " -", " [+]", "[.]", "and", 
294
-                       "or", "[(]" ,"[)]", "ambiguous")
295
+  ambiguous.chars <- c("/", ",", " -", " [+]", "[.]", " and ", 
296
+                       " or ", "_or_", "-or-", "[(]" ,"[)]", "ambiguous")
295 297
   
296 298
   # only eliminate cell labels containing cell_type and ambiguous.chars
297 299
   if (tag_slot == "active.ident") {
... ...
@@ -311,7 +313,10 @@ setMethod("filter_cells", c("obj" = "Seurat"), function(obj, tag_slot) {
311 313
   n.applicable.cells <- 
312 314
     rownames(cell.tags[grepl("not applicable", cell.tags[, 1]) 
313 315
                        | is.na(cell.tags[, 1]),, drop = FALSE])
314
-  
316
+  if (any(ambiguous))
317
+    warning('Cell types containing "/", ",", "-", "+", ".", "and", "or", "(", 
318
+            ")", and "ambiguous" are considered as ambiguous. They are removed
319
+            from training and testing.', call. = FALSE, immediate. = TRUE)
315 320
   keeping.cells <- 
316 321
     colnames(obj)[!((colnames(obj) %in% ambiguous.cells) 
317 322
                     | colnames(obj) %in% n.applicable.cells)]
... ...
@@ -346,6 +351,10 @@ setMethod("filter_cells", c("obj" = "SingleCellExperiment"),
346 351
   ambiguous <- grepl(paste(ambiguous.chars, collapse="|"), cell.tags)
347 352
   n.applicable <- (grepl("not applicable", cell.tags) | is.na(cell.tags))
348 353
   
354
+  if (any(ambiguous))
355
+    warning('Cell types containing "/", ",", "-", "+", ".", "and", "or", "(", 
356
+            ")", and "ambiguous" are considered as ambiguous. They are removed
357
+            from training and testing.', call. = FALSE, immediate. = TRUE)
349 358
   obj <- obj[, !(ambiguous | n.applicable)]
350 359
   
351 360
   return(obj)