Bioconductor Code: scAnnotatR

Raw Blame Patch Log History
#' Train cell type classifier
#' 
#' @description Train a classifier for a new cell type. 
#' If cell type has a parent, only available for \code{\link{scClassifR}}
#' object as parent cell classifying model.
#' 
#' @param train_obj object that can be used for training the new model. 
#' \code{\link{Seurat}} object or \code{\link{SingleCellExperiment}} object
#' is expected.
#' If the training model has parent, parent_tag_slot may have been indicated. 
#' This field would have been filled out automatically 
#' if user precedently run classify_cells function. 
#' If no (predicted) cell type annotation provided, 
#' the function can be run if 1- parent_cell or 2- parent_clf is provided.
#' @param cell_type string indicating the name of the subtype
#' This must exactly match cell tag/label if cell tag/label is a string.
#' @param features list of features used for the new training model
#' @param parent_cell string indicated the name of the parent cell type, 
#' if parent cell type classifier has already been saved in model database.
#' Adjust path_to_models for exact database.  
#' @param parent_clf classification model for the parent cell type
#' @param path_to_models path to the folder containing the model database. 
#' As default, the pretrained models in the package will be used. 
#' If user has trained new models, indicate the folder containing the 
#' new_models.rda file.
#' @param zscore whether gene expression in train_obj is transformed to zscore
#' @param ... arguments passed to other methods
#' 
#' @return \code{\link{scClassifR}} object
#'
#' @note Only one cell type is expected for each cell in object. 
#' Ambiguous cell type, such as: "T cells/NK cells/ILC", 
#' will be ignored from training.
#' Subtypes used in training model for parent cell types must be indicated
#' as parent cell type. For example, when training for B cells, 
#' plasma cells must be annotated as B cells in order to be used.
#' 
#' @export
setGeneric("train_classifier", 
           function(train_obj, cell_type, features, 
                    parent_cell = NA_character_, 
                    parent_clf = NULL, path_to_models = c("default", "."), 
                    zscore = TRUE, ...) 
             standardGeneric("train_classifier"))

#' @inherit train_classifier
#' 
#' @param seurat_tag_slot string, name of slot in cell meta data 
#' indicating cell tag/label in the training object.
#' Strings indicating cell types are expected in this slot. 
#' For \code{\link{Seurat}} object, default value is "active.ident".  
#' Expected values are string or binary/logical, 
#' 0/"no"/F/FALSE: not being new cell type, 1/"yes"/T/TRUE: being new cell type.
#' @param seurat_parent_tag_slot string, name of a slot in cell meta data 
#' indicating assigned/predicted cell type. Default is "predicted_cell_type". 
#' This slot would have been filled automatically 
#' if user have called classify_cells function.
#' The slot must contain only string values. 
#' @param seurat_assay name of assay to use in training object. 
#' Default to 'RNA' assay.
#' @param seurat_slot type of expression data to use in training object. 
#' For \code{\link{Seurat}} object, available types are: "counts", "data" 
#' and "scale.data". Default to "counts", which contains unnormalized data.
#' 
#' @examples
#' # load small example dataset
#' data("tirosh_mel80_example")
#' 
#' # this dataset already contains pre-defined cell labels
#' table(Seurat::Idents(tirosh_mel80_example))
#' 
#' # define genes to use to classify this cell type (B cells in this example)
#' selected_features_B = c("CD19", "MS4A1", "CD79A")
#' 
#' # train the classifier, the "cell_type" argument must match 
#' # the cell labels in the data, except upper/lower case
#' set.seed(123)
#' clf_b <- train_classifier(train_obj = tirosh_mel80_example, 
#' features = selected_features_B, cell_type = "b cells")
#' 
#' # classify cell types using B cell classifier, 
#' # a test classifier process may be used before applying the classifier 
#' tirosh_mel80_example <- classify_cells(classify_obj = tirosh_mel80_example, 
#' classifiers = c(clf_b))
#' 
#' # tag all cells that are plasma cells (random example here)
#' tirosh_mel80_example[['plasma_cell_tag']] <- c(rep(1, 80), rep(0, 400))
#' 
#' # set new features for the subtype
#' p_features = c("SDC1", "CD19", "CD79A")
#' 
#' # train the classifier, the "B cell" classifier is used as parent. 
#' # This means, only cells already classified as "B cells" will be evaluated.
#' # the "tag_slot" parameter tells the classifier to use this cell meta data
#' # for the training process.
#' set.seed(123)
#' plasma_clf <- train_classifier(train_obj = tirosh_mel80_example, 
#' cell_type = "Plasma cell", features = p_features, parent_clf = clf_b, 
#' seurat_tag_slot = 'plasma_cell_tag')
#' 
#' @importFrom Seurat GetAssayData
#' 
#' @rdname train_classifier
setMethod("train_classifier", c("train_obj" = "Seurat"), 
          function(train_obj, cell_type, features, parent_cell = NA_character_,
                   parent_clf = NULL, path_to_models = c("default", "."), 
                   zscore = TRUE, seurat_tag_slot = "active.ident", 
                   seurat_parent_tag_slot = "predicted_cell_type", 
                   seurat_assay = 'RNA', seurat_slot = 'counts', ...) {
  #--- part of parent cell type
  parent_process <- process_parent_clf(
    train_obj, seurat_parent_tag_slot, parent_cell, parent_clf, 
    path_to_models, zscore, seurat_assay, seurat_slot
  )
  
  # check parent-child coherence
  if (!is.null(parent_process$pos_parent)) {
    check_res <- check_parent_child_coherence(
      train_obj, parent_process$pos_parent, parent_process$parent_cell, 
      cell_type, cell_type, seurat_tag_slot
    )
    train_obj <- check_res$adjusted_object
    seurat_tag_slot <- check_res$adjusted_tag_slot
  }
  
  #--- end of part of parent cell type
  
  #--- part of cell type
  # filter cells
  train_obj <- filter_cells(train_obj, seurat_tag_slot)
  
  # convert Seurat object to matrix
  mat = Seurat::GetAssayData(object = train_obj, 
                             assay = seurat_assay, slot = seurat_slot)
  
  # feature selection
  mat <- select_features(mat, features)
  
  # transpose mat
  mat <- t(as.matrix(mat))
  
  # transform mat to zscore values
  if (zscore == TRUE)
    mat <- transform_to_zscore(mat)
  
  # construct cell tag to yes/no values
  train_tag <- construct_tag_vect(train_obj, cell_type, seurat_tag_slot)
  
  # if cell type not found in tag
  if (all(train_tag != "yes")) {
    stop("Cell type not available in train data. Please verify cell type.", 
         call. = FALSE)
  }

  # transform list to factor
  train_tag <- factor(train_tag, levels = c('yes', 'no'))
  
  # convert hyphen (-) by underscore (_)
  colnames(mat) <- gsub('-', '_', colnames(mat))
  
  # train
  clf <- train_func(mat, train_tag)
  
  # remove this info to reduce memory
  clf$resampledCM <- NULL 
  p_thres <- 0.5
  
  features <- labels(clf$terms)
  features <- gsub('_', '-', features) # convert back underscore to hyphen
  object <- scClassifR(cell_type, clf, features, p_thres, 
                             NA_character_)
  
  # only assign parent if pretrained model for parent cell type is avai
  parent_check <- 
    (
      !is.null(parent_process$parent.clf) && 
        tolower(cell_type(parent_process$parent.clf)) == 
        tolower(parent_process$parent_cell)
    ) || (
      tolower(parent_process$parent_cell) %in% 
        tolower(names(parent_process$model_list))
    )
  if (parent_check) parent(object) <- parent_process$parent_cell
  
  return(object)
})

#' @inherit train_classifier
#' 
#' @param sce_tag_slot string, name of annotation slot indicating 
#' cell tag/label in the training object.
#' For \code{\link{SingleCellExperiment}} object, default value is "ident".  
#' Expected values are string or binary/logical, 
#' 0/"no"/F/FALSE: not being new cell type, 1/"yes"/T/TRUE: being new cell type.
#' @param sce_parent_tag_slot string, name of a slot in cell meta data 
#' indicating pre-assigned/predicted cell type. 
#' Default field is "predicted_cell_type".
#' This field would have been filled automatically 
#' when user called classify_cells function. 
#' The slot must contain only string values. 
#' @param sce_assay name of assay to use in training object. 
#' Default to 'logcounts' assay.
#' 
#' @import SingleCellExperiment
#' @importFrom SummarizedExperiment assay
#' 
#' @rdname train_classifier
setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"), 
          function(train_obj, cell_type, features, parent_cell = NA_character_,
                   parent_clf = NULL, path_to_models = c("default", "."), 
                   zscore = TRUE, sce_tag_slot = "ident", 
                   sce_parent_tag_slot = "predicted_cell_type", 
                   sce_assay = 'logcounts', ...) {
  # solve duplication of cell names
  colnames(train_obj) <- make.unique(colnames(train_obj), sep = '_')
            
  #--- part of parent cell type
  parent_process <- process_parent_clf(
    train_obj, sce_parent_tag_slot, parent_cell, parent_clf, 
    path_to_models, zscore, sce_assay
  )
  
  # check parent-child coherence
  if (!is.null(parent_process$pos_parent)) {
    check_res <- check_parent_child_coherence(
      train_obj, parent_process$pos_parent, parent_process$parent_cell,
      cell_type,cell_type,sce_tag_slot
    )
    train_obj <- check_res$adjusted_object
    sce_tag_slot <- check_res$adjusted_tag_slot
  }
  
  #--- end of part of parent cell type
  
  #--- part of cell type
  # filter cells
  train_obj <- filter_cells(train_obj, sce_tag_slot)
  
  # convert Seurat object to matrix
  mat = SummarizedExperiment::assay(train_obj, sce_assay)
  
  # feature selection
  mat <- select_features(mat, features)
  
  # transpose mat
  mat <- t(as.matrix(mat))
  
  # transform mat to zscore values
  if (zscore == TRUE)
    mat <- transform_to_zscore(mat)
  
  # construct cell tag to yes/no values
  train_tag <- construct_tag_vect(train_obj, cell_type, sce_tag_slot)
  
  # if cell type not found in tag
  if (all(train_tag != "yes")) {
    stop("Cell type not available in train data. Please verify cell type.", 
         call. = FALSE)
  }
  
  # transform list to factor
  train_tag <- factor(train_tag, levels = c('yes', 'no'))
  
  # convert hyphen (-) by underscore (_)
  colnames(mat) <- gsub('-', '_', colnames(mat))
  
  # train
  clf <- train_func(mat, train_tag)
  
  # remove this info to reduce memory
  clf$resampledCM <- NULL 
  p_thres <- 0.5
  
  features <- labels(clf$terms)
  features <- gsub('_', '-', features) # convert back underscore to hyphen
  object <- scClassifR(cell_type, clf, features, p_thres, 
                             NA_character_)
  
  # only assign parent if pretrained model for parent cell type is avai
  parent_check <-
    (
      !is.null(parent_process$parent.clf) && 
        tolower(cell_type(parent_process$parent.clf)) == 
        tolower(parent_process$parent_cell)
    ) || (
      tolower(parent_process$parent_cell) %in% 
        tolower(names(parent_process$model_list))
    )
  if (parent_check) parent(object) <- parent_process$parent_cell
  
  return(object)
})

#' Testing process.
#' 
#' @description Testing process. 
#' 
#' @param test_obj xxobject that can be used for testing
#' @param classifier classification model
#' @param target_cell_type vector indicating other cell types than cell labels 
#' that can be considered as the main cell type in classifier, 
#' for example, c("plasma cell", "b cell", "b cells", "activating b cell"). 
#' Default as NULL.
#' @param parent_clf \code{\link{scClassifR}} object
#' corresponding to classification model for the parent cell type
#' @param path_to_models path to the folder containing the list of models. 
#' As default, the pretrained models in the package will be used. 
#' If user has trained new models, indicate the folder containing 
#' the new_models.rda file.
#' @param zscore boolean, whether gene expression is transformed to zscore
#' @param ... arguments passed to other methods
#'
#' @return result of testing process in form of a list, 
#' including predicted values, prediction accuracy at a probability threshold, 
#' and roc curve information.
#' 
#' @note Only one cell type is expected for each cell. 
#' Ambiguous cell type, such as: "T cells/NK cells/ILC", will be ignored.
#' Subtypes used in testing model for parent cell types can be indicated 
#' as parent cell type, or can be indicated in target_cell_type. 
#' For example, when testing for B cells, plasma cells can be annotated as 
#' B cells, or target_cell_type is set c("plasma cells").
#' 
#' @export
setGeneric("test_classifier", function(test_obj, classifier, 
                                       target_cell_type = NULL, 
                                       parent_clf = NULL, 
                                       path_to_models = c("default", "."), 
                                       zscore = TRUE, ...) 
  standardGeneric("test_classifier"))

#' @inherit test_classifier
#' 
#' @param seurat_tag_slot string, name of annotation slot 
#' indicating cell tag/label in the testing object.
#' Strings indicating cell types are expected in this slot. 
#' For \code{\link{Seurat}} object, default value is "active.ident". 
#' Expected values is string or binary/logical, 
#' 0/"no"/F/FALSE: not being new cell type, 
#' 1/"yes"/T/TRUE: being new cell type.
#' @param seurat_parent_tag_slot string, name of tag slot in cell meta data
#' indicating pre-assigned/predicted parent cell type. 
#' Default field is "predicted_cell_type".
#' The slot must contain only string values. 
#' @param seurat_assay name of assay to use in 
#' \code{\link{Seurat}} object, defaults to 'RNA' assay.
#' @param seurat_slot type of expression data to use in 
#' \code{\link{Seurat}} object. 
#' Some available types are: "counts", "data" and "scale.data". 
#' Default to "counts", which contains unnormalized data.
#'  
#' @examples
#' # load small example dataset
#' data("tirosh_mel80_example")
#' 
#' # train the classifier
#' selected_features_B = c("CD19", "MS4A1", "CD79A")
#' set.seed(123)
#' clf_b <- train_classifier(train_obj = tirosh_mel80_example, 
#' features = selected_features_B, cell_type = "B cells")
#' 
#' # test the classifier, target cell type can be in other formats or
#' # alternative cell type that can be considered as the classified cell type 
#' clf_b_test <- test_classifier(test_obj = tirosh_mel80_example, 
#' classifier = clf_b, target_cell_type = c("B cell"))
#' clf_b_test
#' 
#' @importFrom Seurat GetAssayData
#'
#' @rdname test_classifier
setMethod("test_classifier", c("test_obj" = "Seurat", 
                               "classifier" = "scClassifR"), 
          function(test_obj, classifier, target_cell_type = NULL, 
                   parent_clf = NULL, path_to_models = c("default", "."), 
                   zscore = TRUE, seurat_tag_slot = "active.ident", 
                   seurat_parent_tag_slot = "predicted_cell_type", 
                   seurat_assay = 'RNA', seurat_slot = 'counts', ...) {
  . <- fpr <- tpr <- NULL
  
  # target_cell_type check
  if (!tolower(cell_type(classifier)) %in% tolower(target_cell_type)) {
    target_cell_type <- append(target_cell_type, cell_type(classifier))
  }
  
  #--- parent cell type
  # process parent clf
  parent_process <- process_parent_clf(
    test_obj, seurat_parent_tag_slot, parent(classifier), parent_clf,
    path_to_models, zscore, seurat_assay, seurat_slot
  )
  
  # check parent-child coherence
  if (!is.null(parent_process$pos_parent)) {
    check_res <- check_parent_child_coherence(
      test_obj, parent_process$pos_parent, parent(classifier), 
      cell_type(classifier), target_cell_type, seurat_tag_slot
    )
    test_obj <- check_res$adjusted_object
    seurat_tag_slot <- check_res$adjusted_tag_slot
  }
  
  #--- children cell type 
  # filter cells
  test_obj <- filter_cells(test_obj, seurat_tag_slot)
  
  # convert Seurat object to matrix
  test_mat = Seurat::GetAssayData(
    object = test_obj, assay = seurat_assay, slot = seurat_slot)
  
  # perform features selection
  test_mat <- select_features(test_mat, features(classifier))
  
  # transpose mat
  test_mat <- t(as.matrix(test_mat))
  
  # transform mat to zscore values
  if (zscore == TRUE) test_mat <- transform_to_zscore(test_mat)
  
  # construct cell tag to yes/no values
  test_tag <- construct_tag_vect(test_obj, target_cell_type, seurat_tag_slot)
  
  # if cell type not found in tag
  if (all(test_tag != "yes")) {
    stop("Cell type ", cell_type(classifier), 
    " is not available in the test data. 
    Overwrite the target cell type using the target_cell_type parameter 
    or verify that you chose the correct test dataset.", 
         call. = FALSE)
  }
  
  return_val = test_performance(test_mat, classifier, test_tag)
  return(return_val)
})

#' @inherit test_classifier
#' 
#' @param sce_tag_slot string, name of annotation slot 
#' indicating cell tag/label in the testing object.
#' Strings indicating cell types are expected in this slot. 
#' Default value is "ident".  
#' Expected values are string or binary/logical, 
#' 0/"no"/F/FALSE: not being new cell type, 
#' 1/"yes"/T/TRUE: being new cell type.
#' @param sce_parent_tag_slot string, name of tag slot in cell meta data
#' indicating pre-assigned/predicted parent cell type. 
#' Default is "predicted_cell_type".
#' The slot must contain only string values. 
#' @param sce_assay name of assay to use in \code{\link{SingleCellExperiment}}
#' object, defaults to 'logcounts' assay.
#'  
#' @import SingleCellExperiment
#' @importFrom SummarizedExperiment assay
#' 
#' @rdname test_classifier
setMethod("test_classifier", c("test_obj" = "SingleCellExperiment", 
                               "classifier" = "scClassifR"), 
          function(test_obj, classifier, target_cell_type = NULL, 
                   parent_clf = NULL, path_to_models = c("default", "."), 
                   zscore = TRUE, sce_tag_slot = "ident", 
                   sce_parent_tag_slot = "predicted_cell_type", 
                   sce_assay = 'logcounts', ...) {
  # solve duplication of cell names
  colnames(test_obj) <- make.unique(colnames(test_obj), sep = '_')
  . <- fpr <- tpr <- NULL
  
  # target_cell_type check
  if (!tolower(cell_type(classifier)) %in% tolower(target_cell_type)) {
    target_cell_type <- append(target_cell_type, cell_type(classifier))
  }
  
  #--- parent cell type
  # process parent clf
  parent_process <- process_parent_clf(
    test_obj, sce_parent_tag_slot, parent(classifier), parent_clf, 
    path_to_models, zscore, sce_assay
  )
  
  # check parent-child coherence
  if (!is.null(parent_process$pos_parent)) {
    check_res <- check_parent_child_coherence(
      test_obj, parent_process$pos_parent, parent(classifier), 
      cell_type(classifier), target_cell_type, sce_tag_slot
    )
    test_obj <- check_res$adjusted_object
    sce_tag_slot <- check_res$adjusted_tag_slot
  }
  
  #--- children cell type 
  # filter cells
  test_obj <- filter_cells(test_obj, sce_tag_slot)
  
  # convert Seurat object to matrix
  test_mat = SummarizedExperiment::assay(test_obj, sce_assay)
  
  # perform features selection
  test_mat <- select_features(test_mat, features(classifier))
  
  # transpose mat
  test_mat <- t(as.matrix(test_mat))
  
  # transform mat to zscore values
  if (zscore == TRUE) test_mat <- transform_to_zscore(test_mat)
  
  # construct cell tag to yes/no values
  test_tag <- construct_tag_vect(test_obj, target_cell_type, sce_tag_slot)
  
  # if cell type not found in tag
  if (all(test_tag != "yes")) {
    stop("Cell type ", cell_type(classifier), " is not available in the test data. 
          Overwrite the target cell type using the target_cell_type parameter or verify that you chose the correct test dataset.", 
         call. = FALSE)
  }
  
  return_val <- test_performance(test_mat, classifier, test_tag)
  
  return(return_val)
})

#' Plot roc curve
#' 
#' @param test_result result of test_classifier function
#' 
#' @return ggplot2 roc curve
#' @examples
#' # load small example dataset
#' data("tirosh_mel80_example")
#' 
#' # train a classifier, for ex: B cell
#' selected_features_B = c("CD19", "MS4A1", "CD79A")
#' set.seed(123)
#' clf_b <- train_classifier(train_obj = tirosh_mel80_example, 
#' features = selected_features_B, cell_type = "B cells")
#' 
#' clf_b_test <- test_classifier(test_obj = tirosh_mel80_example, 
#' classifier = clf_b)
#' 
#' # run plot curve on the test result
#' roc_curve <- plot_roc_curve(test_result = clf_b_test)
#' @import ROCR
#' @import ggplot2
#' @export
plot_roc_curve <- function(test_result) {
  fpr <- tpr <- NULL
  
  data <- rbind(c(0, 0), test_result$overall_roc[, c(2:3)], c(1, 1))
  colnames(data) <- c('fpr', 'tpr')
  
  # plot ROC curve
  q <- ggplot2::ggplot(data=data.frame(data), aes(x=fpr, y=tpr)) 
  q <- q + ggplot2::geom_line() 
  q <- q + ggplot2::xlab("False Positive Rate (1-Specificity)") 
  q <- q + ggplot2::ylab("True Positive Rate (Sensitivity)") 
  q
  return(q)
}

#' Classify cells from multiple models
#' 
#' @param classify_obj the object containing cells to be classified
#' @param classifiers list of classification models. 
#' The model is obtained from train_classifier function or available in current 
#' working space. 
#' Users may test the model using test_classifier before using this function.
#' If classifiers contain classifiers for sub cell types, classifiers for
#' parent cell type must be indicated first in order to be applied before
#' children classifiers.
#' If classifiers is NULL, the method will use all classifiers in database.
#' @param cell_types list of cell types containing models to be used
#' for classification, only applicable if the models have been saved to package.
#' @param path_to_models path to the folder containing the list of models. 
#' As default value, the pretrained models in the package will be used. 
#' If user has trained new models, indicate the folder containing the 
#' new_models.rda file.
#' @param ignore_ambiguous_result return all ambiguous predictions
#' (multiple cell types) to empty
#' When this parameter turns to TRUE, 
#' most probably predicted cell types will be ignored.  
#' @param ... arguments passed to other methods
#' 
#' @return the input object with new slots in cells meta data
#' New slots are: predicted_cell_type, most_probable_cell_type,
#' slots in form of [cell_type]_p and [cell_type]_class 
#' 
#' @export
setGeneric("classify_cells", function(classify_obj, classifiers = NULL, 
                                      cell_types = "all", 
                                      path_to_models = c("default", "."), 
                                      ignore_ambiguous_result = FALSE, ...) 
  standardGeneric("classify_cells"))

#' @inherit classify_cells
#' 
#' @param seurat_assay name of assay to use in 
#' \code{\link{Seurat}} object, defaults to 'RNA' assay.
#' @param seurat_slot type of expression data to use in 
#' \code{\link{Seurat}} object. Some available types are: 
#' "counts", "data" and "scale.data". 
#' Default to "counts", which is unnormalized data.
#' 
#' @examples
#' # load small example dataset
#' data("tirosh_mel80_example")
#' 
#' # train one classifier for one cell type, for ex, B cell
#' # define genes to use to classify this cell type
#' selected_features_B = c("CD19", "MS4A1", "CD79A")
#' 
#' # train the classifier
#' set.seed(123)
#' clf_b <- train_classifier(train_obj = tirosh_mel80_example, 
#' features = selected_features_B, cell_type = "B cells")
#' 
#' # do the same thing with other cell types, for example, T cells
#' selected_features_T = c("CD4", "CD8A", "CD8B")
#' set.seed(123)
#' clf_t <- train_classifier(train_obj = tirosh_mel80_example, 
#' features = selected_features_T, cell_type = "T cells")
#' 
#' # create a list of classifiers
#' classifier_ls <- list(clf_b, clf_t)
#' 
#' # classify cells with list of classifiers
#' seurat.obj <- classify_cells(classify_obj = tirosh_mel80_example, 
#' classifiers = classifier_ls)
#' 
#' @importFrom Seurat GetAssayData
#' @import dplyr
#' @importFrom stats predict
#' 
#' @rdname classify_cells
setMethod("classify_cells", c("classify_obj" = "Seurat"), 
          function(classify_obj, classifiers = NULL, cell_types = "all", 
                   path_to_models = c("default", "."), 
                   ignore_ambiguous_result = FALSE, seurat_assay = 'RNA', 
                   seurat_slot = 'counts', ...) {
  mat = Seurat::GetAssayData(object = classify_obj, 
                             assay = seurat_assay, slot = seurat_slot)
  
  # create an empty cell type for all cells
  pred_cells <- c(rep("", ncol(mat))) 
  names(pred_cells) <- colnames(mat)
  
  if (is.null(classifiers)) { 
    model_list <- load_models(path_to_models)
    
    if (cell_types >= 1 && cell_types != "all") 
      classifiers = model_list[cell_types]
    else classifiers <- model_list
  }
  
  # run predictors
  for (classifier in classifiers) {
    if (!is.na(parent(classifier))) {
      applicable_mat <- verify_parent(mat, classifier, classify_obj[[]])
      if (is.null(applicable_mat)) next # no parent clf provided or no positive to parent clf
    } else applicable_mat <- mat

    filtered_mat <- select_features(applicable_mat, features(classifier))
    filtered_mat <- t(as.matrix(filtered_mat))
    filtered_mat <- transform_to_zscore(filtered_mat)
    
    prediction <- make_prediction(
      filtered_mat, classifier, pred_cells, ignore_ambiguous_result
    )
    pred <- prediction$pred
    pred_cells <- prediction$pred_cells
    
    # add prediction to meta data: each classifier/celltype has 2 col: p, class
    for (colname in colnames(pred)) {
      classify_obj[[colname]] <- pred[, colname, drop = FALSE]
    }
  }
  
  if (any(pred_cells != "")) {
    pred_cells <- gsub("/$", "", pred_cells)
    
    # ignore ambiguous results
    if (ignore_ambiguous_result == TRUE) {
      pred_cells <- unlist(
        lapply(pred_cells, function(x) 
          if (length(unlist(strsplit(x, split = '/'))) <= 1) {x} 
          else {'ambiguous'})
      )
    } 
    
    # add cell type to meta data
    classify_obj[['predicted_cell_type']] <- pred_cells
    
    # simplify result can only happen when not ignore ambiguous results
    if (ignore_ambiguous_result == FALSE) 
      classify_obj[['most_probable_cell_type']] <- simplify_prediction(
        classify_obj[[]], as.matrix(mat), classifiers
    )
  }
  
  return(classify_obj)
})

#' @inherit classify_cells
#' 
#' @param sce_assay name of assay to use in 
#' \code{\link{SingleCellExperiment}} object, 
#' defaults to 'logcounts' assay.
#' 
#' @import SingleCellExperiment
#' @importFrom SummarizedExperiment assay colData
#' 
#' @rdname classify_cells
setMethod("classify_cells", c("classify_obj" = "SingleCellExperiment"), 
          function(classify_obj, classifiers = NULL, cell_types = "all", 
                   path_to_models = c("default", "."), 
                   ignore_ambiguous_result = FALSE, 
                   sce_assay = 'logcounts', ...) {
  # solve duplication of cell names
  colnames(classify_obj) <- make.unique(colnames(classify_obj), sep = '_')
  
  mat = SummarizedExperiment::assay(classify_obj, sce_assay)
  
  # create an empty cell type for all cells
  pred_cells <- c(rep("", ncol(mat))) 
  names(pred_cells) <- colnames(mat)
  
  if (is.null(classifiers)) { 
    model_list <- load_models(path_to_models)
    
    if (cell_types>=1 && cell_types!="all") classifiers = model_list[cell_types]
    else classifiers <- model_list
  }
  
  # run predictors
  for (classifier in classifiers) {
    if (!is.na(parent(classifier))) { 
      applicable_mat <- verify_parent(mat, classifier, colData(classify_obj))
      if (is.null(applicable_mat)) next # no parent clf provided or no positive to parent clf
    } else applicable_mat <- mat

    filtered_mat <- select_features(applicable_mat, features(classifier))
    filtered_mat <- t(as.matrix(filtered_mat))
    filtered_mat <- transform_to_zscore(filtered_mat)
    
    prediction <- make_prediction(
      filtered_mat, classifier, pred_cells, ignore_ambiguous_result
    )
    
    pred <- prediction$pred
    pred_cells <- prediction$pred_cells
    # add prediction to meta data: 2 cols: p, class 
    for (colname in colnames(pred)) {
      classify_obj[[colname]] <- unlist(
        lapply(colnames(classify_obj), function(x)
          if (x %in% rownames(pred)) {pred[x, colname]}
          else {NA})
      )
    }
  }
  
  if (any(pred_cells != "")) {
    pred_cells <- gsub("/$", "", pred_cells)
    
    # double check if there is more than one predicted cell type
    if (ignore_ambiguous_result == TRUE) {
      pred_cells <- unlist(
        lapply(pred_cells, function(x) 
          if (length(unlist(strsplit(x, split = '/'))) <= 1) {x} 
          else {'ambiguous'})
        )
    } 
    
    # add cell type to meta data
    classify_obj$predicted_cell_type <- pred_cells
    
    # this will be ignored if ignore ambiguous result is on
    if (ignore_ambiguous_result == FALSE) 
      classify_obj$most_probable_cell_type <- simplify_prediction(
        as.matrix(SummarizedExperiment::colData(classify_obj)), 
        as.matrix(mat), classifiers
      )
  }
  return(classify_obj)
})