Bioconductor Code: scAnnotatR

Browse code

merge separate methods for Seurat and SCE signatures into one function

nttvy authored on 29/07/2021 22:23:02
Showing 21 changed files

NAMESPACE index 47d6e0b..76e75da 100644
R/class.R index 200b1a7..66ac3ef 100644
R/classifier.R index a7cd727..3ab5285 100644
R/support.R index 1256169..e1e7ec1 100644
R/tree.R index 582f865..4032e7a 100644
man/caret_model.Rd index c3381b0..3015848 100644
man/cell_type.Rd index ab8c62d..8df94c2 100644
man/classify_cells.Rd index b63b0f3..2661a1d 100644
man/delete_model.Rd index 467f90c..8965ce0 100644
man/internal.Rd index e113366..6d73158 100644
man/marker_genes.Rd index 8c5006d..8ef5e5f 100644
man/p_thres.Rd index 4b20fb3..e564659 100644
man/parent.Rd index 436b95f..dd3e668 100644
man/plot_roc_curve.Rd index 6277c55..7c06379 100644
man/save_new_model.Rd index 9afe56b..7240f4a 100644
man/scAnnotatR.Rd index 0a5ffb8..39a09c2 100644
man/show.Rd index ed0d69e..234a020 100644
man/test_classifier.Rd index 00d9d2a..2112a62 100644
man/train_classifier.Rd index ff4ba0a..a0fe3ff 100644
vignettes/training-basic-model.Rmd index adf6204..6b7db25 100644
vignettes/training-child-model.Rmd index e4f31e2..091d7e8 100644

History View file @ a212ecf

@@ -31,6 +31,7 @@ import(methods)
                      import(pROC)
                      import(tools)
                      importFrom(Seurat,GetAssayData)
                     +importFrom(Seurat,Idents)
                      importFrom(SummarizedExperiment,assay)
                      importFrom(SummarizedExperiment,colData)
                      importFrom(stats,predict)

R/class.R

History View file @ a212ecf

@@ -23,9 +23,9 @@ setOldClass("train")
                      #' # train a classifier, for ex: B cell
                      #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#'                           marker_genes = selected_marker_genes_B,
                     -#'                           cell_type = "B cells")
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "B cells", tag_slot = 'active.ident')
                      #'
                      #' classifier_b
                      #' @export
@@ -219,8 +219,9 @@ setValidity("scAnnotatR", checkObjectValidity)
                      #' data("tirosh_mel80_example")
                      #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "B cells", tag_slot = 'active.ident')
                      #' classifier_b
                      #'
                      #' @export
@@ -251,8 +252,9 @@ setMethod("show", c("object" = "scAnnotatR"), function(object) {
                      #' data("tirosh_mel80_example")
                      #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' cell_type = "B cells", marker_genes = selected_marker_genes_B)
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "B cells", tag_slot = 'active.ident')
                      #' cell_type(classifier_b)
                      #'
                      #' @export
@@ -273,8 +275,9 @@ cell_type <- function(classifier) {
                      #' data("tirosh_mel80_example")
                      #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "B cells", tag_slot = 'active.ident')
                      #' caret_model(classifier_b)
                      #'
                      #' @export
@@ -294,8 +297,9 @@ caret_model <- function(classifier) {
                      #' data("tirosh_mel80_example")
                      #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "B cells", tag_slot = 'active.ident')
                      #' marker_genes(classifier_b)
                      #'
                      #' @export
@@ -315,8 +319,9 @@ marker_genes <- function(classifier) {
                      #' data("tirosh_mel80_example")
                      #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "B cells", tag_slot = 'active.ident')
                      #' p_thres(classifier_b)
                      #'
                      #' @export
@@ -337,8 +342,9 @@ p_thres <- function(classifier) {
                      #' data("tirosh_mel80_example")
                      #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "B cells", tag_slot = 'active.ident')
                      #' parent(classifier_b)
                      #'
                      #' @export
@@ -367,8 +373,9 @@ setGeneric('cell_type<-', function(classifier, value)
                      #' data("tirosh_mel80_example")
                      #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "B cells", tag_slot = 'active.ident')
                      #' cell_type(classifier_b) <- "B cell"
                      #' @rdname cell_type
                      setReplaceMethod('cell_type', c("classifier" = "scAnnotatR"),
@@ -402,10 +409,12 @@ setGeneric('p_thres<-', function(classifier, value)
                      #' data("tirosh_mel80_example")
                      #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     -#' classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example,
                     -#' classifier = classifier_b)
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "B cells", tag_slot = 'active.ident')
                     +#' classifier_b_test <- test_classifier(classifier = classifier_b,
                     +#' test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts',
                     +#' tag_slot = 'active.ident')
                      #' # assign a new threshold probability for prediction
                      #' p_thres(classifier_b) <- 0.4
                      #' @rdname p_thres

R/classifier.R

History View file @ a212ecf

@@ -6,25 +6,39 @@
                      #'
                      #' @param train_obj object that can be used for training the new model.
                      #' \code{\link{Seurat}} object or \code{\link{SingleCellExperiment}} object
                     -#' is expected.
                     +#' is supported.
                      #' If the model being trained has a parent, parent_tag_slot may be specified.
                      #' This field will have been filled in automatically
                      #' if the user previously ran the classify_cells function.
                      #' If no (predicted) cell type annotation is provided,
                      #' the function can still be run if 1- parent_cell or 2- parent_classifier is provided.
                     +#' @param assay name of assay to use in training object.
                     +#' @param slot type of expression data to use in training object, omitted if
                     +#' train_obj is \code{\link{SingleCellExperiment}} object.
                      #' @param cell_type string indicating the name of the subtype
                      #' This must exactly match cell tag/label if cell tag/label is a string.
                      #' @param marker_genes list of marker genes used for the new training model
                     +#' @param tag_slot string, name of slot in cell meta data
                     +#' indicating cell tag/label in the training object.
                     +#' Strings indicating cell types are expected in this slot.
                     +#' For \code{\link{Seurat}} object, default value is "active.ident".
                     +#' Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     +#' or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     +#' 1/"yes"/T/TRUE: being new cell type.
                      #' @param parent_cell string indicating the name of the parent cell type,
                      #' if parent cell type classifier has already been saved in model database.
                      #' Adjust path_to_models for exact database.
                     +#' @param parent_tag_slot string, name of a slot in cell meta data
                     +#' indicating assigned/predicted cell type. Default is "predicted_cell_type".
                     +#' This slot will have been filled automatically
                     +#' if the user has called the classify_cells function.
                     +#' The slot must contain only string values.
                      #' @param parent_classifier classification model for the parent cell type
                      #' @param path_to_models path to the folder containing the model database.
                      #' As default, the pretrained models in the package will be used.
                      #' If user has trained new models, indicate the folder containing the
                      #' new_models.rda file.
                      #' @param zscore whether gene expression in train_obj is transformed to zscore
                     -#' @param ... arguments passed to other methods
                      #'
                      #' @return \code{\link{scAnnotatR}} object
                      #'
@@ -35,34 +49,6 @@
                      #' as parent cell type. For example, when training for B cells,
                      #' plasma cells must be annotated as B cells in order to be used.
                      #'
                     -#' @export
                     -setGeneric("train_classifier",
                     -           function(train_obj, cell_type, marker_genes,
                     -                    parent_cell = NA_character_,
                     -                    parent_classifier = NULL, path_to_models = "default",
                     -                    zscore = TRUE, ...)
                     -             standardGeneric("train_classifier"))
                     -
                     -#' @inherit train_classifier
                     -#'
                     -#' @param seurat_tag_slot string, name of slot in cell meta data
                     -#' indicating cell tag/label in the training object.
                     -#' Strings indicating cell types are expected in this slot.
                     -#' For \code{\link{Seurat}} object, default value is "active.ident".
                     -#' Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     -#' or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     -#' 1/"yes"/T/TRUE: being new cell type.
                     -#' @param seurat_parent_tag_slot string, name of a slot in cell meta data
                     -#' indicating assigned/predicted cell type. Default is "predicted_cell_type".
                     -#' This slot would have been filled automatically
                     -#' if user have called classify_cells function.
                     -#' The slot must contain only string values.
                     -#' @param seurat_assay name of assay to use in training object.
                     -#' Default to 'RNA' assay.
                     -#' @param seurat_slot type of expression data to use in training object.
                     -#' For \code{\link{Seurat}} object, available types are: "counts", "data"
                     -#' and "scale.data". Default to "counts", which contains unnormalized data.
                     -#'
                      #' @examples
                      #' # load small example dataset
                      #' data("tirosh_mel80_example")
@@ -76,8 +62,9 @@ setGeneric("train_classifier",
                      #' # train the classifier, the "cell_type" argument must match
                      #' # the cell labels in the data, except upper/lower case
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_B, cell_type = "b cells")
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "b cells", tag_slot = 'active.ident')
                      #'
                      #' # classify cell types using B cell classifier,
                      #' # a test classifier process may be used before applying the classifier
@@ -96,18 +83,78 @@ setGeneric("train_classifier",
                      #' # for the training process.
                      #' set.seed(123)
                      #' plasma_classifier <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' cell_type = "Plasma cell", marker_genes = p_marker_genes,
                     -#' parent_classifier = classifier_b, seurat_tag_slot = 'plasma_cell_tag')
                     +#' assay = 'RNA', slot = 'counts', cell_type = 'Plasma cell',
                     +#' marker_genes = p_marker_genes, tag_slot = 'plasma_cell_tag',
                     +#' parent_classifier = classifier_b)
                      #'
                     -#' @importFrom Seurat GetAssayData
                     +#' @export
                     +train_classifier <- function(train_obj, assay, slot = NULL,
                     +                             cell_type, marker_genes, tag_slot,
                     +                             parent_cell = NA_character_,
                     +                             parent_tag_slot = 'predicted_cell_type',
                     +                             parent_classifier = NULL, path_to_models = "default",
                     +                             zscore = TRUE) {
                     +  if (is(train_obj, 'Seurat')) {
                     +    object <-
                     +      train_classifier_seurat(train_obj, cell_type, marker_genes,
                     +                              parent_cell, parent_classifier, path_to_models,
                     +                              zscore, tag_slot, parent_tag_slot, assay, slot)
                     +  } else if (is(train_obj, 'SingleCellExperiment')) {
                     +    object <-
                     +      train_classifier_sce(train_obj, cell_type, marker_genes,
                     +                              parent_cell, parent_classifier, path_to_models,
                     +                              zscore, tag_slot, parent_tag_slot, assay)
                     +  } else {
                     +    stop('Training object of not supported class', call. = FALSE)
                     +  }
                     +
                     +  return(object)
                     +}
                     +
                     +#' Train cell type classifier, when train_obj is Seurat object
                      #'
                     -#' @rdname train_classifier
                     -setMethod("train_classifier", c("train_obj" = "Seurat"),
                     -          function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
                     -                   parent_classifier = NULL, path_to_models = "default",
                     -                   zscore = TRUE, seurat_tag_slot = "active.ident",
                     -                   seurat_parent_tag_slot = "predicted_cell_type",
                     -                   seurat_assay = 'RNA', seurat_slot = 'counts', ...) {
                     +#' @description Train a classifier for a new cell type
                     +#' If cell type has a parent, only available for \code{\link{scAnnotatR}}
                     +#' object as parent cell classifying model.
                     +#'
                     +#' @param train_obj Seurat object
                     +#' @param seurat_assay name of assay to use in training object.
                     +#' @param seurat_slot type of expression data to use in training object
                     +#' @param cell_type string indicating the name of the subtype
                     +#' This must exactly match cell tag/label if cell tag/label is a string.
                     +#' @param marker_genes list of marker genes used for the new training model
                     +#' @param seurat_tag_slot string, name of slot in cell meta data
                     +#' indicating cell tag/label in the training object.
                     +#' Strings indicating cell types are expected in this slot.
                     +#' For \code{\link{Seurat}} object, default value is "active.ident".
                     +#' Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     +#' or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     +#' 1/"yes"/T/TRUE: being new cell type.
                     +#' @param parent_cell string indicated the name of the parent cell type,
                     +#' if parent cell type classifier has already been saved in model database.
                     +#' Adjust path_to_models for exact database.
                     +#' @param seurat_parent_tag_slot string, name of a slot in cell meta data
                     +#' indicating assigned/predicted cell type. Default is "predicted_cell_type".
                     +#' This slot would have been filled automatically
                     +#' if user have called classify_cells function.
                     +#' The slot must contain only string values.
                     +#' @param parent_classifier classification model for the parent cell type
                     +#' @param path_to_models path to the folder containing the model database.
                     +#' As default, the pretrained models in the package will be used.
                     +#' If user has trained new models, indicate the folder containing the
                     +#' new_models.rda file.
                     +#' @param zscore whether gene expression in train_obj is transformed to zscore
                     +#'
                     +#' @return \code{\link{scAnnotatR}} object
                     +#'
                     +#' @importFrom Seurat GetAssayData Idents
                     +#'
                     +#' @rdname internal
                     +train_classifier_seurat <-
                     +  function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
                     +           parent_classifier = NULL, path_to_models = "default", zscore = TRUE,
                     +           seurat_tag_slot, seurat_parent_tag_slot = 'predicted_cell_type',
                     +           seurat_assay, seurat_slot) {
                        # convert Seurat object to matrix
                        mat = Seurat::GetAssayData(object = train_obj,
                                                   assay = seurat_assay, slot = seurat_slot)
@@ -126,43 +173,59 @@ setMethod("train_classifier", c("train_obj" = "Seurat"),
                          names(parent_tag) <- colnames(train_obj)
                        } else parent_tag <- NULL
                     -  object <- train_classifier_func(mat, tag, cell_type, marker_genes,
                     +  object <- train_classifier_from_mat(mat, tag, cell_type, marker_genes,
                                                        parent_tag, parent_cell, parent_classifier,
                                                        path_to_models, zscore)
                        return(object)
                     -})
                     +}
                     -#' @inherit train_classifier
                     +#' Train cell type classifier, when train_obj is SCE object
                      #'
                     -#' @param sce_tag_slot string, name of annotation slot indicating
                     -#' cell tag/label in the training object.
                     -#' For \code{\link{SingleCellExperiment}} object, default value is "ident".
                     +#' @description Train a classifier for a new cell type
                     +#' If cell type has a parent, only available for \code{\link{scAnnotatR}}
                     +#' object as parent cell classifying model.
                     +#'
                     +#' @param train_obj SCE object
                     +#' @param sce_assay name of assay to use in training object.
                     +#' @param cell_type string indicating the name of the subtype
                     +#' This must exactly match cell tag/label if cell tag/label is a string.
                     +#' @param marker_genes list of marker genes used for the new training model
                     +#' @param sce_tag_slot string, name of slot in cell meta data
                     +#' indicating cell tag/label in the training object.
                     +#' Strings indicating cell types are expected in this slot.
                     +#' For \code{\link{Seurat}} object, default value is "active.ident".
                      #' Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                      #' or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                      #' 1/"yes"/T/TRUE: being new cell type.
                     +#' @param parent_cell string indicated the name of the parent cell type,
                     +#' if parent cell type classifier has already been saved in model database.
                     +#' Adjust path_to_models for exact database.
                      #' @param sce_parent_tag_slot string, name of a slot in cell meta data
                     -#' indicating pre-assigned/predicted cell type.
                     -#' Default field is "predicted_cell_type".
                     -#' This field would have been filled automatically
                     -#' when user called classify_cells function.
                     +#' indicating assigned/predicted cell type. Default is "predicted_cell_type".
                     +#' This slot would have been filled automatically
                     +#' if user have called classify_cells function.
                      #' The slot must contain only string values.
                     -#' @param sce_assay name of assay to use in training object.
                     -#' Default to 'logcounts' assay.
                     +#' @param parent_classifier classification model for the parent cell type
                     +#' @param path_to_models path to the folder containing the model database.
                     +#' As default, the pretrained models in the package will be used.
                     +#' If user has trained new models, indicate the folder containing the
                     +#' new_models.rda file.
                     +#' @param zscore whether gene expression in train_obj is transformed to zscore
                      #'
                     +#' @return \code{\link{scAnnotatR}} object
                     +#'
                      #' @import SingleCellExperiment
                      #' @importFrom SummarizedExperiment assay
                      #'
                     -#' @rdname train_classifier
                     -setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"),
                     -          function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
                     -                   parent_classifier = NULL, path_to_models = "default",
                     -                   zscore = TRUE, sce_tag_slot = "ident",
                     -                   sce_parent_tag_slot = "predicted_cell_type",
                     -                   sce_assay = 'logcounts', ...) {
                     +#' @rdname internal
                     +train_classifier_sce <-
                     +  function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
                     +           parent_classifier = NULL, path_to_models = "default", zscore = TRUE,
                     +           sce_tag_slot, sce_parent_tag_slot = "predicted_cell_type", sce_assay) {
                        # solve duplication of cell names
                        colnames(train_obj) <- make.unique(colnames(train_obj), sep = '_')
                     -  # convert Seurat object to matrix
                     +  # convert SCE object to matrix
                        mat = SummarizedExperiment::assay(train_obj, sce_assay)
                        tag = SummarizedExperiment::colData(train_obj)[, sce_tag_slot]
@@ -173,12 +236,12 @@ setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"),
                          names(parent_tag) <- colnames(train_obj)
                        } else parent_tag <- NULL
                     -  object <- train_classifier_func(mat, tag, cell_type, marker_genes,
                     +  object <- train_classifier_from_mat(mat, tag, cell_type, marker_genes,
                                                        parent_tag, parent_cell, parent_classifier,
                                                        path_to_models, zscore)
                        return(object)
                     -})
                     +}
                      #' Train cell type from matrix
                      #'
@@ -205,7 +268,7 @@ setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"),
                      #' @return caret trained model
                      #'
                      #' @rdname internal
                     -train_classifier_func <- function(mat, tag, cell_type, marker_genes,
                     +train_classifier_from_mat <- function(mat, tag, cell_type, marker_genes,
                                                        parent_tag, parent_cell, parent_classifier,
                                                        path_to_models, zscore) {
                        #--- part of parent cell type
@@ -283,21 +346,34 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes,
                      #'
                      #' @description Testing process.
                      #'
                     -#' @param test_obj xxobject that can be used for testing
                     -#' @param classifier classification model
                     +#' @param test_obj object that can be used for testing
                     +#' @param assay name of assay to use in test_object
                     +#' @param slot type of expression data to use in test_object.
                     +#' For Seurat object, some available types are: "counts", "data" and "scale.data".
                     +#' Ignore this if test_obj is \code{\link{SingleCellExperiment}} object.
                     +#' @param classifier scAnnotatR classification model
                     +#' @param tag_slot string, name of annotation slot
                     +#' indicating cell tag/label in the testing object.
                     +#' Strings indicating cell types are expected in this slot.
                     +#' Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     +#' or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     +#' 1/"yes"/T/TRUE: being new cell type.
                      #' @param target_cell_type vector indicating other cell types than cell labels
                      #' that can be considered as the main cell type in classifier,
                      #' for example, c("plasma cell", "b cell", "b cells", "activating b cell").
                      #' Default as NULL.
                      #' @param parent_classifier \code{\link{scAnnotatR}} object
                      #' corresponding to classification model for the parent cell type
                     +#' @param parent_tag_slot string, name of tag slot in cell meta data
                     +#' indicating pre-assigned/predicted parent cell type.
                     +#' Default field is "predicted_cell_type".
                     +#' The slot must contain only string values.
                      #' @param path_to_models path to the folder containing the list of models.
                      #' As default, the pretrained models in the package will be used.
                      #' If user has trained new models, indicate the folder containing
                      #' the new_models.rda file.
                      #' @param zscore boolean, whether gene expression is transformed to zscore
                     -#' @param ... arguments passed to other methods
                     -#'
                     +#'
                      #' @return result of testing process in form of a list,
                      #' including predicted values, prediction accuracy at a probability threshold,
                      #' and roc curve information.
@@ -309,60 +385,99 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes,
                      #' For example, when testing for B cells, plasma cells can be annotated as
                      #' B cells, or target_cell_type is set c("plasma cells").
                      #'
                     +#' @examples
                     +#' # load small example dataset
                     +#' data("tirosh_mel80_example")
                     +#'
                     +#' # train the classifier
                     +#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                     +#' set.seed(123)
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "b cells", tag_slot = 'active.ident')
                     +#'
                     +#' # test the classifier; target_cell_type lists alternative spellings or
                     +#' # other cell types that should count as the classified cell type
                     +#' classifier_b_test <- test_classifier(classifier = classifier_b,
                     +#' test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts',
                     +#' tag_slot = 'active.ident', target_cell_type = c("B cell"))
                     +#' classifier_b_test
                     +#'
                      #' @export
                     -setGeneric("test_classifier", function(test_obj, classifier,
                     -                                       target_cell_type = NULL,
                     -                                       parent_classifier = NULL,
                     -                                       path_to_models = "default",
                     -                                       zscore = TRUE, ...)
                     +setGeneric("test_classifier",
                     +           function(classifier, test_obj, assay, slot = NULL, tag_slot,
                     +                    target_cell_type = NULL, parent_classifier = NULL,
                     +                    parent_tag_slot = 'predicted_cell_type',
                     +                    path_to_models = "default", zscore = TRUE)
                        standardGeneric("test_classifier"))
                      #' @inherit test_classifier
                      #'
                     +#' @rdname test_classifier
                     +setMethod('test_classifier', c('classifier' = 'scAnnotatR'), # S4 method for scAnnotatR classifiers; branches on the class of test_obj
                     +          function(classifier, test_obj, assay, slot = NULL, tag_slot,
                     +                   target_cell_type = NULL, parent_classifier = NULL,
                     +                   parent_tag_slot = 'predicted_cell_type',
                     +                   path_to_models = "default", zscore = TRUE) {
                     +  if (is(test_obj, 'Seurat')) { # Seurat input: arguments go positionally, assay and slot both forwarded
                     +    return_val <-
                     +      test_classifier_seurat(test_obj, classifier, target_cell_type,
                     +                             parent_classifier, path_to_models, zscore,
                     +                             tag_slot, parent_tag_slot, assay, slot)
                     +  } else if (is(test_obj, 'SingleCellExperiment')) { # SCE input: same positional order, slot is not forwarded
                     +    return_val <-
                     +      test_classifier_sce(test_obj, classifier, target_cell_type,
                     +                          parent_classifier, path_to_models, zscore,
                     +                          tag_slot, parent_tag_slot, assay)
                     +  } else { # any other class of test_obj is rejected with an error
                     +    stop('Testing object of not supported class', call. = FALSE)
                     +  }
                     +  return(return_val)
                     +})
+                    +
                     +#' Testing process for Seurat object
                     +#'
                     +#' @description Testing process when test object is of type Seurat
                     +#'
                     +#' @param test_obj Seurat object used for testing
                     +#' @param seurat_assay name of assay to use in test_object
                     +#' @param seurat_slot type of expression data to use in test_object.
                     +#' For Seurat object, some available types are: "counts", "data" and "scale.data".
                     +#' @param classifier scAnnotatR classification model
                      #' @param seurat_tag_slot string, name of annotation slot
                      #' indicating cell tag/label in the testing object.
                      #' Strings indicating cell types are expected in this slot.
                     -#' For \code{\link{Seurat}} object, default value is "active.ident".
                      #' Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                      #' or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                      #' 1/"yes"/T/TRUE: being new cell type.
                     +#' @param target_cell_type vector indicating other cell types than cell labels
                     +#' that can be considered as the main cell type in classifier,
                     +#' for example, c("plasma cell", "b cell", "b cells", "activating b cell").
                     +#' Default as NULL.
                     +#' @param parent_classifier \code{\link{scAnnotatR}} object
                     +#' corresponding to classification model for the parent cell type
                      #' @param seurat_parent_tag_slot string, name of tag slot in cell meta data
                      #' indicating pre-assigned/predicted parent cell type.
                      #' Default field is "predicted_cell_type".
                      #' The slot must contain only string values.
                     -#' @param seurat_assay name of assay to use in
                     -#' \code{\link{Seurat}} object, defaults to 'RNA' assay.
                     -#' @param seurat_slot type of expression data to use in
                     -#' \code{\link{Seurat}} object.
                     -#' Some available types are: "counts", "data" and "scale.data".
                     -#' Default to "counts", which contains unnormalized data.
                     -#'
                     -#' @examples
                     -#' # load small example dataset
                     -#' data("tirosh_mel80_example")
                     -#'
                     -#' # train the classifier
                     -#' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                     -#' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +#' @param path_to_models path to the folder containing the list of models.
                     +#' As default, the pretrained models in the package will be used.
                     +#' If user has trained new models, indicate the folder containing
                     +#' the new_models.rda file.
                     +#' @param zscore boolean, whether gene expression is transformed to zscore
                      #'
                     -#' # test the classifier, target cell type can be in other formats or
                     -#' # alternative cell type that can be considered as the classified cell type
                     -#' classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example,
                     -#' classifier = classifier_b, target_cell_type = c("B cell"))
                     -#' classifier_b_test
                     +#' @return result of testing process in form of a list,
                     +#' including predicted values, prediction accuracy at a probability threshold,
                     +#' and roc curve information.
                      #'
                      #' @importFrom Seurat GetAssayData
                      #'
                     -#' @rdname test_classifier
                     -setMethod("test_classifier", c("test_obj" = "Seurat",
                     -                               "classifier" = "scAnnotatR"),
                     -          function(test_obj, classifier, target_cell_type = NULL,
                     -                   parent_classifier = NULL, path_to_models = "default",
                     -                   zscore = TRUE, seurat_tag_slot = "active.ident",
                     -                   seurat_parent_tag_slot = "predicted_cell_type",
                     -                   seurat_assay = 'RNA', seurat_slot = 'counts', ...) {
                     +#' @rdname internal
                     +test_classifier_seurat <-
                     +  function(test_obj, classifier, target_cell_type = NULL,
                     +           parent_classifier = NULL, path_to_models = "default", zscore = TRUE,
                     +           seurat_tag_slot, seurat_parent_tag_slot = "predicted_cell_type",
                     +           seurat_assay, seurat_slot) {
                        . <- fpr <- tpr <- NULL
                        # convert Seurat object to matrix
                        mat = Seurat::GetAssayData(
@@ -382,39 +497,53 @@ setMethod("test_classifier", c("test_obj" = "Seurat",
                          names(parent_tag) <- colnames(test_obj)
                        } else parent_tag <- NULL
                     -  return_val <- test_classifier_func(mat, tag, classifier, parent_tag,
                     +  return_val <- test_classifier_from_mat(mat, tag, classifier, parent_tag,
                                                           target_cell_type, parent_classifier,
                                                           path_to_models, zscore)
                        return(return_val)
                     -})
                     +}
                     -#' @inherit test_classifier
                     +#' Testing process for SCE object
                     +#'
                     +#' @description Testing process when test object is of type SCE
                      #'
                     +#' @param test_obj SCE object used for testing
                     +#' @param sce_assay name of assay to use in test_object
                     +#' @param classifier scAnnotatR classification model
                      #' @param sce_tag_slot string, name of annotation slot
                      #' indicating cell tag/label in the testing object.
                      #' Strings indicating cell types are expected in this slot.
                     -#' Default value is "ident".
                      #' Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                      #' or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                      #' 1/"yes"/T/TRUE: being new cell type.
                     +#' @param target_cell_type vector indicating other cell types than cell labels
                     +#' that can be considered as the main cell type in classifier,
                     +#' for example, c("plasma cell", "b cell", "b cells", "activating b cell").
                     +#' Default as NULL.
                     +#' @param parent_classifier \code{\link{scAnnotatR}} object
                     +#' corresponding to classification model for the parent cell type
                      #' @param sce_parent_tag_slot string, name of tag slot in cell meta data
                      #' indicating pre-assigned/predicted parent cell type.
                     -#' Default is "predicted_cell_type".
                     +#' Default field is "predicted_cell_type".
                      #' The slot must contain only string values.
                     -#' @param sce_assay name of assay to use in \code{\link{SingleCellExperiment}}
                     -#' object, defaults to 'logcounts' assay.
                     -#'
                     +#' @param path_to_models path to the folder containing the list of models.
                     +#' As default, the pretrained models in the package will be used.
                     +#' If user has trained new models, indicate the folder containing
                     +#' the new_models.rda file.
                     +#' @param zscore boolean, whether gene expression is transformed to zscore
                     +#'
                     +#' @return result of testing process in form of a list,
                     +#' including predicted values, prediction accuracy at a probability threshold,
                     +#' and roc curve information.
                     +#'
                      #' @import SingleCellExperiment
                      #' @importFrom SummarizedExperiment assay
                      #'
                     -#' @rdname test_classifier
                     -setMethod("test_classifier", c("test_obj" = "SingleCellExperiment",
                     -                               "classifier" = "scAnnotatR"),
                     -          function(test_obj, classifier, target_cell_type = NULL,
                     -                   parent_classifier = NULL, path_to_models = "default",
                     -                   zscore = TRUE, sce_tag_slot = "ident",
                     -                   sce_parent_tag_slot = "predicted_cell_type",
                     -                   sce_assay = 'logcounts', ...) {
                     +#' @rdname internal
                     +test_classifier_sce <-
                     +  function(test_obj, classifier, target_cell_type = NULL,
                     +           parent_classifier = NULL, path_to_models = "default", zscore = TRUE,
                     +           sce_tag_slot, sce_parent_tag_slot = "predicted_cell_type", sce_assay) {
                        # solve duplication of cell names
                        colnames(test_obj) <- make.unique(colnames(test_obj), sep = '_')
                        . <- fpr <- tpr <- NULL
@@ -430,12 +559,12 @@ setMethod("test_classifier", c("test_obj" = "SingleCellExperiment",
                          names(parent_tag) <- colnames(test_obj)
                        } else parent_tag <- NULL
                     -  return_val <- test_classifier_func(mat, tag, classifier, parent_tag,
                     +  return_val <- test_classifier_from_mat(mat, tag, classifier, parent_tag,
                                                           target_cell_type, parent_classifier,
                                                           path_to_models, zscore)
                        return(return_val)
                     -})
                     +}
                      #' Run testing process from matrix and tag
                      #'
@@ -458,7 +587,7 @@ setMethod("test_classifier", c("test_obj" = "SingleCellExperiment",
                      #' @return model performance statistics
                      #'
                      #' @rdname internal
                     -test_classifier_func <- function(mat, tag, classifier, parent_tag,
                     +test_classifier_from_mat <- function(mat, tag, classifier, parent_tag,
                                                       target_cell_type, parent_classifier,
                                                       path_to_models, zscore) {
                        # target_cell_type check
@@ -522,11 +651,13 @@ test_classifier_func <- function(mat, tag, classifier, parent_tag,
                      #' # train a classifier, for ex: B cell
                      #' selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "b cells", tag_slot = 'active.ident')
                      #'
                     -#' classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example,
                     -#' classifier = classifier_b)
                     +#' classifier_b_test <- test_classifier(classifier = classifier_b,
                     +#' test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts',
                     +#' tag_slot = 'active.ident', target_cell_type = c("B cell"))
                      #'
                      #' # run plot curve on the test result
                      #' roc_curve <- plot_roc_curve(test_result = classifier_b_test)
@@ -608,14 +739,16 @@ setGeneric("classify_cells", function(classify_obj, classifiers = NULL,
                      #'
                      #' # train the classifier
                      #' set.seed(123)
                     -#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +#' classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +#' cell_type = "b cells", tag_slot = 'active.ident')
                      #'
                      #' # do the same thing with other cell types, for example, T cells
                      #' selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
                      #' set.seed(123)
                     -#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_T, cell_type = "T cells")
                     +#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T,
                     +#' cell_type = "T cells", tag_slot = 'active.ident')
                      #'
                      #' # create a list of classifiers
                      #' classifier_ls <- list(classifier_b, classifier_t)

R/support.R

History View file @ a212ecf

@@ -168,17 +168,8 @@ select_marker_genes <- function(mat, marker_genes) {
                      #'
                      #' @return list of adjusted tag
                      #' @rdname internal
                     -setGeneric("check_parent_child_coherence",
                     -           function(mat, tag, pos_parent, parent_cell, cell_type,
                     -                    target_cell_type)
                     -             standardGeneric("check_parent_child_coherence"))
+                    -
                     -#' @inherit check_parent_child_coherence
                     -#'
                     -#' @rdname internal
                     -setMethod("check_parent_child_coherence", c("mat" = "dgCMatrix", 'tag' = 'vector'),
                     -          function(mat, tag, pos_parent, parent_cell, cell_type,
                     -                   target_cell_type) {
                     +check_parent_child_coherence <- function(mat, tag, pos_parent, parent_cell,
                     +                                         cell_type, target_cell_type) {
                        pos.val <- c(1, "yes", TRUE)
                        # prepare (sub) cell type tag
@@ -205,7 +196,7 @@ setMethod("check_parent_child_coherence", c("mat" = "dgCMatrix", 'tag' = 'vector
                        #SummarizedExperiment::colData(obj)[, tag_slot] <- new.tag_slot
                        return(new_tag)
                     -})
                     +}
                      #' Filter cells from ambiguous chars and non applicable cells
                      #' Ambiguous characters includes: "/", ",", "-", "+", ".", "and",
@@ -216,33 +207,26 @@ setMethod("check_parent_child_coherence", c("mat" = "dgCMatrix", 'tag' = 'vector
                      #'
                      #' @return filtered matrix and corresponding tag
                      #' @rdname internal
                     -setGeneric("filter_cells", function(mat, tag)
                     -  standardGeneric("filter_cells"))
+                    -
                     -#' @inherit filter_cells
                     -#'
                     -#' @rdname internal
                     -setMethod("filter_cells", c("mat" = "dgCMatrix", "tag" = "vector"),
                     -          function(mat, tag) {
                     -            # define characters usually included in ambiguous cell types
                     -            # this is to avoid considering ambiguous cell types as negative cell_type
                     -            ambiguous.chars <- c("/", ",", " -", " [+]", "[.]", " and ",
                     -                                 " or ", "_or_", "-or-", "[(]" ,"[)]", "ambiguous")
+                    -
                     -            # only eliminate cell labels containing cell_type and ambiguous.chars
                     -            ambiguous <- grepl(paste(ambiguous.chars, collapse="|"), tag)
                     -            n.applicable <- (grepl("not applicable", tag) | is.na(tag))
+                    -
                     -            if (any(ambiguous))
                     -              warning('Cell types containing "/", ",", "-", "+", ".", "and", "or", "(", ")", and "ambiguous" are considered as ambiguous. They are removed from training and testing.\n',
                     -                      call. = FALSE, immediate. = TRUE)
                     -            #obj <- obj[, !(ambiguous | n.applicable)]
                     -            mat <- mat[, !(ambiguous | n.applicable), drop = FALSE]
                     -            tag <- tag[!(ambiguous | n.applicable)]
+                    -
                     -            filtered <- list('mat' = mat, 'tag' = tag)
                     -            return(filtered)
                     -          })
                     +filter_cells <- function(mat, tag) {
                     +  # regex fragments, joined with "|" below, that mark a cell label as ambiguous;
                     +  # cells with such labels are removed instead of counting as negative cell_type
                     +  ambiguous.chars <- c("/", ",", " -", " [+]", "[.]", " and ",
                     +                       " or ", "_or_", "-or-", "[(]" ,"[)]", "ambiguous")
+                    +
                     +  # flag ambiguous labels, then labels that are NA or contain "not applicable"
                     +  ambiguous <- grepl(paste(ambiguous.chars, collapse="|"), tag)
                     +  n.applicable <- (grepl("not applicable", tag) | is.na(tag))
+                    +
                     +  if (any(ambiguous))
                     +    warning('Cell types containing "/", ",", "-", "+", ".", "and", "or", "(", ")", and "ambiguous" are considered as ambiguous. They are removed from training and testing.\n',
                     +            call. = FALSE, immediate. = TRUE)
                     +  # drop every flagged cell from the matrix columns and from the tag vector
                     +  mat <- mat[, !(ambiguous | n.applicable), drop = FALSE]
                     +  tag <- tag[!(ambiguous | n.applicable)]
+                    +
                     +  filtered <- list('mat' = mat, 'tag' = tag)
                     +  return(filtered)
                     +}
                      #' Construct tag vector
                      #'
@@ -254,25 +238,17 @@ setMethod("filter_cells", c("mat" = "dgCMatrix", "tag" = "vector"),
                      #' @return a binary vector for cell tag
                      #'
                      #' @rdname internal
                     -setGeneric("construct_tag_vect",
                     -           function(tag, cell_type)
                     -             standardGeneric("construct_tag_vect"))
+                    -
                     -#' @inherit construct_tag_vect
                     -#'
                     -#' @rdname internal
                     -setMethod("construct_tag_vect", c("tag" = "vector"),
                     -          function(tag, cell_type) {
                     -            pos.val <- c(1, "yes", TRUE)
+                    -
                     -            # x <- SummarizedExperiment::colData(obj)[, tag_slot]
                     -            test <- (tag %in% pos.val) | (tolower(tag) %in% tolower(cell_type))
                     -            new_tag <- ifelse(test, "yes", "no")
+                    -
                     -            named_tag = setNames(new_tag, names(tag))
+                    -
                     -            return(named_tag)
                     -          })
                     +construct_tag_vect <- function(tag, cell_type) {
                     +  pos.val <- c(1, "yes", TRUE) # c() coerces these to the strings "1", "yes", "TRUE"
+                    +
                     +  # a cell is positive when its tag is in pos.val or equals cell_type ignoring case
                     +  test <- (tag %in% pos.val) | (tolower(tag) %in% tolower(cell_type))
                     +  new_tag <- ifelse(test, "yes", "no")
+                    +
                     +  named_tag = setNames(new_tag, names(tag)) # carry the names of tag over to the result
+                    +
                     +  return(named_tag)
                     +}
                      #' Process parent classifier
                      #'
@@ -292,17 +268,8 @@ setMethod("construct_tag_vect", c("tag" = "vector"),
                      #' @import dplyr
                      #'
                      #' @rdname internal
                     -setGeneric("process_parent_classifier",
                     -           function(mat, parent_tag, parent_cell_type, parent_classifier,
                     -                    path_to_models, zscore = TRUE)
                     -             standardGeneric("process_parent_classifier"))
+                    -
                     -#' @inherit process_parent_classifier
                     -#'
                     -#' @rdname internal
                     -setMethod("process_parent_classifier", c("mat" = "dgCMatrix"),
                     -          function(mat, parent_tag, parent_cell_type, parent_classifier,
                     -                   path_to_models, zscore = TRUE) {
                     +process_parent_classifier <- function(mat, parent_tag, parent_cell_type,
                     +                                      parent_classifier, path_to_models, zscore) {
                          pos_parent <- parent.classifier <- . <- model_list <- NULL
                          if (is.na(parent_cell_type) && !is.null(parent_classifier))
@@ -368,7 +335,7 @@ setMethod("process_parent_classifier", c("mat" = "dgCMatrix"),
                          return_val <- list('pos_parent' = pos_parent, 'parent_cell'= parent_cell_type,
                                             'parent.classifier' = parent.classifier, 'model_list' = model_list)
                          return(return_val)
                     -})
                     +}
                      #' Make prediction
                      #'

R/tree.R

History View file @ a212ecf

@@ -22,8 +22,9 @@
                      #' # train classifier
                      #' selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
                      #' set.seed(123)
                     -#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_T, cell_type = "t cells")
                     +#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T,
                     +#' cell_type = "t cells", tag_slot = 'active.ident')
                      #'
                      #' # save the trained classifier to system
                      #' # test classifier can be used before this step
@@ -150,8 +151,9 @@ plant_tree <- function(path_to_models = "default") {
                      #' # train a classifier
                      #' set.seed(123)
                      #' selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
                     -#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     -#' marker_genes = selected_marker_genes_T, cell_type = "t cells")
                     +#' classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     +#' assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T,
                     +#' cell_type = "t cells", tag_slot = 'active.ident')
                      #'
                      #' # save a classifier to system
                      #' save_new_model(new_model = classifier_t, path_to_models = tempdir())

man/caret_model.Rd

History View file @ a212ecf

@@ -20,8 +20,9 @@ Returns the caret model of the \code{\link{scAnnotatR}} object
                      data("tirosh_mel80_example")
                      selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "B cells", tag_slot = 'active.ident')
                      caret_model(classifier_b)
+                     }

man/cell_type.Rd

History View file @ a212ecf

@@ -27,14 +27,16 @@ Returns the cell type for the given classifier.
                      data("tirosh_mel80_example")
                      selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -cell_type = "B cells", marker_genes = selected_marker_genes_B)
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "B cells", tag_slot = 'active.ident')
                      cell_type(classifier_b)
                      data("tirosh_mel80_example")
                      selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "B cells", tag_slot = 'active.ident')
                      cell_type(classifier_b) <- "B cell"
+                     }

man/classify_cells.Rd

History View file @ a212ecf

@@ -107,14 +107,16 @@ selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      # train the classifier
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "b cells", tag_slot = 'active.ident')
                      # do the same thing with other cell types, for example, T cells
                      selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
                      set.seed(123)
                     -classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_T, cell_type = "T cells")
                     +classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T,
                     +cell_type = "T cells", tag_slot = 'active.ident')
                      # create a list of classifiers
                      classifier_ls <- list(classifier_b, classifier_t)

man/delete_model.Rd

History View file @ a212ecf

@@ -27,8 +27,9 @@ data("tirosh_mel80_example")
                      # train a classifier
                      set.seed(123)
                      selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
                     -classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_T, cell_type = "t cells")
                     +classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T,
                     +cell_type = "t cells", tag_slot = 'active.ident')
                      # save a classifier to system
                      save_new_model(new_model = classifier_t, path_to_models = tempdir())

man/internal.Rd

History View file @ a212ecf

@@ -13,20 +13,20 @@
                      \alias{caret_model<-,scAnnotatR-method}
                      \alias{marker_genes<-}
                      \alias{marker_genes<-,scAnnotatR-method}
                     -\alias{train_classifier_func}
                     -\alias{test_classifier_func}
                     +\alias{train_classifier_seurat}
                     +\alias{train_classifier_sce}
                     +\alias{train_classifier_from_mat}
                     +\alias{test_classifier_seurat}
                     +\alias{test_classifier_sce}
                     +\alias{test_classifier_from_mat}
                      \alias{balance_dataset}
                      \alias{train_func}
                      \alias{transform_to_zscore}
                      \alias{select_marker_genes}
                      \alias{check_parent_child_coherence}
                     -\alias{check_parent_child_coherence,dgCMatrix,vector-method}
                      \alias{filter_cells}
                     -\alias{filter_cells,dgCMatrix,vector-method}
                      \alias{construct_tag_vect}
                     -\alias{construct_tag_vect,vector-method}
                      \alias{process_parent_classifier}
                     -\alias{process_parent_classifier,dgCMatrix-method}
                      \alias{make_prediction}
                      \alias{simplify_prediction}
                      \alias{verify_parent}
@@ -60,7 +60,34 @@ marker_genes(classifier) <- value
                      \S4method{marker_genes}{scAnnotatR}(classifier) <- value
                     -train_classifier_func(
                     +train_classifier_seurat(
                     +  train_obj,
                     +  cell_type,
                     +  marker_genes,
                     +  parent_cell = NA_character_,
                     +  parent_classifier = NULL,
                     +  path_to_models = "default",
                     +  zscore = TRUE,
                     +  seurat_tag_slot,
                     +  seurat_parent_tag_slot = "predicted_cell_type",
                     +  seurat_assay,
                     +  seurat_slot
                     +)
+                    +
                     +train_classifier_sce(
                     +  train_obj,
                     +  cell_type,
                     +  marker_genes,
                     +  parent_cell = NA_character_,
                     +  parent_classifier = NULL,
                     +  path_to_models = "default",
                     +  zscore = TRUE,
                     +  sce_tag_slot,
                     +  sce_parent_tag_slot = "predicted_cell_type",
                     +  sce_assay
                     +)
+                    +
                     +train_classifier_from_mat(
                        mat,
                        tag,
                        cell_type,
@@ -72,7 +99,32 @@ train_classifier_func(
                        zscore
+                     )
                     -test_classifier_func(
                     +test_classifier_seurat(
                     +  test_obj,
                     +  classifier,
                     +  target_cell_type = NULL,
                     +  parent_classifier = NULL,
                     +  path_to_models = "default",
                     +  zscore = TRUE,
                     +  seurat_tag_slot,
                     +  seurat_parent_tag_slot = "predicted_cell_type",
                     +  seurat_assay,
                     +  seurat_slot
                     +)
+                    +
                     +test_classifier_sce(
                     +  test_obj,
                     +  classifier,
                     +  target_cell_type = NULL,
                     +  parent_classifier = NULL,
                     +  path_to_models = "default",
                     +  zscore = TRUE,
                     +  sce_tag_slot,
                     +  sce_parent_tag_slot = "predicted_cell_type",
                     +  sce_assay
                     +)
+                    +
                     +test_classifier_from_mat(
                        mat,
                        tag,
                        classifier,
@@ -100,39 +152,17 @@ check_parent_child_coherence(
                        target_cell_type
+                     )
                     -\S4method{check_parent_child_coherence}{dgCMatrix,vector}(
                     -  mat,
                     -  tag,
                     -  pos_parent,
                     -  parent_cell,
                     -  cell_type,
                     -  target_cell_type
                     -)
+                    -
                      filter_cells(mat, tag)
                     -\S4method{filter_cells}{dgCMatrix,vector}(mat, tag)
+                    -
                      construct_tag_vect(tag, cell_type)
                     -\S4method{construct_tag_vect}{vector}(tag, cell_type)
+                    -
                      process_parent_classifier(
                        mat,
                        parent_tag,
                        parent_cell_type,
                        parent_classifier,
                        path_to_models,
                     -  zscore = TRUE
                     -)
+                    -
                     -\S4method{process_parent_classifier}{dgCMatrix}(
                     -  mat,
                     -  parent_tag,
                     -  parent_cell_type,
                     -  parent_classifier,
                     -  path_to_models,
                     -  zscore = TRUE
                     +  zscore
+                     )
                      make_prediction(mat, classifier, pred_cells, ignore_ambiguous_result = TRUE)
@@ -166,12 +196,7 @@ download_data_file(verbose = FALSE)
                      \item{value}{the new classifier}
                     -\item{mat}{expression matrix}
+                    -
                     -\item{tag}{tag of data}
+                    -
                     -\item{parent_tag}{vector, named list indicating pre-assigned/predicted
                     -parent cell type}
                     +\item{train_obj}{SCE object}
                      \item{parent_cell}{name of parent cell type}
@@ -183,6 +208,46 @@ to classification model for the parent cell type}
                      \item{zscore}{boolean indicating the transformation of gene expression
                      in object to zscore or not}
                     +\item{seurat_tag_slot}{string, name of annotation slot
                     +indicating cell tag/label in the testing object.
                     +Strings indicating cell types are expected in this slot.
                     +Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     +or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     +1/"yes"/T/TRUE: being new cell type.}
+                    +
                     +\item{seurat_parent_tag_slot}{string, name of tag slot in cell meta data
                     +indicating pre-assigned/predicted parent cell type.
                     +Default field is "predicted_cell_type".
                     +The slot must contain only string values.}
+                    +
                     +\item{seurat_assay}{name of assay to use in test_object}
+                    +
                     +\item{seurat_slot}{type of expression data to use in test_object.
                     +For Seurat object, some available types are: "counts", "data" and "scale.data".}
+                    +
                     +\item{sce_tag_slot}{string, name of annotation slot
                     +indicating cell tag/label in the testing object.
                     +Strings indicating cell types are expected in this slot.
                     +Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     +or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     +1/"yes"/T/TRUE: being new cell type.}
+                    +
                     +\item{sce_parent_tag_slot}{string, name of tag slot in cell meta data
                     +indicating pre-assigned/predicted parent cell type.
                     +Default field is "predicted_cell_type".
                     +The slot must contain only string values.}
+                    +
                     +\item{sce_assay}{name of assay to use in test_object}
+                    +
                     +\item{mat}{expression matrix}
+                    +
                     +\item{tag}{tag of data}
+                    +
                     +\item{parent_tag}{vector, named list indicating pre-assigned/predicted
                     +parent cell type}
+                    +
                     +\item{test_obj}{SCE object used for testing}
+                    +
                      \item{target_cell_type}{alternative cell types (in case of testing classifier)}
                      \item{pos_parent}{a vector indicating parent classifier prediction}
@@ -230,8 +295,20 @@ the classifier with the new marker genes
                      scAnnotatR object with the new marker genes.
                     +\code{\link{scAnnotatR}} object
+                    +
                     +\code{\link{scAnnotatR}} object
+                    +
                      caret trained model
                     +result of testing process in form of a list,
                     +including predicted values, prediction accuracy at a probability threshold,
                     +and roc curve information.
+                    +
                     +result of testing process in form of a list,
                     +including predicted values, prediction accuracy at a probability threshold,
                     +and roc curve information.
+                    +
                      model performance statistics
                      a list of balanced count matrix
@@ -266,10 +343,22 @@ path to the downloaded file in cache
                      \description{
                      Check if a scAnnotatR object is valid
                     +Train a classifier for a new cell type
                     +If cell type has a parent, only available for \code{\link{scAnnotatR}}
                     +object as parent cell classifying model.
+                    +
                     +Train a classifier for a new cell type
                     +If cell type has a parent, only available for \code{\link{scAnnotatR}}
                     +object as parent cell classifying model.
+                    +
                      Train a classifier for a new cell type from expression matrix
                      and tag
                      If cell type has a parent, only available for \code{\link{scAnnotatR}}
                      object as parent cell classifying model.
                     +Testing process when test object is of type Seurat
+                    +
                     +Testing process when test object is of type SCE
+                    +
                      Testing process from matrix and tag
+                     }

man/marker_genes.Rd

History View file @ a212ecf

@@ -19,8 +19,9 @@ Returns the set of marker genes for the given classifier.
                      data("tirosh_mel80_example")
                      selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "B cells", tag_slot = 'active.ident')
                      marker_genes(classifier_b)
+                     }

man/p_thres.Rd

History View file @ a212ecf

@@ -27,17 +27,20 @@ Returns the probability threshold for the given classifier.
                      data("tirosh_mel80_example")
                      selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "B cells", tag_slot = 'active.ident')
                      p_thres(classifier_b)
                      data("tirosh_mel80_example")
                      selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     -classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example,
                     -classifier = classifier_b)
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "B cells", tag_slot = 'active.ident')
                     +classifier_b_test <- test_classifier(classifier = classifier_b,
                     +test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts',
                     +tag_slot = 'active.ident')
                      # assign a new threshold probability for prediction
                      p_thres(classifier_b) <- 0.4
+                     }

man/parent.Rd

History View file @ a212ecf

@@ -19,8 +19,9 @@ Returns the parent of the cell type corresponding to the given classifier.
                      data("tirosh_mel80_example")
                      selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "B cells", tag_slot = 'active.ident')
                      parent(classifier_b)
+                     }

man/plot_roc_curve.Rd

History View file @ a212ecf

@@ -22,11 +22,13 @@ data("tirosh_mel80_example")
                      # train a classifier, for ex: B cell
                      selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "b cells", tag_slot = 'active.ident')
                     -classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example,
                     -classifier = classifier_b)
                     +classifier_b_test <- test_classifier(classifier = classifier_b,
                     +test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts',
                     +tag_slot = 'active.ident', target_cell_type = c("B cell"))
                      # run plot curve on the test result
                      roc_curve <- plot_roc_curve(test_result = classifier_b_test)

man/save_new_model.Rd

History View file @ a212ecf

@@ -33,8 +33,9 @@ data("tirosh_mel80_example")
                      # train classifier
                      selected_marker_genes_T = c("CD4", "CD8A", "CD8B")
                      set.seed(123)
                     -classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_T, cell_type = "t cells")
                     +classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_T,
                     +cell_type = "t cells", tag_slot = 'active.ident')
                      # save the trained classifier to system
                      # test classifier can be used before this step

man/scAnnotatR.Rd

History View file @ a212ecf

@@ -50,9 +50,9 @@ data("tirosh_mel80_example")
                      # train a classifier, for ex: B cell
                      selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -                          marker_genes = selected_marker_genes_B,
                     -                          cell_type = "B cells")
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "B cells", tag_slot = 'active.ident')
                      classifier_b
+                     }

man/show.Rd

History View file @ a212ecf

@@ -19,8 +19,9 @@ Show object
                      data("tirosh_mel80_example")
                      selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "B cells", tag_slot = 'active.ident')
                      classifier_b
+                     }

man/test_classifier.Rd

History View file @ a212ecf

@@ -2,51 +2,52 @@
                      % Please edit documentation in R/classifier.R
                      \name{test_classifier}
                      \alias{test_classifier}
                     -\alias{test_classifier,Seurat,scAnnotatR-method}
                     -\alias{test_classifier,SingleCellExperiment,scAnnotatR-method}
                     +\alias{test_classifier,scAnnotatR-method}
                      \title{Testing process.}
                      \usage{
                      test_classifier(
                     -  test_obj,
                        classifier,
                     -  target_cell_type = NULL,
                     -  parent_classifier = NULL,
                     -  path_to_models = "default",
                     -  zscore = TRUE,
                     -  ...
                     -)
+                    -
                     -\S4method{test_classifier}{Seurat,scAnnotatR}(
                        test_obj,
                     -  classifier,
                     +  assay,
                     +  slot = NULL,
                     +  tag_slot,
                        target_cell_type = NULL,
                        parent_classifier = NULL,
                     +  parent_tag_slot = "predicted_cell_type",
                        path_to_models = "default",
                     -  zscore = TRUE,
                     -  seurat_tag_slot = "active.ident",
                     -  seurat_parent_tag_slot = "predicted_cell_type",
                     -  seurat_assay = "RNA",
                     -  seurat_slot = "counts",
                     -  ...
                     +  zscore = TRUE
+                     )
                     -\S4method{test_classifier}{SingleCellExperiment,scAnnotatR}(
                     -  test_obj,
                     +\S4method{test_classifier}{scAnnotatR}(
                        classifier,
                     +  test_obj,
                     +  assay,
                     +  slot = NULL,
                     +  tag_slot,
                        target_cell_type = NULL,
                        parent_classifier = NULL,
                     +  parent_tag_slot = "predicted_cell_type",
                        path_to_models = "default",
                     -  zscore = TRUE,
                     -  sce_tag_slot = "ident",
                     -  sce_parent_tag_slot = "predicted_cell_type",
                     -  sce_assay = "logcounts",
                     -  ...
                     +  zscore = TRUE
+                     )
+                     }
                      \arguments{
                     -\item{test_obj}{xxobject that can be used for testing}
                     +\item{classifier}{scAnnotatR classification model}
+                    +
                     +\item{test_obj}{object that can be used for testing}
+                    +
                     +\item{assay}{name of assay to use in test_object}
                     -\item{classifier}{classification model}
                     +\item{slot}{type of expression data to use in test_object.
                     +For Seurat object, some available types are: "counts", "data" and "scale.data".
                     +Ignore this if test_obj is \code{\link{SingleCellExperiment}} object.}
+                    +
                     +\item{tag_slot}{string, name of annotation slot
                     +indicating cell tag/label in the testing object.
                     +Strings indicating cell types are expected in this slot.
                     +Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     +or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     +1/"yes"/T/TRUE: being new cell type.}
                      \item{target_cell_type}{vector indicating other cell types than cell labels
                      that can be considered as the main cell type in classifier,
@@ -56,51 +57,17 @@ Default as NULL.}
                      \item{parent_classifier}{\code{\link{scAnnotatR}} object
                      corresponding to classification model for the parent cell type}
                     +\item{parent_tag_slot}{string, name of tag slot in cell meta data
                     +indicating pre-assigned/predicted parent cell type.
                     +Default field is "predicted_cell_type".
                     +The slot must contain only string values.}
+                    +
                      \item{path_to_models}{path to the folder containing the list of models.
                      As default, the pretrained models in the package will be used.
                      If user has trained new models, indicate the folder containing
                      the new_models.rda file.}
                      \item{zscore}{boolean, whether gene expression is transformed to zscore}
+                    -
                     -\item{...}{arguments passed to other methods}
+                    -
                     -\item{seurat_tag_slot}{string, name of annotation slot
                     -indicating cell tag/label in the testing object.
                     -Strings indicating cell types are expected in this slot.
                     -For \code{\link{Seurat}} object, default value is "active.ident".
                     -Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     -or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     -1/"yes"/T/TRUE: being new cell type.}
+                    -
                     -\item{seurat_parent_tag_slot}{string, name of tag slot in cell meta data
                     -indicating pre-assigned/predicted parent cell type.
                     -Default field is "predicted_cell_type".
                     -The slot must contain only string values.}
+                    -
                     -\item{seurat_assay}{name of assay to use in
                     -\code{\link{Seurat}} object, defaults to 'RNA' assay.}
+                    -
                     -\item{seurat_slot}{type of expression data to use in
                     -\code{\link{Seurat}} object.
                     -Some available types are: "counts", "data" and "scale.data".
                     -Default to "counts", which contains unnormalized data.}
+                    -
                     -\item{sce_tag_slot}{string, name of annotation slot
                     -indicating cell tag/label in the testing object.
                     -Strings indicating cell types are expected in this slot.
                     -Default value is "ident".
                     -Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     -or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     -1/"yes"/T/TRUE: being new cell type.}
+                    -
                     -\item{sce_parent_tag_slot}{string, name of tag slot in cell meta data
                     -indicating pre-assigned/predicted parent cell type.
                     -Default is "predicted_cell_type".
                     -The slot must contain only string values.}
+                    -
                     -\item{sce_assay}{name of assay to use in \code{\link{SingleCellExperiment}}
                     -object, defaults to 'logcounts' assay.}
+                     }
                      \value{
                      result of testing process in form of a list,
@@ -125,13 +92,15 @@ data("tirosh_mel80_example")
                      # train the classifier
                      selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_B, cell_type = "B cells")
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "b cells", tag_slot = 'active.ident')
                      # test the classifier, target cell type can be in other formats or
                      # alternative cell type that can be considered as the classified cell type
                     -classifier_b_test <- test_classifier(test_obj = tirosh_mel80_example,
                     -classifier = classifier_b, target_cell_type = c("B cell"))
                     +classifier_b_test <- test_classifier(classifier = classifier_b,
                     +test_obj = tirosh_mel80_example, assay = 'RNA', slot = 'counts',
                     +tag_slot = 'active.ident', target_cell_type = c("B cell"))
                      classifier_b_test
+                     }

man/train_classifier.Rd

History View file @ a212ecf

@@ -2,81 +2,43 @@
                      % Please edit documentation in R/classifier.R
                      \name{train_classifier}
                      \alias{train_classifier}
                     -\alias{train_classifier,Seurat-method}
                     -\alias{train_classifier,SingleCellExperiment-method}
                      \title{Train cell type classifier}
                      \usage{
                      train_classifier(
                        train_obj,
                     +  assay,
                     +  slot = NULL,
                        cell_type,
                        marker_genes,
                     +  tag_slot,
                        parent_cell = NA_character_,
                     +  parent_tag_slot = "predicted_cell_type",
                        parent_classifier = NULL,
                        path_to_models = "default",
                     -  zscore = TRUE,
                     -  ...
                     -)
+                    -
                     -\S4method{train_classifier}{Seurat}(
                     -  train_obj,
                     -  cell_type,
                     -  marker_genes,
                     -  parent_cell = NA_character_,
                     -  parent_classifier = NULL,
                     -  path_to_models = "default",
                     -  zscore = TRUE,
                     -  seurat_tag_slot = "active.ident",
                     -  seurat_parent_tag_slot = "predicted_cell_type",
                     -  seurat_assay = "RNA",
                     -  seurat_slot = "counts",
                     -  ...
                     -)
+                    -
                     -\S4method{train_classifier}{SingleCellExperiment}(
                     -  train_obj,
                     -  cell_type,
                     -  marker_genes,
                     -  parent_cell = NA_character_,
                     -  parent_classifier = NULL,
                     -  path_to_models = "default",
                     -  zscore = TRUE,
                     -  sce_tag_slot = "ident",
                     -  sce_parent_tag_slot = "predicted_cell_type",
                     -  sce_assay = "logcounts",
                     -  ...
                     +  zscore = TRUE
+                     )
+                     }
                      \arguments{
                      \item{train_obj}{object that can be used for training the new model.
                      \code{\link{Seurat}} object or \code{\link{SingleCellExperiment}} object
                     -is expected.
                     +is supported.
                      If the training model has parent, parent_tag_slot may have been indicated.
                      This field would have been filled out automatically
                      if the user has previously run the classify_cells function.
                      If no (predicted) cell type annotation provided,
                      the function can be run if 1- parent_cell or 2- parent_classifier is provided.}
                     +\item{assay}{name of assay to use in training object.}
+                    +
                     +\item{slot}{type of expression data to use in training object, omitted if
                     +train_obj is \code{\link{SingleCellExperiment}} object.}
+                    +
                      \item{cell_type}{string indicating the name of the subtype
                      This must exactly match cell tag/label if cell tag/label is a string.}
                      \item{marker_genes}{list of marker genes used for the new training model}
                     -\item{parent_cell}{string indicated the name of the parent cell type,
                     -if parent cell type classifier has already been saved in model database.
                     -Adjust path_to_models for exact database.}
+                    -
                     -\item{parent_classifier}{classification model for the parent cell type}
+                    -
                     -\item{path_to_models}{path to the folder containing the model database.
                     -As default, the pretrained models in the package will be used.
                     -If user has trained new models, indicate the folder containing the
                     -new_models.rda file.}
+                    -
                     -\item{zscore}{whether gene expression in train_obj is transformed to zscore}
+                    -
                     -\item{...}{arguments passed to other methods}
+                    -
                     -\item{seurat_tag_slot}{string, name of slot in cell meta data
                     +\item{tag_slot}{string, name of slot in cell meta data
                      indicating cell tag/label in the training object.
                      Strings indicating cell types are expected in this slot.
                      For \code{\link{Seurat}} object, default value is "active.ident".
@@ -84,35 +46,24 @@ Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                      or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                      1/"yes"/T/TRUE: being new cell type.}
                     -\item{seurat_parent_tag_slot}{string, name of a slot in cell meta data
                     +\item{parent_cell}{string indicating the name of the parent cell type,
                     +if parent cell type classifier has already been saved in model database.
                     +Adjust path_to_models for exact database.}
+                    +
                     +\item{parent_tag_slot}{string, name of a slot in cell meta data
                      indicating assigned/predicted cell type. Default is "predicted_cell_type".
                      This slot would have been filled automatically
                      if the user has called the classify_cells function.
                      The slot must contain only string values.}
                     -\item{seurat_assay}{name of assay to use in training object.
                     -Default to 'RNA' assay.}
+                    -
                     -\item{seurat_slot}{type of expression data to use in training object.
                     -For \code{\link{Seurat}} object, available types are: "counts", "data"
                     -and "scale.data". Default to "counts", which contains unnormalized data.}
+                    -
                     -\item{sce_tag_slot}{string, name of annotation slot indicating
                     -cell tag/label in the training object.
                     -For \code{\link{SingleCellExperiment}} object, default value is "ident".
                     -Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     -or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     -1/"yes"/T/TRUE: being new cell type.}
                     +\item{parent_classifier}{classification model for the parent cell type}
                     -\item{sce_parent_tag_slot}{string, name of a slot in cell meta data
                     -indicating pre-assigned/predicted cell type.
                     -Default field is "predicted_cell_type".
                     -This field would have been filled automatically
                     -when user called classify_cells function.
                     -The slot must contain only string values.}
                     +\item{path_to_models}{path to the folder containing the model database.
                     +As default, the pretrained models in the package will be used.
                     +If user has trained new models, indicate the folder containing the
                     +new_models.rda file.}
                     -\item{sce_assay}{name of assay to use in training object.
                     -Default to 'logcounts' assay.}
                     +\item{zscore}{whether gene expression in train_obj is transformed to zscore}
+                     }
                      \value{
                      \code{\link{scAnnotatR}} object
@@ -143,8 +94,9 @@ selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
                      # train the classifier, the "cell_type" argument must match
                      # the cell labels in the data, except upper/lower case
                      set.seed(123)
                     -classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     -marker_genes = selected_marker_genes_B, cell_type = "b cells")
                     +classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
                     +assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B,
                     +cell_type = "b cells", tag_slot = 'active.ident')
                      # classify cell types using B cell classifier,
                      # a test classifier process may be used before applying the classifier
@@ -163,7 +115,8 @@ p_marker_genes = c("SDC1", "CD19", "CD79A")
                      # for the training process.
                      set.seed(123)
                      plasma_classifier <- train_classifier(train_obj = tirosh_mel80_example,
                     -cell_type = "Plasma cell", marker_genes = p_marker_genes,
                     -parent_classifier = classifier_b, seurat_tag_slot = 'plasma_cell_tag')
                     +assay = 'RNA', slot = 'counts', cell_type = 'Plasma cell',
                     +marker_genes = p_marker_genes, tag_slot = 'plasma_cell_tag',
                     +parent_classifier = classifier_b)
+                     }

vignettes/training-basic-model.Rmd

History View file @ a212ecf

@@ -148,8 +148,8 @@ times for one model, users can use `set.seed`.
                      ```{r}
                      set.seed(123)
                      classifier_B <- train_classifier(train_obj = train_set, cell_type = "B cells",
                     -                          marker_genes = selected_marker_genes_B,
                     -                          sce_assay = 'counts', sce_tag_slot = 'B_cell')
                     +                                 marker_genes = selected_marker_genes_B,
                     +                                 assay = 'counts', tag_slot = 'B_cell')
                      ```
                      ```{r}
                      classifier_B
@@ -169,8 +169,8 @@ The `test_classifier` model automatically tests a classifier's performance
                      against another dataset. Here, we used the `test_set` created before:
                      ```{r}
                     -classifier_B_test <- test_classifier(test_obj = test_set, classifier = classifier_B,
                     -                              sce_assay = 'counts', sce_tag_slot = 'B_cell')
                     +classifier_B_test <- test_classifier(classifier = classifier_B, test_obj = test_set,
                     +                                     assay = 'counts', tag_slot = 'B_cell')
                      ```
                      ### Interpreting test model result

vignettes/training-child-model.Rmd

History View file @ a212ecf

@@ -178,7 +178,7 @@ Train the child classifier:
                      set.seed(123)
                      classifier_plasma <- train_classifier(train_obj = train_set,
                      marker_genes = selected_marker_genes_plasma, cell_type = "Plasma cells",
                     -sce_assay = 'counts', sce_tag_slot = 'plasma', parent_classifier = classifier_B)
                     +assay = 'counts', tag_slot = 'plasma', parent_classifier = classifier_B)
                      ```
                      If the B cells classifier has not been loaded into the current workspace,
                      an equivalent training process should be:
@@ -186,7 +186,7 @@ an equivalent training process should be:
                      set.seed(123)
                      classifier_plasma <- train_classifier(train_obj = train_set,
                      marker_genes = selected_marker_genes_plasma, cell_type = "Plasma cells",
                     -sce_assay = 'counts', sce_tag_slot = 'plasma', parent_cell = 'B cells')
                     +assay = 'counts', tag_slot = 'plasma', parent_cell = 'B cells')
                      ```
                      ```{r}
                      classifier_plasma
@@ -200,7 +200,7 @@ caret_model(classifier_plasma)
                      The parent classifier must also be set in the test method.
                      ```{r}
                      classifier_plasma_test <- test_classifier(test_obj = test_set,
                     -classifier = classifier_plasma, sce_assay = 'counts', sce_tag_slot = 'plasma',
                     +classifier = classifier_plasma, assay = 'counts', tag_slot = 'plasma',
                      parent_classifier = classifier_B)
                      ```