man/train_classifier.Rd
0065b788
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/classifier.R
 \name{train_classifier}
 \alias{train_classifier}
 \title{Train cell type classifier}
 \usage{
 train_classifier(
   train_obj,
a212ecf0
   assay,
   slot = NULL,
0065b788
   cell_type,
7e9ee09d
   marker_genes,
a212ecf0
   tag_slot,
0065b788
   parent_cell = NA_character_,
a212ecf0
   parent_tag_slot = "predicted_cell_type",
adb6f19c
   parent_classifier = NULL,
   path_to_models = "default",
a212ecf0
   zscore = TRUE
0065b788
 )
 }
 \arguments{
 \item{train_obj}{object that can be used for training the new model. 
 \code{\link{Seurat}} object or \code{\link{SingleCellExperiment}} object
a212ecf0
 is supported.
0065b788
 If the model being trained has a parent, parent_tag_slot may need to be specified. 
 This field is filled in automatically 
 if the user has previously run the classify_cells function. 
 If no (predicted) cell type annotation is provided, 
adb6f19c
 the function can be run if 1- parent_cell or 2- parent_classifier is provided.}
0065b788
 
a212ecf0
 \item{assay}{name of assay to use in training object.}
 
 \item{slot}{type of expression data to use in training object, omitted if 
 train_obj is \code{\link{SingleCellExperiment}} object.}
 
0065b788
 \item{cell_type}{string indicating the name of the subtype.
 This must exactly match cell tag/label if cell tag/label is a string.}
 
7e9ee09d
 \item{marker_genes}{list of marker genes used for the new training model}
0065b788
 
a212ecf0
 \item{tag_slot}{string, name of slot in cell meta data 
0065b788
 indicating cell tag/label in the training object.
49e8d5e2
 Strings indicating cell types are expected in this slot.
0065b788
 For \code{\link{Seurat}} object, default value is "active.ident".  
49e8d5e2
 Expected values are strings (A-Z, a-z, 0-9, no special characters accepted) 
 or binary/logical, 0/"no"/F/FALSE: not being new cell type, 
 1/"yes"/T/TRUE: being new cell type.}
0065b788
 
a212ecf0
 \item{parent_cell}{string indicating the name of the parent cell type, 
 if the parent cell type classifier has already been saved in the model database.
 Adjust path_to_models to point to the correct database.}
 
 \item{parent_tag_slot}{string, name of a slot in cell meta data 
0065b788
 indicating assigned/predicted cell type. Default is "predicted_cell_type". 
 This slot is filled automatically 
 if the user has called the classify_cells function.
 The slot must contain only string values.}
 
a212ecf0
 \item{parent_classifier}{classification model for the parent cell type}
0065b788
 
a212ecf0
 \item{path_to_models}{path to the folder containing the model database. 
 By default, the pretrained models in the package will be used. 
 If the user has trained new models, indicate the folder containing the 
 new_models.rda file.}
0065b788
 
a212ecf0
 \item{zscore}{whether gene expression in train_obj is transformed to zscore}
0065b788
 }
 \value{
499496a2
 \code{\link{scAnnotatR}} object
0065b788
 }
 \description{
 Train a classifier for a new cell type. 
499496a2
 If the cell type has a parent, only a \code{\link{scAnnotatR}}
0065b788
 object is supported as the parent cell classifying model.
 }
 \note{
 Only one cell type is expected for each cell in object. 
 Ambiguous cell types, such as "T cells/NK cells/ILC", 
 will be excluded from training.
 Subtypes used when training the model for a parent cell type must be annotated
 as that parent cell type. For example, when training for B cells, 
 plasma cells must be annotated as B cells in order to be used.
 }
 \examples{
 # load small example dataset
 data("tirosh_mel80_example")
 
 # this dataset already contains pre-defined cell labels
 table(Seurat::Idents(tirosh_mel80_example))
 
 # define genes to use to classify this cell type (B cells in this example)
7e9ee09d
 selected_marker_genes_B = c("CD19", "MS4A1", "CD79A")
0065b788
 
 # train the classifier, the "cell_type" argument must match 
 # the cell labels in the data, except upper/lower case
 set.seed(123)
a212ecf0
 classifier_b <- train_classifier(train_obj = tirosh_mel80_example,
 assay = 'RNA', slot = 'counts', marker_genes = selected_marker_genes_B, 
 cell_type = "b cells", tag_slot = 'active.ident')
0065b788
 
 # classify cell types using B cell classifier, 
 # a test classifier process may be used before applying the classifier 
 tirosh_mel80_example <- classify_cells(classify_obj = tirosh_mel80_example, 
7114049b
 classifiers = c(classifier_b), assay = 'RNA', slot = 'counts')
0065b788
 
 # tag all cells that are plasma cells (random example here)
 tirosh_mel80_example[['plasma_cell_tag']] <- c(rep(1, 80), rep(0, 400))
 
7e9ee09d
 # set new marker genes for the subtype
 p_marker_genes = c("SDC1", "CD19", "CD79A")
0065b788
 
 # train the classifier, the "B cell" classifier is used as parent. 
 # This means, only cells already classified as "B cells" will be evaluated.
 # the "tag_slot" parameter tells the classifier to use this cell meta data
 # for the training process.
 set.seed(123)
adb6f19c
 plasma_classifier <- train_classifier(train_obj = tirosh_mel80_example, 
a212ecf0
 assay = 'RNA', slot = 'counts', cell_type = 'Plasma cell', 
 marker_genes = p_marker_genes, tag_slot = 'plasma_cell_tag',
 parent_classifier = classifier_b)
0065b788
 
 }