Bioconductor Code: scAnnotatR

Browse code

remove default_models store in package, use BiocFileCache to manage pretrained models

Former-commit-id: 4a69d2314d0fe5849d5002a1dc1cdd474f9afaff

nttvy authored on 29/07/2021 14:57:11
Showing 17 changed files

DESCRIPTION index 18e89bc..cb4dc34 100644
NAMESPACE index 463d9d7..47d6e0b 100644
R/classifier.R index 1cce40e..a7cd727 100644
R/data.R index d098618..5da0c76 100644
R/support.R index 3ae2aca..1256169 100644
R/tree.R index 6197456..582f865 100644
data/default_models.rda index 32a9846..0000000
man/default_models.Rd index f907e7d..0000000
man/delete_model.Rd index 3f5363d..467f90c 100644
man/internal.Rd index 6073079..e113366 100644
man/load_models.Rd index 0000000..7a5da80
man/plant_tree.Rd index c61fca3..490ca9c 100644
man/save_new_model.Rd index 8c1199d..9afe56b 100644
tests/testthat/test_class.R index 2d2f956..f68c515 100644
vignettes/classifying-cells.Rmd index 6353ed4..37e73c0 100644
vignettes/training-basic-model.Rmd index 7f0240a..adf6204 100644
vignettes/training-child-model.Rmd index 6086015..e4f31e2 100644

History View file @ a5d06e3

@@ -30,6 +30,8 @@ Imports:
                          e1071,
                          ape,
                          kernlab,
                     +    tools,
                     +    BiocFileCache,
                          utils
                      Suggests:
                          knitr,
@@ -39,7 +41,6 @@ Suggests:
                      VignetteBuilder: knitr
                      Depends: R (>= 4.1), Seurat, SingleCellExperiment, SummarizedExperiment
                      LazyData: true
                     -LazyDataCompression: xz
                      RoxygenNote: 7.1.1
                      URL: https://github.com/grisslab/scAnnotatR
                      BugReports: https://github.com/grisslab/scAnnotatR/issues/new

NAMESPACE

History View file @ a5d06e3

@@ -6,6 +6,7 @@ export(caret_model)
                      export(cell_type)
                      export(classify_cells)
                      export(delete_model)
                     +export(load_models)
                      export(marker_genes)
                      export(p_thres)
                      export(parent)
@@ -16,6 +17,7 @@ export(scAnnotatR)
                      export(test_classifier)
                      export(train_classifier)
                      exportMethods(show)
                     +import(BiocFileCache)
                      import(ROCR)
                      import(SingleCellExperiment)
                      import(ape)
@@ -27,6 +29,7 @@ import(ggplot2)
                      import(kernlab, except = c(alpha, predict))
                      import(methods)
                      import(pROC)
                     +import(tools)
                      importFrom(Seurat,GetAssayData)
                      importFrom(SummarizedExperiment,assay)
                      importFrom(SummarizedExperiment,colData)

R/classifier.R

History View file @ a5d06e3

@@ -132,6 +132,79 @@ setMethod("train_classifier", c("train_obj" = "Seurat"),
                        return(object)
                      })
                     +#' @inherit train_classifier
                     +#'
                     +#' @param sce_tag_slot string, name of annotation slot indicating
                     +#' cell tag/label in the training object.
                     +#' For \code{\link{SingleCellExperiment}} object, default value is "ident".
                     +#' Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     +#' or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     +#' 1/"yes"/T/TRUE: being new cell type.
                     +#' @param sce_parent_tag_slot string, name of a slot in cell meta data
                     +#' indicating pre-assigned/predicted cell type.
                     +#' Default field is "predicted_cell_type".
                     +#' This field would have been filled automatically
                     +#' when user called classify_cells function.
                     +#' The slot must contain only string values.
                     +#' @param sce_assay name of assay to use in training object.
                     +#' Default to 'logcounts' assay.
                     +#'
                     +#' @import SingleCellExperiment
                     +#' @importFrom SummarizedExperiment assay
                     +#'
                     +#' @rdname train_classifier
                     +setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"),
                     +          function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
                     +                   parent_classifier = NULL, path_to_models = "default",
                     +                   zscore = TRUE, sce_tag_slot = "ident",
                     +                   sce_parent_tag_slot = "predicted_cell_type",
                     +                   sce_assay = 'logcounts', ...) {
                     +  # solve duplication of cell names
                     +  colnames(train_obj) <- make.unique(colnames(train_obj), sep = '_')
+                    +
                     +  # extract the expression matrix from the SingleCellExperiment object
                     +  mat = SummarizedExperiment::assay(train_obj, sce_assay)
+                    +
                     +  # cell labels read from colData, named by cell name
                     +  tag = SummarizedExperiment::colData(train_obj)[, sce_tag_slot]
                     +  names(tag) <- colnames(train_obj)
+                    +
                     +  # parent labels are optional: NULL when the slot is absent from colData
                     +  if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(train_obj))) {
                     +    parent_tag <- SummarizedExperiment::colData(train_obj)[, sce_parent_tag_slot]
                     +    names(parent_tag) <- colnames(train_obj)
                     +  } else parent_tag <- NULL
+                    +
                     +  # delegate the training to the matrix-based implementation
                     +  object <- train_classifier_func(mat, tag, cell_type, marker_genes,
                     +                                  parent_tag, parent_cell, parent_classifier,
                     +                                  path_to_models, zscore)
+                    +
                     +  return(object)
                     +})
+                    +
                     +#' Train cell type from matrix
                     +#'
                     +#' @description Train a classifier for a new cell type from an expression
                     +#' matrix and cell tags.
                     +#' If the cell type has a parent, the parent cell classifying model must be
                     +#' a \code{\link{scAnnotatR}} object.
                     +#' @param mat expression matrix of size n x m, n: genes, m: cells
                     +#' @param tag named list indicating cell label
                     +#' @param cell_type string indicating the name of the subtype
                     +#' This must exactly match cell tag/label if cell tag/label is a string.
                     +#' @param parent_tag named list indicating parent cell type
                     +#' @param marker_genes list of marker genes used for the new training model
                     +#' @param parent_cell string indicating the name of the parent cell type,
                     +#' if parent cell type classifier has already been saved in model database.
                     +#' Adjust path_to_models for exact database.
                     +#' @param parent_classifier classification model for the parent cell type
                     +#' @param path_to_models path to the folder containing the model database.
                     +#' As default, the pretrained models in the package will be used.
                     +#' If user has trained new models, indicate the folder containing the
                     +#' new_models.rda file.
                     +#' @param zscore whether gene expression in mat is transformed to zscore
                     +#'
                     +#' @return caret trained model
                     +#'
                     +#' @rdname internal
                      train_classifier_func <- function(mat, tag, cell_type, marker_genes,
                                                        parent_tag, parent_cell, parent_classifier,
                                                        path_to_models, zscore) {
@@ -139,7 +212,7 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes,
                        processed_parent <- process_parent_classifier(
                          mat, parent_tag, parent_cell, parent_classifier, path_to_models, zscore
+                       )
+                    -
+                    +
                        # check parent-child coherence
                        if (!is.null(processed_parent$pos_parent)) {
                          tag <- check_parent_child_coherence(
@@ -206,54 +279,6 @@ train_classifier_func <- function(mat, tag, cell_type, marker_genes,
                        return(object)
+                     }
                     -#' @inherit train_classifier
                     -#'
                     -#' @param sce_tag_slot string, name of annotation slot indicating
                     -#' cell tag/label in the training object.
                     -#' For \code{\link{SingleCellExperiment}} object, default value is "ident".
                     -#' Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     -#' or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     -#' 1/"yes"/T/TRUE: being new cell type.
                     -#' @param sce_parent_tag_slot string, name of a slot in cell meta data
                     -#' indicating pre-assigned/predicted cell type.
                     -#' Default field is "predicted_cell_type".
                     -#' This field would have been filled automatically
                     -#' when user called classify_cells function.
                     -#' The slot must contain only string values.
                     -#' @param sce_assay name of assay to use in training object.
                     -#' Default to 'logcounts' assay.
                     -#'
                     -#' @import SingleCellExperiment
                     -#' @importFrom SummarizedExperiment assay
                     -#'
                     -#' @rdname train_classifier
                     -setMethod("train_classifier", c("train_obj" = "SingleCellExperiment"),
                     -          function(train_obj, cell_type, marker_genes, parent_cell = NA_character_,
                     -                   parent_classifier = NULL, path_to_models = "default",
                     -                   zscore = TRUE, sce_tag_slot = "ident",
                     -                   sce_parent_tag_slot = "predicted_cell_type",
                     -                   sce_assay = 'logcounts', ...) {
                     -  # solve duplication of cell names
                     -  colnames(train_obj) <- make.unique(colnames(train_obj), sep = '_')
+                    -
                     -  # convert Seurat object to matrix
                     -  mat = SummarizedExperiment::assay(train_obj, sce_assay)
+                    -
                     -  tag = SummarizedExperiment::colData(train_obj)[, sce_tag_slot]
                     -  names(tag) <- colnames(train_obj)
+                    -
                     -  if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(train_obj))) {
                     -    parent_tag <- SummarizedExperiment::colData(train_obj)[, sce_parent_tag_slot]
                     -    names(parent_tag) <- colnames(train_obj)
                     -  } else parent_tag <- NULL
+                    -
                     -  object <- train_classifier_func(mat, tag, cell_type, marker_genes,
                     -                                  parent_tag, parent_cell, parent_classifier,
                     -                                  path_to_models, zscore)
+                    -
                     -  return(object)
                     -})
+                    -
                      #' Testing process.
                      #'
                      #' @description Testing process.
@@ -363,6 +388,76 @@ setMethod("test_classifier", c("test_obj" = "Seurat",
                        return(return_val)
                      })
                     +#' @inherit test_classifier
                     +#'
                     +#' @param sce_tag_slot string, name of annotation slot
                     +#' indicating cell tag/label in the testing object.
                     +#' Strings indicating cell types are expected in this slot.
                     +#' Default value is "ident".
                     +#' Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     +#' or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     +#' 1/"yes"/T/TRUE: being new cell type.
                     +#' @param sce_parent_tag_slot string, name of tag slot in cell meta data
                     +#' indicating pre-assigned/predicted parent cell type.
                     +#' Default is "predicted_cell_type".
                     +#' The slot must contain only string values.
                     +#' @param sce_assay name of assay to use in \code{\link{SingleCellExperiment}}
                     +#' object, defaults to 'logcounts' assay.
                     +#'
                     +#' @import SingleCellExperiment
                     +#' @importFrom SummarizedExperiment assay
                     +#'
                     +#' @rdname test_classifier
                     +setMethod("test_classifier", c("test_obj" = "SingleCellExperiment",
                     +                               "classifier" = "scAnnotatR"),
                     +          function(test_obj, classifier, target_cell_type = NULL,
                     +                   parent_classifier = NULL, path_to_models = "default",
                     +                   zscore = TRUE, sce_tag_slot = "ident",
                     +                   sce_parent_tag_slot = "predicted_cell_type",
                     +                   sce_assay = 'logcounts', ...) {
                     +  # solve duplication of cell names
                     +  colnames(test_obj) <- make.unique(colnames(test_obj), sep = '_')
                     +  # NOTE(review): ., fpr and tpr are not used in this method; presumably
                     +  # placeholders to silence R CMD check notes - confirm before removing
                     +  . <- fpr <- tpr <- NULL
+                    +
                     +  # convert SCE object to matrix
                     +  mat = SummarizedExperiment::assay(test_obj, sce_assay)
+                    +
                     +  # cell labels read from colData, named by cell name
                     +  tag = SummarizedExperiment::colData(test_obj)[, sce_tag_slot]
                     +  names(tag) <- colnames(test_obj)
+                    +
                     +  # parent labels are optional: NULL when the slot is absent from colData
                     +  if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(test_obj))) {
                     +    parent_tag <- SummarizedExperiment::colData(test_obj)[, sce_parent_tag_slot]
                     +    names(parent_tag) <- colnames(test_obj)
                     +  } else parent_tag <- NULL
+                    +
                     +  # delegate the testing to the matrix-based implementation
                     +  return_val <- test_classifier_func(mat, tag, classifier, parent_tag,
                     +                                     target_cell_type, parent_classifier,
                     +                                     path_to_models, zscore)
+                    +
                     +  return(return_val)
                     +})
+                    +
                     +#' Run testing process from matrix and tag
                     +#'
                     +#' @description Testing process from matrix and tag
                     +#' @param mat expression matrix of size n x m, n: genes, m: cells
                     +#' @param tag named list indicating cell label
                     +#' @param classifier classification model
                     +#' @param parent_tag named list indicating parent cell type
                     +#' @param target_cell_type vector indicating other cell types than cell labels
                     +#' that can be considered as the main cell type in classifier,
                     +#' for example, c("plasma cell", "b cell", "b cells", "activating b cell").
                     +#' Default as NULL.
                     +#' @param parent_classifier classification model for the parent cell type
                     +#' @param path_to_models path to the folder containing the model database.
                     +#' As default, the pretrained models in the package will be used.
                     +#' If user has trained new models, indicate the folder containing the
                     +#' new_models.rda file.
                     +#' @param zscore whether gene expression in mat is transformed to zscore
                     +#'
                     +#' @return model performance statistics
                     +#'
                     +#' @rdname internal
                      test_classifier_func <- function(mat, tag, classifier, parent_tag,
                                                       target_cell_type, parent_classifier,
                                                       path_to_models, zscore) {
@@ -415,55 +510,6 @@ test_classifier_func <- function(mat, tag, classifier, parent_tag,
                        return(return_val)
+                     }
                     -#' @inherit test_classifier
                     -#'
                     -#' @param sce_tag_slot string, name of annotation slot
                     -#' indicating cell tag/label in the testing object.
                     -#' Strings indicating cell types are expected in this slot.
                     -#' Default value is "ident".
                     -#' Expected values are string (A-Z, a-z, 0-9, no special character accepted)
                     -#' or binary/logical, 0/"no"/F/FALSE: not being new cell type,
                     -#' 1/"yes"/T/TRUE: being new cell type.
                     -#' @param sce_parent_tag_slot string, name of tag slot in cell meta data
                     -#' indicating pre-assigned/predicted parent cell type.
                     -#' Default is "predicted_cell_type".
                     -#' The slot must contain only string values.
                     -#' @param sce_assay name of assay to use in \code{\link{SingleCellExperiment}}
                     -#' object, defaults to 'logcounts' assay.
                     -#'
                     -#' @import SingleCellExperiment
                     -#' @importFrom SummarizedExperiment assay
                     -#'
                     -#' @rdname test_classifier
                     -setMethod("test_classifier", c("test_obj" = "SingleCellExperiment",
                     -                               "classifier" = "scAnnotatR"),
                     -          function(test_obj, classifier, target_cell_type = NULL,
                     -                   parent_classifier = NULL, path_to_models = "default",
                     -                   zscore = TRUE, sce_tag_slot = "ident",
                     -                   sce_parent_tag_slot = "predicted_cell_type",
                     -                   sce_assay = 'logcounts', ...) {
                     -  # solve duplication of cell names
                     -  colnames(test_obj) <- make.unique(colnames(test_obj), sep = '_')
                     -  . <- fpr <- tpr <- NULL
+                    -
                     -  # convert SCE object to matrix
                     -  mat = SummarizedExperiment::assay(test_obj, sce_assay)
+                    -
                     -  tag = SummarizedExperiment::colData(test_obj)[, sce_tag_slot]
                     -  names(tag) <- colnames(test_obj)
+                    -
                     -  if (sce_parent_tag_slot %in% colnames(SummarizedExperiment::colData(test_obj))) {
                     -    parent_tag <- SummarizedExperiment::colData(test_obj)[, sce_parent_tag_slot]
                     -    names(parent_tag) <- colnames(test_obj)
                     -  } else parent_tag <- NULL
+                    -
                     -  return_val <- test_classifier_func(mat, tag, classifier, parent_tag,
                     -                                     target_cell_type, parent_classifier,
                     -                                     path_to_models, zscore)
+                    -
                     -  return(return_val)
                     -})
+                    -
                      #' Plot roc curve
                      #'
                      #' @param test_result result of test_classifier function

R/data.R

History View file @ a5d06e3

@@ -8,16 +8,4 @@
                      #' @source WEIZMANN INSTITUTE OF SCIENCE
                      #' @author Itay Tirosh, 2016-04-05
                      #' @keywords datasets
                     -"tirosh_mel80_example"
+                    -
                     -#' @name default_models
                     -#' @title Pretrained classifiers for human cells
                     -#' @description Pretrained classifier obtained by training and testing on the
                     -#' Sade-Feldman melanoma dataset, the Jerby-Arnon melanoma dataset, the Haniffa
                     -#' Skin Cell Atlas and the Haniffa Covid-19 Cell Atlas.
                     -#' @docType data
                     -#' @usage default_models
                     -#' @format a list of \code{\link{scAnnotatR}} objects
                     -#' @author Vy Nguyen, June 2021
                     -#' @keywords datasets
                     -"default_models"
                     \ No newline at end of file
                     +"tirosh_mel80_example"
                     \ No newline at end of file

R/support.R

History View file @ a5d06e3

@@ -107,14 +107,16 @@ transform_to_zscore <- function(mat) {
                      #' @return list of classifiers
                      #'
                      #' @importFrom utils data
                     -#' @rdname internal
                     +#' @rdname load_models
                     +#' @export
                      load_models <- function(path_to_models) {
                        # prevents R CMD check note
                        model_list <- NULL
                        data_env <- new.env(parent = emptyenv())
                        if (path_to_models == "default") {
                     -    utils::data("default_models", envir = data_env)
                     +    models_path <- download_data_file(TRUE) # more function: if user want to save cache
                     +    load(models_path, envir = data_env)
                          model_list <- data_env[["default_models"]]
                        } else {
                          models_path <- file.path(path_to_models, "new_models.rda")
@@ -608,4 +610,43 @@ classify_clust <- function(clusts, most_probable_cell_type) {
                        names(clust.pred) <- levels(clusts)
                        converted_pred <- unlist(lapply(clusts, function(x) clust.pred[[x]]))
                        return(converted_pred)
                     -}
                     \ No newline at end of file
                     +}
+                    +
                     +#' Create a BiocFileCache object
                     +#'
                     +#' @return BiocFileCache object
                     +#' @import tools
                     +#' @import BiocFileCache
                     +#'
                     +#' @rdname internal
                     +.get_cache <- function() {
                     +  # per-user cache directory reserved for this package
                     +  cache_dir <- tools::R_user_dir("scAnnotatR", which = "cache")
                     +  BiocFileCache::BiocFileCache(cache_dir)
                     +}
+                    +
                     +#' Download and store default models in cache
                     +#' @param verbose logical indicating whether to print a message when the
                     +#' default models are added to the cache for the first time
                     +#'
                     +#' @return path to the downloaded file in cache
                     +#' @import BiocFileCache
                     +#'
                     +#' @rdname internal
                     +download_data_file <-
                     +  function(verbose = FALSE)
                     +  {
                     +    fileURL <- "https://github.com/grisslab/scAnnotatR-models/blob/main/default_models.rda?raw=true"
+                    +
                     +    bfc <- .get_cache()
                     +    # look for an existing cache entry registered under "default_models"
                     +    rid <- BiocFileCache::bfcquery(bfc, "default_models", "rname")$rid
                     +    if (!length(rid)) {
                     +      if (verbose)
                     +        message("Downloading default models..." )
                     +      rid <- names(BiocFileCache::bfcadd(bfc, "default_models", fileURL))
                     +    }
                     +    # refresh unless the cached copy is known to be current: bfcneedsupdate()
                     +    # is FALSE only in that case, so TRUE and NA both trigger a download
                     +    if (!isFALSE(BiocFileCache::bfcneedsupdate(bfc, rid)))
                     +      BiocFileCache::bfcdownload(bfc, rid)
+                    +
                     +    BiocFileCache::bfcrpath(bfc, rids = rid)
                     +  }
                     \ No newline at end of file

R/tree.R

History View file @ a5d06e3

@@ -9,7 +9,7 @@
                      #' both of them once. In addition, default pretrained models
                      #' of the package cannot be changed or removed.
                      #' This can be done with the new trained model list.
                     -#' @param path.to.models path to the folder containing the list of new models.
                     +#' @param path_to_models path to the folder containing the list of new models.
                      #'
                      #' @return no return value, but the model is now saved to database
                      #'
@@ -27,7 +27,7 @@
                      #'
                      #' # save the trained classifier to system
                      #' # test classifier can be used before this step
                     -#' save_new_model(new_model = classifier_t, path.to.models = tempdir())
                     +#' save_new_model(new_model = classifier_t, path_to_models = tempdir())
                      #'
                      #' # verify if new model has been saved
                      #' print(names(load(file.path(tempdir(), "new_models.rda"))))
@@ -35,20 +35,24 @@
                      #'
                      #' @export
                      save_new_model <- function(new_model, include.default = TRUE,
                     -                    path.to.models = tempdir()) {
                     +                    path_to_models = tempdir()) {
                        default_models <- NULL
                     +  data_env <- new.env(parent = emptyenv())
                     -  utils::data("default_models")
                     -  new_models.file.path = file.path(path.to.models, "new_models.rda")
                     +  new_models.file.path = file.path(path_to_models, "new_models.rda")
                        if (file.exists(new_models.file.path)) {
                     -    load(new_models.file.path)
                     +    load(new_models.file.path, data_env)
                     +    new_models <- data_env[["new_models"]]
                        } else {
                          new_models = NULL
+                       }
                        if (include.default == TRUE) {
                          # default models not in new_models will be added to new_models
                     +    path_to_default_models <- download_data_file(TRUE)
                     +    load(path_to_default_models, envir = data_env)
                     +    default_models <- data_env[["default_models"]]
                          to.be.added <- default_models[!names(default_models)%in%names(new_models)]
                          new_models <- append(to.be.added, new_models)
+                       }
@@ -77,7 +81,7 @@ save_new_model <- function(new_model, include.default = TRUE,
                      #' Plant tree from list of models
                      #'
                     -#' @param models.file.path list of models. If not provided,
                     +#' @param path_to_models path to the folder containing the list of models. If not provided,
                      #' list of default pretrained models in the package will be used.
                      #'
                      #' @return tree structure and plot of tree
@@ -92,24 +96,12 @@ save_new_model <- function(new_model, include.default = TRUE,
                      #' plant_tree()
                      #'
                      #' @export
                     -plant_tree <- function(models.file.path = "default") {
                     +plant_tree <- function(path_to_models = "default") {
                        data_env <- new.env(parent = emptyenv())
                        root.name <- "cell types"
                     -  if (models.file.path == "default") {
                     -    utils::data("default_models", envir = data_env)
                     -    model_list <- data_env[['default_models']]
                     -  } else {
                     -    models_file_path <- file.path(models.file.path, "new_models.rda")
                     -    if (!file.exists(models_file_path)) {
                     -      stop("No file exists in the indicated models file path",
                     -           call. = FALSE)
                     -    } else {
                     -      load(models_file_path, envir = data_env)
                     -      model_list <- data_env[['new_models']]
                     -    }
                     -  }
                     +  model_list <- load_models(path_to_models)
                        tree <- NULL
                        if (!is.null(model_list)) {
@@ -146,7 +138,7 @@ plant_tree <- function(models.file.path = "default") {
                      #' @param cell_type string indicating the cell type of which
                      #' the model will be removed from package
                      #' Attention: deletion of a parent model will also delete all of child model.
                     -#' @param path.to.models path to the folder containing
                     +#' @param path_to_models path to the folder containing
                      #' the list of models in which the to-be-deleted model is.
                      #'
                      #' @return no return value, but the model is deleted from database
@@ -162,16 +154,19 @@ plant_tree <- function(models.file.path = "default") {
                      #' marker_genes = selected_marker_genes_T, cell_type = "t cells")
                      #'
                      #' # save a classifier to system
                     -#' save_new_model(new_model = classifier_t, path.to.models = tempdir())
                     +#' save_new_model(new_model = classifier_t, path_to_models = tempdir())
                      #'
                      #' # delete classifier from system
                     -#' delete_model("t cells", path.to.models = tempdir())
                     +#' delete_model("t cells", path_to_models = tempdir())
                      #' @export
                     -delete_model <- function(cell_type, path.to.models = tempdir()) {
                     +delete_model <- function(cell_type, path_to_models = tempdir()) {
                        new_models <- NULL
                        data_env <- new.env(parent = emptyenv())
                     -  new_models.file.path <- file.path(path.to.models, "new_models.rda")
                     +  if (path_to_models == 'default')
                     +    stop("Cannot delete default models.", call. = FALSE)
+                    +
                     +  new_models.file.path <- file.path(path_to_models, "new_models.rda")
                        if (!file.exists(new_models.file.path)) {
                          stop("No list of models available", call. = FALSE)
                        } else {

data/default_models.rda

History View file @ a5d06e3

178	173	deleted file mode 100644
179	174	Binary files a/data/default_models.rda and /dev/null differ

man/default_models.Rd

History View file @ a5d06e3

                     deleted file mode 100644
@@ -1,21 +0,0 @@
                     -% Generated by roxygen2: do not edit by hand
                     -% Please edit documentation in R/data.R
                     -\docType{data}
                     -\name{default_models}
                     -\alias{default_models}
                     -\title{Pretrained classifiers for human cells}
                     -\format{
                     -a list of \code{\link{scAnnotatR}} objects
                     -}
                     -\usage{
                     -default_models
                     -}
                     -\description{
                     -Pretrained classifier obtained by training and testing on the
                     -Sade-Feldman melanoma dataset, the Jerby-Arnon melanoma dataset, the Haniffa
                     -Skin Cell Atlas and the Haniffa Covid-19 Cell Atlas.
                     -}
                     -\author{
                     -Vy Nguyen, June 2021
                     -}
                     -\keyword{datasets}

man/delete_model.Rd

History View file @ a5d06e3

@@ -4,14 +4,14 @@
                      \alias{delete_model}
                      \title{Delete model/branch from package}
                      \usage{
                     -delete_model(cell_type, path.to.models = tempdir())
                     +delete_model(cell_type, path_to_models = tempdir())
+                     }
                      \arguments{
                      \item{cell_type}{string indicating the cell type of which
                      the model will be removed from package
                      Attention: deleting a parent model will also delete all of its child models.}
                     -\item{path.to.models}{path to the folder containing
                     +\item{path_to_models}{path to the folder containing
                      the list of models in which the to-be-deleted model is.}
+                     }
                      \value{
@@ -31,8 +31,8 @@ classifier_t <- train_classifier(train_obj = tirosh_mel80_example,
                      marker_genes = selected_marker_genes_T, cell_type = "t cells")
                      # save a classifier to system
                     -save_new_model(new_model = classifier_t, path.to.models = tempdir())
                     +save_new_model(new_model = classifier_t, path_to_models = tempdir())
                      # delete classifier from system
                     -delete_model("t cells", path.to.models = tempdir())
                     +delete_model("t cells", path_to_models = tempdir())
+                     }

man/internal.Rd

History View file @ a5d06e3

@@ -1,5 +1,5 @@
                      % Generated by roxygen2: do not edit by hand
                     -% Please edit documentation in R/class.R, R/support.R
                     +% Please edit documentation in R/class.R, R/classifier.R, R/support.R
                      \name{checkObjectValidity}
                      \alias{checkObjectValidity}
                      \alias{checkCellTypeValidity}
@@ -13,10 +13,11 @@
                      \alias{caret_model<-,scAnnotatR-method}
                      \alias{marker_genes<-}
                      \alias{marker_genes<-,scAnnotatR-method}
                     +\alias{train_classifier_func}
                     +\alias{test_classifier_func}
                      \alias{balance_dataset}
                      \alias{train_func}
                      \alias{transform_to_zscore}
                     -\alias{load_models}
                      \alias{select_marker_genes}
                      \alias{check_parent_child_coherence}
                      \alias{check_parent_child_coherence,dgCMatrix,vector-method}
@@ -31,6 +32,8 @@
                      \alias{verify_parent}
                      \alias{test_performance}
                      \alias{classify_clust}
                     +\alias{.get_cache}
                     +\alias{download_data_file}
                      \title{Internal functions of scAnnotatR package}
                      \usage{
                      checkObjectValidity(object)
@@ -57,14 +60,35 @@ marker_genes(classifier) <- value
                      \S4method{marker_genes}{scAnnotatR}(classifier) <- value
                     +train_classifier_func(
                     +  mat,
                     +  tag,
                     +  cell_type,
                     +  marker_genes,
                     +  parent_tag,
                     +  parent_cell,
                     +  parent_classifier,
                     +  path_to_models,
                     +  zscore
                     +)
+                    +
                     +test_classifier_func(
                     +  mat,
                     +  tag,
                     +  classifier,
                     +  parent_tag,
                     +  target_cell_type,
                     +  parent_classifier,
                     +  path_to_models,
                     +  zscore
                     +)
+                    +
                      balance_dataset(mat, tag)
                      train_func(mat, tag)
                      transform_to_zscore(mat)
                     -load_models(path_to_models)
+                    -
                      select_marker_genes(mat, marker_genes)
                      check_parent_child_coherence(
@@ -120,6 +144,10 @@ verify_parent(mat, classifier, meta.data)
                      test_performance(mat, classifier, tag)
                      classify_clust(clusts, most_probable_cell_type)
+                    +
                     +.get_cache()
+                    +
                     +download_data_file(verbose = FALSE)
+                     }
                      \arguments{
                      \item{object}{The request classifier to check.}
@@ -142,25 +170,25 @@ classify_clust(clusts, most_probable_cell_type)
                      \item{tag}{tag of data}
                     -\item{path_to_models}{path to databases, or by default}
+                    -
                     -\item{pos_parent}{a vector indicating parent classifier prediction}
+                    -
                     -\item{parent_cell}{name of parent cell type}
+                    -
                     -\item{target_cell_type}{alternative cell types (in case of testing classifier)}
+                    -
                      \item{parent_tag}{vector, named list indicating pre-assigned/predicted
                      parent cell type}
                     -\item{parent_cell_type}{name of parent cell type}
                     +\item{parent_cell}{name of parent cell type}
                      \item{parent_classifier}{\code{\link{scAnnotatR}} object corresponding
                      to classification model for the parent cell type}
                     +\item{path_to_models}{path to databases, or by default}
+                    +
                      \item{zscore}{boolean indicating the transformation of gene expression
                      in object to zscore or not}
                     +\item{target_cell_type}{alternative cell types (in case of testing classifier)}
+                    +
                     +\item{pos_parent}{a vector indicating parent classifier prediction}
+                    +
                     +\item{parent_cell_type}{name of parent cell type}
+                    +
                      \item{pred_cells}{a whole prediction for all cells}
                      \item{ignore_ambiguous_result}{whether to ignore ambiguous results}
@@ -174,6 +202,8 @@ in object to zscore or not}
                      \item{clusts}{cluster info}
                      \item{most_probable_cell_type}{predicted cell type}
+                    +
                     +\item{verbose}{logical indicating downloading the file or not}
+                     }
                      \value{
                      TRUE if the classifier is valid or the reason why it is not
@@ -200,6 +230,10 @@ the classifier with the new marker genes
                      scAnnotatR object with the new marker genes.
                     +caret trained model
+                    +
                     +model performance statistics
+                    +
                      a list of balanced count matrix
                      and corresponding tags of balanced count matrix
@@ -207,8 +241,6 @@ the classification model (caret object)
                      row wise center-scaled count matrix
                     -list of classifiers
+                    -
                      filtered matrix
                      list of adjusted tag
@@ -226,7 +258,18 @@ simplified prediction
                      applicable matrix
                      classifier performance
+                    +
                     +BiocFileCache object
+                    +
                     +path to the downloaded file in cache
+                     }
                      \description{
                      Check if a scAnnotatR object is valid
+                    +
                     +Train a classifier for a new cell type from expression matrix
                     +and tag
                     +If cell type has a parent, only available for \code{\link{scAnnotatR}}
                     +object as parent cell classifying model.
+                    +
                     +Testing process from matrix and tag
+                     }

man/load_models.Rd

History View file @ a5d06e3

                     new file mode 100644
@@ -0,0 +1,17 @@
                     +% Generated by roxygen2: do not edit by hand
                     +% Please edit documentation in R/support.R
                     +\name{load_models}
                     +\alias{load_models}
                     +\title{Load classifiers from databases}
                     +\usage{
                     +load_models(path_to_models)
                     +}
                     +\arguments{
                     +\item{path_to_models}{path to databases, or by default}
                     +}
                     +\value{
                     +list of classifiers
                     +}
                     +\description{
                     +Load classifiers from databases
                     +}

man/plant_tree.Rd

History View file @ a5d06e3

@@ -4,10 +4,10 @@
                      \alias{plant_tree}
                      \title{Plant tree from list of models}
                      \usage{
                     -plant_tree(models.file.path = "default")
                     +plant_tree(path_to_models = "default")
+                     }
                      \arguments{
                     -\item{models.file.path}{list of models. If not provided,
                     +\item{path_to_models}{list of models. If not provided,
                      list of default pretrained models in the package will be used.}
+                     }
                      \value{

man/save_new_model.Rd

History View file @ a5d06e3

@@ -4,7 +4,7 @@
                      \alias{save_new_model}
                      \title{Save a model to the package}
                      \usage{
                     -save_new_model(new_model, include.default = TRUE, path.to.models = tempdir())
                     +save_new_model(new_model, include.default = TRUE, path_to_models = tempdir())
+                     }
                      \arguments{
                      \item{new_model}{new model to be added into the classification tree}
@@ -18,7 +18,7 @@ both of them once. In addition, default pretrained models
                      of the package cannot be changed or removed.
                      This can be done with the new trained model list.}
                     -\item{path.to.models}{path to the folder containing the list of new models.}
                     +\item{path_to_models}{path to the folder containing the list of new models.}
+                     }
                      \value{
                      no return value, but the model is now saved to database
@@ -38,7 +38,7 @@ marker_genes = selected_marker_genes_T, cell_type = "t cells")
                      # save the trained classifier to system
                      # test classifier can be used before this step
                     -save_new_model(new_model = classifier_t, path.to.models = tempdir())
                     +save_new_model(new_model = classifier_t, path_to_models = tempdir())
                      # verify if new model has been saved
                      print(names(load(file.path(tempdir(), "new_models.rda"))))

tests/testthat/test_class.R

History View file @ a5d06e3

@@ -2,7 +2,7 @@ context("scAnnotatR class functions")
                      library(scAnnotatR)
                      test_that("Set cell type changes cell type", {
                     -  data("default_models")
                     +  default_models <- load_models('default')
                        classifier_B <- default_models[['B cells']]
                        cell_type(classifier_B) <- "b cells"
@@ -10,7 +10,7 @@ test_that("Set cell type changes cell type", {
                      })
                      test_that("Set probability threshold changes probability threshold", {
                     -  data("default_models")
                     +  default_models <- load_models('default')
                        classifier_B <- default_models[['B cells']]
                        p_thres(classifier_B) <- 0.6
@@ -18,7 +18,7 @@ test_that("Set probability threshold changes probability threshold", {
                      })
                      test_that("Set classifier changes classifier and marker genes", {
                     -  data("default_models")
                     +  default_models <- load_models('default')
                        classifier_B <- default_models[['B cells']]
                        classifier_T <- default_models[['T cells']]

vignettes/classifying-cells.Rmd

History View file @ a5d06e3

@@ -64,7 +64,7 @@ library(scAnnotatR)
                      The models are stored in the `default_models` object:
                      ```{r}
                     -data("default_models")
                     +default_models <- load_models("default")
                      names(default_models)
                      ```

vignettes/training-basic-model.Rmd

History View file @ a5d06e3

@@ -233,7 +233,7 @@ New classification models can be stored using the `save_new_model` function:
                      ```{r}
                      # no copy of pretrained models is performed
                     -save_new_model(new_model = classifier_B, path.to.models = tempdir(),
                     +save_new_model(new_model = classifier_B, path_to_models = tempdir(),
                                     include.default = FALSE)
                      ```
@@ -241,7 +241,7 @@ Parameters:
                        * **new_model**: The new model that should be added to the database in the
                                         specified directory.
                     -  * **path.to.models**: The directory where the new models should be stored.
                     +  * **path_to_models**: The directory where the new models should be stored.
                        * **include.default**: If set, the default models shipped with the package
                                               are added to the database.
@@ -253,7 +253,7 @@ Models can be deleted from the model database using the `delete_model` function:
                      ```{r}
                      # delete the "B cells" model from the new database
                     -delete_model("B cells", path.to.models = tempdir())
                     +delete_model("B cells", path_to_models = tempdir())
                      ```
                      ## Session Info

vignettes/training-child-model.Rmd

History View file @ a5d06e3

@@ -60,7 +60,7 @@ library(scAnnotatR)
                      ```
                      ```{r}
                     -data("default_models")
                     +default_models <- load_models('default')
                      classifier_B <- default_models[['B cells']]
                      classifier_B
                      ```
@@ -171,7 +171,7 @@ parent classifier to the train method:
                      for example: *parent_cell = 'B cells'*
                        * Users can give the name of a model among the models available in the user's database
                     -AND the path to that database, for example: `parent_cell = 'B cells', path.to.models = '.'`
                     +AND the path to that database, for example: `parent_cell = 'B cells', path_to_models = '.'`
                      Train the child classifier:
                      ```{r}
@@ -230,15 +230,15 @@ names(default_models)
                      In our package, the default models already include a model classifying plasma cells.
                      Therefore, we will save this model to a new local database specified by the
                     -*path.to.models* parameter. If you start with a fresh new local database,
                     +*path_to_models* parameter. If you start with a fresh new local database,
                      there is no available parent classifier of plasma cells' classifier. Therefore,
                      we have to save the parent classifier first, e.g. the classifier for B cells.
                      ```{r}
                      # no copy of pretrained models is performed
                     -save_new_model(new_model = classifier_B, path.to.models = tempdir(),
                     +save_new_model(new_model = classifier_B, path_to_models = tempdir(),
                                     include.default = FALSE)
                     -save_new_model(new_model = classifier_plasma, path.to.models = tempdir(),
                     +save_new_model(new_model = classifier_plasma, path_to_models = tempdir(),
                                     include.default = FALSE)
                      ```