... | ... |
@@ -1,40 +1,41 @@ |
1 |
-#' Apply the mutual nearest neighbors (MNN) batch effect correction method to |
|
1 |
+#' Apply the mutual nearest neighbors (MNN) batch effect correction method to |
|
2 | 2 |
#' SingleCellExperiment object |
3 |
-#' |
|
4 |
-#' MNN is designed for batch correction of single-cell RNA-seq data where the |
|
5 |
-#' batches are partially confounded with biological conditions of interest. It |
|
6 |
-#' does so by identifying pairs of MNN in the high-dimensional log-expression |
|
7 |
-#' space. For each MNN pair, a pairwise correction vector is computed by |
|
3 |
+#' |
|
4 |
+#' MNN is designed for batch correction of single-cell RNA-seq data where the |
|
5 |
+#' batches are partially confounded with biological conditions of interest. It |
|
6 |
+#' does so by identifying pairs of MNN in the high-dimensional log-expression |
|
7 |
+#' space. For each MNN pair, a pairwise correction vector is computed by |
|
8 | 8 |
#' applying a Gaussian smoothing kernel with bandwidth `sigma`. |
9 |
-#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
|
10 |
-#' and analysis procedures. |
|
11 |
-#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
12 |
-#' of the assay requiring batch correction in "inSCE", should exist in |
|
13 |
-#' `assayNames(inSCE)`. |
|
14 |
-#' @param batch character, default `"batch"`. A string indicating the |
|
15 |
-#' field of `colData(inSCE)` that defines different batches. |
|
16 |
-#' @param assayName character, default `"MNN"`. The name for the corrected |
|
17 |
-#' full-sized expression matrix. |
|
18 |
-#' @param k integer, default `20`. Specifies the number of nearest neighbours to |
|
19 |
-#' consider when defining MNN pairs. This should be interpreted as the minimum |
|
20 |
-#' frequency of each cell type or state in each batch. Larger values will |
|
21 |
-#' improve the precision of the correction by increasing the number of MNN |
|
9 |
+#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
10 |
+#' @param useAssay A single character indicating the name of the assay requiring |
|
11 |
+#' batch correction. Default \code{"logcounts"}. |
|
12 |
+#' @param batch A single character indicating a field in |
|
13 |
+#' \code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
14 |
+#' Default \code{"batch"}. |
|
15 |
+#' @param k An integer. Specifies the number of nearest neighbours to |
|
16 |
+#' consider when defining MNN pairs. This should be interpreted as the minimum |
|
17 |
+#' frequency of each cell type or state in each batch. Larger values will |
|
18 |
+#' improve the precision of the correction by increasing the number of MNN |
|
22 | 19 |
#' pairs, at the cost of reducing accuracy by allowing MNN pairs to form between |
23 |
-#' cells of different type. |
|
24 |
-#' @param sigma Numeric, default `0.1`. Specifies how much information is |
|
25 |
-#' shared between MNN pairs when computing the batch effect. Larger values will |
|
26 |
-#' share more information, approaching a global correction for all cells in the |
|
27 |
-#' same batch. Smaller values allow the correction to vary across cell types, |
|
28 |
-#' which may be more accurate but comes at the cost of precision. |
|
29 |
-#' @return SingleCellExperiment object with `reducedDim(inSCE, reducedDimName)` |
|
30 |
-#' updated with corrected low-dimentional representation. |
|
20 |
+#' cells of different type. Default \code{20L}. |
|
21 |
+#' @param sigma A Numeric scalar. Specifies how much information is |
|
22 |
+#' shared between MNN pairs when computing the batch effect. Larger values will |
|
23 |
+#' share more information, approaching a global correction for all cells in the |
|
24 |
+#' same batch. Smaller values allow the correction to vary across cell types, |
|
25 |
+#' which may be more accurate but comes at the cost of precision. Default |
|
26 |
+#' \code{0.1}. |
|
27 |
+#' @param assayName A single character. The name for the corrected assay. Will |
|
28 |
+#' be saved to \code{\link[SummarizedExperiment]{assay}}. Default |
|
29 |
+#' \code{"MNN"}. |
|
30 |
+#' @return The input \linkS4class{SingleCellExperiment} object with |
|
31 |
+#' \code{assay(inSCE, assayName)} updated. |
|
31 | 32 |
#' @export |
32 |
-#' @references Lun ATL, et al., 2016 & 2018 |
|
33 |
-#' @examples |
|
33 |
+#' @references Lun ATL, et al., 2016 & 2018 |
|
34 |
+#' @examples |
|
34 | 35 |
#' data('sceBatches', package = 'singleCellTK') |
35 | 36 |
#' sceCorr <- runMNNCorrect(sceBatches) |
36 |
-runMNNCorrect <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
|
37 |
- assayName = 'MNN', k = 20, sigma = 0.1){ |
|
37 |
+runMNNCorrect <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
|
38 |
+ assayName = 'MNN', k = 20L, sigma = 0.1){ |
|
38 | 39 |
## Input check |
39 | 40 |
if(!inherits(inSCE, "SingleCellExperiment")){ |
40 | 41 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
... | ... |
@@ -46,11 +47,12 @@ runMNNCorrect <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
46 | 47 |
stop(paste("\"batch name:", batch, "not found.")) |
47 | 48 |
} |
48 | 49 |
assayName <- gsub(' ', '_', assayName) |
49 |
- |
|
50 |
+ k <- as.integer(k) |
|
51 |
+ |
|
50 | 52 |
## Run algorithm |
51 | 53 |
batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
52 | 54 |
batchFactor <- as.factor(batchCol) |
53 |
- mnnSCE <- batchelor::mnnCorrect(inSCE, batch = batchFactor, |
|
55 |
+ mnnSCE <- batchelor::mnnCorrect(inSCE, batch = batchFactor, |
|
54 | 56 |
k = k, sigma = sigma) |
55 | 57 |
corrected <- SummarizedExperiment::assay(mnnSCE, 'corrected') |
56 | 58 |
SummarizedExperiment::assay(inSCE, assayName) <- corrected |
... | ... |
@@ -4,23 +4,23 @@ |
4 | 4 |
#' SCANORAMA is analogous to computer vision algorithms for panorama stitching |
5 | 5 |
#' that identify images with overlapping content and merge these into a larger |
6 | 6 |
#' panorama. |
7 |
-#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
|
8 |
-#' and analysis procedures. |
|
9 |
-#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
10 |
-#' of the assay requiring batch correction in "inSCE", should exist in |
|
11 |
-#' `assayNames(inSCE)`. |
|
12 |
-#' @param batch character, default `"batch"`. A string indicating the |
|
13 |
-#' field of `colData(inSCE)` that defines different batches. |
|
14 |
-#' @param assayName character, default `"SCANORAMA"`. The name for the |
|
15 |
-#' corrected full-sized expression matrix. |
|
16 |
-#' @param SIGMA numeric, default `15`. Algorithmic parameter, correction |
|
17 |
-#' smoothing parameter on Gaussian kernel. |
|
18 |
-#' @param ALPHA numeric, default `0.1`. Algorithmic parameter, alignment score |
|
19 |
-#' minimum cutoff. |
|
20 |
-#' @param KNN integer, default `20L`. Algorithmic parameter, number of nearest |
|
21 |
-#' neighbors to use for matching. |
|
22 |
-#' @return SingleCellExperiment object with `assay(inSCE, assayName)` updated |
|
23 |
-#' with corrected full-sized expression matrix. |
|
7 |
+#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
8 |
+#' @param useAssay A single character indicating the name of the assay requiring |
|
9 |
+#' batch correction. Default \code{"logcounts"}. |
|
10 |
+#' @param batch A single character indicating a field in |
|
11 |
+#' \code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
12 |
+#' Default \code{"batch"}. |
|
13 |
+#' @param SIGMA A numeric scalar. Algorithmic parameter, correction smoothing |
|
14 |
+#' parameter on Gaussian kernel. Default \code{15}. |
|
15 |
+#' @param ALPHA A numeric scalar. Algorithmic parameter, alignment score |
|
16 |
+#' minimum cutoff. Default \code{0.1}. |
|
17 |
+#' @param KNN An integer. Algorithmic parameter, number of nearest neighbors to |
|
18 |
+#' use for matching. Default \code{20L}. |
|
19 |
+#' @param assayName A single character. The name for the corrected assay. Will |
|
20 |
+#' be saved to \code{\link[SummarizedExperiment]{assay}}. Default |
|
21 |
+#' \code{"SCANORAMA"}. |
|
22 |
+#' @return The input \linkS4class{SingleCellExperiment} object with |
|
23 |
+#' \code{assay(inSCE, assayName)} updated. |
|
24 | 24 |
#' @export |
25 | 25 |
#' @references Brian Hie et al, 2019 |
26 | 26 |
#' @examples |
... | ... |
@@ -29,8 +29,8 @@ |
29 | 29 |
#' sceCorr <- runSCANORAMA(sceBatches) |
30 | 30 |
#' } |
31 | 31 |
runSCANORAMA <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
32 |
- assayName = 'SCANORAMA', SIGMA = 15, ALPHA = 0.1, |
|
33 |
- KNN = 20L){ |
|
32 |
+ SIGMA = 15, ALPHA = 0.1, KNN = 20L, |
|
33 |
+ assayName = 'SCANORAMA'){ |
|
34 | 34 |
## Input check |
35 | 35 |
if(!inherits(inSCE, "SingleCellExperiment")){ |
36 | 36 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
... | ... |
@@ -1,35 +1,35 @@ |
1 | 1 |
#' Apply scGen batch effect correction method to SingleCellExperiment object |
2 |
-#' |
|
3 |
-#' scGen is a generative model to predict single-cell perturbation response |
|
4 |
-#' across cell types, studies and species. It works by combining variational |
|
2 |
+#' |
|
3 |
+#' scGen is a generative model to predict single-cell perturbation response |
|
4 |
+#' across cell types, studies and species. It works by combining variational |
|
5 | 5 |
#' autoencoders and latent space vector arithmetics for high-dimensional single- |
6 | 6 |
#' cell gene expression data. |
7 |
-#' |
|
8 |
-#' Result does not look fine for now. Time consuming also even it allocates 32 |
|
9 |
-#' cores. |
|
10 |
-#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
|
11 |
-#' and analysis procedures. |
|
12 |
-#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
13 |
-#' of the assay requiring batch correction in "inSCE", should exist in |
|
14 |
-#' `assayNames(inSCE)`. |
|
15 |
-#' @param batch character, default `"batch"`. A string indicating the field |
|
16 |
-#' of `colData(inSCE)` that defines different batches. |
|
17 |
-#' @param cellType character, default `"cell_type"`. A string indicating the |
|
18 |
-#' field of `colData(inSCE)` that defines different cell types. |
|
19 |
-#' @param assayName character, default `"SCGEN"`. The name for the corrected |
|
20 |
-#' full-sized expression matrix. |
|
21 |
-#' @param nEpochs integer, default `100L`. Algorithmic parameter, number of |
|
22 |
-#' epochs to iterate and optimize network weights. |
|
7 |
+#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
8 |
+#' @param useAssay A single character indicating the name of the assay requiring |
|
9 |
+#' batch correction. Default \code{"logcounts"}. |
|
10 |
+#' @param batch A single character indicating a field in |
|
11 |
+#' \code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
12 |
+#' Default \code{"batch"}. |
|
13 |
+#' @param cellType A single character. A string indicating a field in |
|
14 |
+#' \code{colData(inSCE)} that defines different cell types. Default |
|
15 |
+#' \code{'cell_type'}. |
|
16 |
+#' @param nEpochs An integer. Algorithmic parameter, the number of epochs to |
|
17 |
+#' iterate and optimize network weights. Default \code{50L}. |
|
18 |
+#' @param assayName A single character. The name for the corrected assay. Will |
|
19 |
+#' be saved to \code{\link[SummarizedExperiment]{assay}}. Default |
|
20 |
+#' \code{"SCGEN"}. |
|
21 |
+#' @return The input \linkS4class{SingleCellExperiment} object with |
|
22 |
+#' \code{assay(inSCE, assayName)} updated. |
|
23 | 23 |
#' @export |
24 | 24 |
#' @references Lotfollahi, Mohammad et al., 2019 |
25 |
-#' @examples |
|
25 |
+#' @examples |
|
26 | 26 |
#' \dontrun{ |
27 | 27 |
#' data('sceBatches', package = 'singleCellTK') |
28 | 28 |
#' sceCorr <- runSCGEN(sceBatches) |
29 | 29 |
#' } |
30 |
-runSCGEN <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
|
31 |
- cellType = "cell_type", assayName = 'SCGEN', |
|
32 |
- nEpochs = 50L){ |
|
30 |
+runSCGEN <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
|
31 |
+ cellType = "cell_type", nEpochs = 50L, |
|
32 |
+ assayName = 'SCGEN'){ |
|
33 | 33 |
## Input check |
34 | 34 |
if(!inherits(inSCE, "SingleCellExperiment")){ |
35 | 35 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
... | ... |
@@ -59,7 +59,7 @@ runSCGEN <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
59 | 59 |
adata <- .sce2adata(inSCE, useAssay = useAssay) |
60 | 60 |
network = scgen$VAEArith(x_dimension = adata$n_vars) |
61 | 61 |
network$train(train_data = adata, n_epochs = nEpochs) |
62 |
- corrAdata <- scgen$batch_removal(network, adata, batch_key = batch, |
|
62 |
+ corrAdata <- scgen$batch_removal(network, adata, batch_key = batch, |
|
63 | 63 |
cell_label_key = cellType) |
64 | 64 |
corrMat <- t(corrAdata$X) |
65 | 65 |
SummarizedExperiment::assay(inSCE, assayName) <- corrMat |
... | ... |
@@ -1,40 +1,43 @@ |
1 | 1 |
#' Apply scMerge batch effect correction method to SingleCellExperiment object |
2 | 2 |
#' |
3 |
-#' The scMerge method leverages factor analysis, stably expressed genes (SEGs) |
|
4 |
-#' and (pseudo-) replicates to remove unwanted variations and merge multiple |
|
5 |
-#' scRNA-Seq data. |
|
6 |
-#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
|
7 |
-#' and analysis procedures. |
|
8 |
-#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
9 |
-#' of the assay requiring batch correction in "inSCE", should exist in |
|
10 |
-#' `assayNames(inSCE)`. |
|
11 |
-#' @param batch character, default `"batch"`. A string indicating the field |
|
12 |
-#' of `colData(inSCE)` that defines different batches. |
|
13 |
-#' @param assayName character, default `"scMerge"`. The name for the corrected |
|
14 |
-#' full-sized expression matrix. |
|
15 |
-#' @param kmeansK vector of int, default `NULL`. A vector indicating the |
|
16 |
-#' kmeans' K-value for each batch, in order to construct pseudo-replicates. The |
|
17 |
-#' length of `kmeansK` needs to be the same as the number of batches. |
|
18 |
-#' @param cellType character, default `"cell_type"`. A string indicating the |
|
19 |
-#' field of `colData(inSCE)` that defines different cell types. |
|
20 |
-#' @param seg array, default `NULL`. An array of gene names or indices that |
|
21 |
-#' specifies SEG (Stably Expressed Genes) set as negative control. Pre-defined |
|
22 |
-#' dataset with human and mouse SEG lists is available to user by running |
|
23 |
-#' `data('SEG')`. |
|
24 |
-#' @param nCores integer, default `parallel::detectCores()`. The number of |
|
25 |
-#' cores of processors to allocate for the task. By default it takes all the |
|
26 |
-#' cores available to the user. |
|
27 |
-#' @return SingleCellExperiment object with `assay(inSCE, assayName)` updated |
|
28 |
-#' with corrected full-sized expression matrix. |
|
3 |
+#' The scMerge method leverages factor analysis, stably expressed genes (SEGs) |
|
4 |
+#' and (pseudo-) replicates to remove unwanted variations and merge multiple |
|
5 |
+#' scRNA-Seq data. |
|
6 |
+#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
7 |
+#' @param useAssay A single character indicating the name of the assay requiring |
|
8 |
+#' batch correction. Default \code{"logcounts"}. |
|
9 |
+#' @param batch A single character indicating a field in |
|
10 |
+#' \code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
11 |
+#' Default \code{"batch"}. |
|
12 |
+#' @param kmeansK An integer vector. Indicating the kmeans' K-value for each |
|
13 |
+#' batch (i.e. how many subclusters in each batch should exist), in order to |
|
14 |
+#' construct pseudo-replicates. The length of \code{kmeansK} needs to be the same |
|
15 |
+#' as the number of batches. Default \code{NULL}, and this value will be |
|
16 |
+#' auto-detected by default, depending on \code{cellType}. |
|
17 |
+#' @param cellType A single character. A string indicating a field in |
|
18 |
+#' \code{colData(inSCE)} that defines different cell types. Default |
|
19 |
+#' \code{'cell_type'}. |
|
20 |
+#' @param seg A vector of gene names or indices that specifies SEG (Stably |
|
21 |
+#' Expressed Genes) set as negative control. Pre-defined dataset with human and |
|
22 |
+#' mouse SEG lists is available to user by running \code{data('SEG')}. Default |
|
23 |
+#' \code{NULL}, and this value will be auto-detected by default with |
|
24 |
+#' \code{\link[scMerge]{scSEGIndex}}. |
|
25 |
+#' @param nCores An integer. The number of cores of processors to allocate for |
|
26 |
+#' the task. Default \code{1L}. |
|
27 |
+#' @param assayName A single character. The name for the corrected assay. Will |
|
28 |
+#' be saved to \code{\link[SummarizedExperiment]{assay}}. Default |
|
29 |
+#' \code{"scMerge"}. |
|
30 |
+#' @return The input \linkS4class{SingleCellExperiment} object with |
|
31 |
+#' \code{assay(inSCE, assayName)} updated. |
|
29 | 32 |
#' @export |
30 | 33 |
#' @references Hoa, et al., 2020 |
31 |
-#' @examples |
|
34 |
+#' @examples |
|
32 | 35 |
#' data('sceBatches', package = 'singleCellTK') |
33 | 36 |
#' sceCorr <- runSCMerge(sceBatches) |
34 |
-runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch', |
|
35 |
- assayName = "scMerge", seg = NULL, kmeansK = NULL, |
|
36 |
- cellType = 'cell_type', |
|
37 |
- nCores = 1){ |
|
37 |
+runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch', |
|
38 |
+ assayName = "scMerge", seg = NULL, kmeansK = NULL, |
|
39 |
+ cellType = 'cell_type', |
|
40 |
+ nCores = 1L){ |
|
38 | 41 |
## Input check |
39 | 42 |
if(!inherits(inSCE, "SingleCellExperiment")){ |
40 | 43 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
... | ... |
@@ -55,12 +58,12 @@ runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch', |
55 | 58 |
|
56 | 59 |
nCores <- min(as.integer(nCores), parallel::detectCores()) |
57 | 60 |
assayName <- gsub(' ', '_', assayName) |
58 |
- |
|
61 |
+ |
|
59 | 62 |
## Run algorithm |
60 | 63 |
|
61 | 64 |
batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
62 | 65 |
uniqBatch <- unique(batchCol) |
63 |
- |
|
66 |
+ |
|
64 | 67 |
# Infer parameters |
65 | 68 |
if(is.null(cellType)){ |
66 | 69 |
cellTypeCol <- NULL |
... | ... |
@@ -89,17 +92,17 @@ runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch', |
89 | 92 |
} else { |
90 | 93 |
ctl <- seg |
91 | 94 |
} |
92 |
- |
|
95 |
+ |
|
93 | 96 |
# scMerge automatically search for the column called "batch"... |
94 | 97 |
colDataNames <- names(SummarizedExperiment::colData(inSCE)) |
95 | 98 |
names(SummarizedExperiment::colData(inSCE))[colDataNames == batch] <- 'batch' |
96 | 99 |
bpParam <- BiocParallel::MulticoreParam(workers = nCores) |
97 | 100 |
inSCE <- scMerge::scMerge(sce_combine = inSCE, exprs = useAssay, |
98 |
- hvg_exprs = useAssay, |
|
99 |
- assay_name = assayName, |
|
100 |
- ctl = ctl, kmeansK = kmeansK, |
|
101 |
+ hvg_exprs = useAssay, |
|
102 |
+ assay_name = assayName, |
|
103 |
+ ctl = ctl, kmeansK = kmeansK, |
|
101 | 104 |
#marker_list = topVarGenesPerBatch, |
102 |
- cell_type = cellTypeCol, |
|
105 |
+ cell_type = cellTypeCol, |
|
103 | 106 |
BPPARAM = bpParam) |
104 | 107 |
colDataNames <- names(SummarizedExperiment::colData(inSCE)) |
105 | 108 |
names(SummarizedExperiment::colData(inSCE))[colDataNames == 'batch'] <- batch |
... | ... |
@@ -9,22 +9,22 @@ |
9 | 9 |
#' dataset, that is hypothesized to originate from the same cell state. These |
10 | 10 |
#' anchors are then used to harmonize the datasets, or transfer information |
11 | 11 |
#' from one dataset to another. |
12 |
-#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
|
13 |
-#' and analysis procedures. |
|
14 |
-#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
15 |
-#' of the assay requiring batch correction in "inSCE", should exist in |
|
16 |
-#' `assayNames(inSCE)`. |
|
17 |
-#' @param batch character, default `"batch"`. A string indicating the |
|
18 |
-#' field of `colData(inSCE)` that defines different batches. |
|
19 |
-#' @param altExpName character, default `"Seurat3Int"`. The name for the |
|
20 |
-#' corrected full-sized expression matrix. If the number of features returned |
|
21 |
-#' is smaller the number of total feature, the returned matrix will be saved in |
|
22 |
-#' `reducedDim(inSCE, assayName)`; if equal, `assay(inSCE, assayName)`. |
|
23 |
-#' @param nAnchors integer, default `nrow(inSCE)`. The number of features to |
|
24 |
-#' anchor, and also the final dimensionality of the integrated matrix. Thus |
|
25 |
-#' default value turns to produce full-sized assay. |
|
26 |
-#' @param verbose bool, default `TRUE`. Whether to show detail information of |
|
27 |
-#' the process. |
|
12 |
+#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
13 |
+#' @param useAssay A single character indicating the name of the assay requiring |
|
14 |
+#' batch correction. Default \code{"logcounts"}. |
|
15 |
+#' @param batch A single character indicating a field in |
|
16 |
+#' \code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
17 |
+#' Default \code{"batch"}. |
|
18 |
+#' @param nAnchors An integer. The number of features to anchor. The final |
|
19 |
+#' number of the corrected features depends on this value. Default |
|
20 |
+#' \code{nrow(inSCE)}. |
|
21 |
+#' @param verbose A logical scalar. Whether to show detail information of |
|
22 |
+#' the process. Default \code{TRUE}. |
|
23 |
+#' @param altExpName A single character. The name for the |
|
24 |
+#' \code{\link[SingleCellExperiment]{altExp}} that stores the corrected assay. |
|
25 |
+#' The corrected assay stored in it uses the same name. Default \code{"Seurat3Int"}. |
|
26 |
+#' @return The input \linkS4class{SingleCellExperiment} object with |
|
27 |
+#' \code{altExp(inSCE, altExpName)} updated. |
|
28 | 28 |
#' @export |
29 | 29 |
#' @references Stuart et al. 2019 |
30 | 30 |
#' @examples |
... | ... |
@@ -1,69 +1,72 @@ |
1 |
-#' Apply ZINBWaVE Batch effect correction method to SingleCellExperiment object |
|
2 |
-#' |
|
3 |
-#' A general and flexible zero-inflated negative binomial model that can be |
|
4 |
-#' used to provide a low-dimensional representations of scRNAseq data. The |
|
5 |
-#' model accounts for zero inflation (dropouts), over-dispersion, and the count |
|
6 |
-#' nature of the data. The model also accounts for the difference in library |
|
7 |
-#' sizes and optionally for batch effects and/or other covariates. |
|
8 |
-#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
|
9 |
-#' and analysis procedures. |
|
10 |
-#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
11 |
-#' of the assay requiring batch correction in "inSCE", should exist in |
|
12 |
-#' `assayNames(inSCE)`. |
|
13 |
-#' @param batch character, default `"batch"`. A string indicating the |
|
14 |
-#' field of `colData(inSCE)` that defines different batches. |
|
15 |
-#' @param reducedDimName character, default `"zinbwave"`. The name for the |
|
16 |
-#' corrected low-dimensional representation. |
|
17 |
-#' @param nHVG integer, default `1000`. Number of highly variable genes to use |
|
18 |
-#' when fitting the model |
|
19 |
-#' @param nComponents integer, default `50L`. Number of principle components or |
|
20 |
-#' dimensionality to generate in the resulting reducedDim. |
|
21 |
-#' @param nIter integer, default `10`. The max number of iterations to perform. |
|
22 |
-#' @param epsilon integer, default `1000`. Algorithmic parameter, by default, the |
|
23 |
-#' epsilon parameter is set to the number of genes. We empirically found that a |
|
24 |
-#' high epsilon is often required to obtained a good low-level representation. |
|
25 |
-#' @export |
|
26 |
-#' @references Pollen, Alex A et al., 2014 |
|
27 |
-#' @examples |
|
28 |
-#' \dontrun{ |
|
29 |
-#' data('sceBatches', package = 'singleCellTK') |
|
30 |
-#' sceCorr <- runZINBWaVE(sceBatches, nIter=5) |
|
31 |
-#' } |
|
32 |
-runZINBWaVE <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
|
33 |
- reducedDimName = 'zinbwave', nHVG = 1000, |
|
34 |
- nComponents = 50, epsilon = 1000, nIter = 10){ |
|
35 |
- #filterParams = NULL <<< something told in tutorial but might be ignored |
|
36 |
- ## Input check |
|
37 |
- if(!inherits(inSCE, "SingleCellExperiment")){ |
|
38 |
- stop("\"inSCE\" should be a SingleCellExperiment Object.") |
|
39 |
- } |
|
40 |
- if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
41 |
- stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found")) |
|
42 |
- } |
|
43 |
- if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
44 |
- stop(paste("\"batch name:", batch, "not found.")) |
|
45 |
- } |
|
46 |
- reducedDimName <- gsub(' ', '_', reducedDimName) |
|
47 |
- |
|
48 |
- # Run algorithm |
|
49 |
- tmpMatrix <- round(SummarizedExperiment::assay(inSCE, useAssay)) |
|
50 |
- tmpSCE <- inSCE |
|
51 |
- SummarizedExperiment::assay(tmpSCE, useAssay) <- tmpMatrix |
|
52 |
- |
|
53 |
- ##ZINBWaVE tutorial style of HVG selection |
|
54 |
- if(nHVG < nrow(inSCE)){ |
|
55 |
- logAssay <- log1p(SummarizedExperiment::assay(tmpSCE, useAssay)) |
|
56 |
- vars <- matrixStats::rowVars(logAssay) |
|
57 |
- names(vars) <- rownames(tmpSCE) |
|
58 |
- vars <- sort(vars, decreasing = TRUE) |
|
59 |
- tmpSCE <- tmpSCE[names(vars)[1:nHVG],] |
|
60 |
- } |
|
61 |
- epsilon <- min(nrow(inSCE), epsilon) |
|
62 |
- |
|
63 |
- tmpSCE <- zinbwave::zinbwave(tmpSCE, K = nComponents, epsilon = epsilon, |
|
64 |
- which_assay = useAssay, |
|
65 |
- X = paste('~', batch, sep = ''), |
|
66 |
- maxiter.optimize=nIter, verbose = TRUE) |
|
67 |
- reducedDim(inSCE, reducedDimName) <- reducedDim(tmpSCE, 'zinbwave') |
|
68 |
- return(inSCE) |
|
69 |
-} |
|
1 |
+#' Apply ZINBWaVE Batch effect correction method to SingleCellExperiment object |
|
2 |
+#' |
|
3 |
+#' A general and flexible zero-inflated negative binomial model that can be |
|
4 |
+#' used to provide a low-dimensional representation of scRNAseq data. The |
|
5 |
+#' model accounts for zero inflation (dropouts), over-dispersion, and the count |
|
6 |
+#' nature of the data. The model also accounts for the difference in library |
|
7 |
+#' sizes and optionally for batch effects and/or other covariates. |
|
8 |
+#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
9 |
+#' @param useAssay A single character indicating the name of the assay requiring |
|
10 |
+#' batch correction. Note that ZINBWaVE works for counts (integer) input rather |
|
11 |
+#' than logcounts that other methods prefer. Default \code{"counts"}. |
|
12 |
+#' @param batch A single character indicating a field in |
|
13 |
+#' \code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
14 |
+#' Default \code{"batch"}. |
|
15 |
+#' @param nHVG An integer. Number of highly variable genes to use when fitting |
|
16 |
+#' the model. Default \code{1000L}. |
|
17 |
+#' @param nComponents An integer. The number of principal components or |
|
18 |
+#' dimensionality to generate in the resulting matrix. Default \code{50L}. |
|
19 |
+#' @param nIter An integer. The max number of iterations to perform. Default |
|
20 |
+#' \code{10L}. |
|
21 |
+#' @param epsilon An integer. Algorithmic parameter. Empirically, a high epsilon |
|
22 |
+#' is often required to obtain a good low-level representation. Default |
|
23 |
+#' \code{1000L}. |
|
24 |
+#' @param reducedDimName A single character. The name for the corrected |
|
25 |
+#' low-dimensional representation. Will be saved to \code{reducedDim(inSCE)}. |
|
26 |
+#' Default \code{"zinbwave"}. |
|
27 |
+#' @return The input \linkS4class{SingleCellExperiment} object with |
|
28 |
+#' \code{reducedDim(inSCE, reducedDimName)} updated. |
|
29 |
+#' @export |
|
30 |
+#' @references Pollen, Alex A et al., 2014 |
|
31 |
+#' @examples |
|
32 |
+#' \dontrun{ |
|
33 |
+#' data('sceBatches', package = 'singleCellTK') |
|
34 |
+#' sceCorr <- runZINBWaVE(sceBatches, nIter = 5) |
|
35 |
+#' } |
|
36 |
+runZINBWaVE <- function(inSCE, useAssay = 'counts', batch = 'batch', |

37 |
+ nHVG = 1000L, nComponents = 50L, epsilon = 1000, |

38 |
+ nIter = 10L, reducedDimName = 'zinbwave'){ |

39 |
+ ## Input check: stop early on a wrong class or a missing assay / batch field |

40 |
+ if(!inherits(inSCE, "SingleCellExperiment")){ |

41 |
+ stop("\"inSCE\" should be a SingleCellExperiment Object.") |

42 |
+ } |

43 |
+ if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |

44 |
+ stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found")) |

45 |
+ } |

46 |
+ if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |

47 |
+ stop(paste("\"batch name:", batch, "not found.")) |

48 |
+ } |

49 |
+ reducedDimName <- gsub(' ', '_', reducedDimName) |

50 |
+ nHVG <- as.integer(nHVG) |

51 |
+ nComponents <- as.integer(nComponents) |

52 |
+ epsilon <- as.integer(epsilon) |

53 |
+ nIter <- as.integer(nIter) |

54 |
+ ## Run algorithm; start from all genes so tmpSCE exists if nHVG >= nrow(inSCE) |

55 |
+ tmpSCE <- inSCE |

56 |
+ ## ZINBWaVE tutorial style HVG selection: top nHVG genes by log1p variance |

57 |
+ if(nHVG < nrow(inSCE)){ |

58 |
+ logAssay <- log1p(SummarizedExperiment::assay(inSCE, useAssay)) |

59 |
+ vars <- matrixStats::rowVars(logAssay) |

60 |
+ names(vars) <- rownames(inSCE) |

61 |
+ vars <- sort(vars, decreasing = TRUE) |

62 |
+ tmpSCE <- inSCE[names(vars)[seq_len(nHVG)],] |

63 |
+ } |

64 |
+ epsilon <- min(nrow(inSCE), epsilon) |

65 |
+ tmpSCE <- zinbwave::zinbwave(tmpSCE, K = nComponents, epsilon = epsilon, |

66 |
+ which_assay = useAssay, |

67 |
+ X = paste('~', batch, sep = ''), |

68 |
+ maxiter.optimize = nIter, verbose = TRUE) |

69 |
+ SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- |

70 |
+ SingleCellExperiment::reducedDim(tmpSCE, 'zinbwave') |

71 |
+ return(inSCE) |

72 |
+} |
... | ... |
@@ -15,40 +15,39 @@ runLIGER( |
15 | 15 |
) |
16 | 16 |
} |
17 | 17 |
\arguments{ |
18 |
-\item{inSCE}{SingleCellExperiment object. An object that stores your dataset |
|
19 |
-and analysis procedures.} |
|
18 |
+\item{inSCE}{\linkS4class{SingleCellExperiment} inherited object. Required.} |
|
20 | 19 |
|
21 |
-\item{useAssay}{character, default `"logcounts"`. A string indicating the name |
|
22 |
-of the assay requiring batch correction in "inSCE", should exist in |
|
23 |
-`assayNames(inSCE)`.} |
|
20 |
+\item{useAssay}{A single character indicating the name of the assay requiring |
|
21 |
+batch correction. Default \code{"logcounts"}.} |
|
24 | 22 |
|
25 |
-\item{batch}{character, default `"batch"`. A string indicating the |
|
26 |
-field of `colData(inSCE)` that defines different batches.} |
|
23 |
+\item{batch}{A single character indicating a field in |
|
24 |
+\code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
25 |
+Default \code{"batch"}.} |
|
27 | 26 |
|
28 |
-\item{reducedDimName}{character, default `"LIGER"`. The name for the |
|
29 |
-corrected low-dimensional representation.} |
|
27 |
+\item{reducedDimName}{A single character. The name for the corrected |
|
28 |
+low-dimensional representation. Will be saved to \code{reducedDim(inSCE)}. |
|
29 |
+Default \code{"LIGER"}.} |
|
30 | 30 |
|
31 |
-\item{nComponents}{integer, default `20L`. Number of principle components or |
|
32 |
-dimensionality (factors, for this algorithm) to generate in the resulting |
|
33 |
-reducedDim.} |
|
31 |
+\item{nComponents}{An integer. The number of principal components or |
|
32 |
+dimensionality to generate in the resulting matrix. Default \code{20L}.} |
|
34 | 33 |
|
35 |
-\item{lambda}{numeric, default `5.0`. Algorithmic parameter, the penalty |
|
36 |
-parameter which limits the dataset-specific component of the factorization.} |
|
34 |
+\item{lambda}{A numeric scalar. Algorithmic parameter, the penalty |
|
35 |
+parameter which limits the dataset-specific component of the factorization. |
|
36 |
+Default \code{5.0}.} |
|
37 | 37 |
|
38 |
-\item{resolution}{numeric, default `1.0`. Algorithmic paramter, the |
|
39 |
-clustering resolution, increasing this increases the number of communities |
|
40 |
-detected.} |
|
38 |
+\item{resolution}{A numeric scalar. Algorithmic parameter, the clustering |
|
39 |
+resolution, increasing this increases the number of communities detected. |
|
40 |
+Default \code{1.0}.} |
|
41 | 41 |
} |
42 | 42 |
\value{ |
43 |
-SingleCellExperiment object with `reducedDim(inSCE, reducedDimName)` |
|
44 |
-updated with corrected low-dimentional representation. |
|
43 |
+The input \linkS4class{SingleCellExperiment} object with |
|
44 |
+\code{reducedDim(inSCE, reducedDimName)} updated. |
|
45 | 45 |
} |
46 | 46 |
\description{ |
47 |
-LIGER relies on integrative non-negative matrix factorization to identify |
|
47 |
+LIGER relies on integrative non-negative matrix factorization to identify |
|
48 | 48 |
shared and dataset-specific factors. |
49 | 49 |
} |
50 | 50 |
\examples{ |
51 |
- |
|
52 | 51 |
\dontrun{ |
53 | 52 |
data('sceBatches', package = 'singleCellTK') |
54 | 53 |
sceCorr <- runLIGER(sceBatches) |
... | ... |
@@ -7,29 +7,28 @@ |
7 | 7 |
runLimmaBC(inSCE, useAssay = "logcounts", assayName = "LIMMA", batch = "batch") |
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 |
-\item{inSCE}{SingleCellExperiment object. An object that stores your dataset |
|
11 |
-and analysis procedures.} |
|
10 |
+\item{inSCE}{\linkS4class{SingleCellExperiment} inherited object. Required.} |
|
12 | 11 |
|
13 |
-\item{useAssay}{character, default `"logcounts"`. A string indicating the name |
|
14 |
-of the assay requiring batch correction in "inSCE", should exist in |
|
15 |
-`assayNames(inSCE)`.} |
|
12 |
+\item{useAssay}{A single character indicating the name of the assay requiring |
|
13 |
+batch correction. Default \code{"logcounts"}.} |
|
16 | 14 |
|
17 |
-\item{assayName}{character, default `"LIMMA"`. The name for the corrected |
|
18 |
-full-sized expression matrix.} |
|
15 |
+\item{assayName}{A single character. The name for the corrected assay. Will
|
16 |
+be saved to \code{\link[SummarizedExperiment]{assay}}. Default |
|
17 |
+\code{"LIMMA"}.} |
|
19 | 18 |
|
20 |
-\item{batch}{character, default `"batch"`. A string indicating the |
|
21 |
-field of `colData(inSCE)` that defines different batches.} |
|
19 |
+\item{batch}{A single character indicating a field in |
|
20 |
+\code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
21 |
+Default \code{"batch"}.} |
|
22 | 22 |
} |
23 | 23 |
\value{ |
24 |
-SingleCellExperiment object with `assay(inSCE, assayName)` updated |
|
25 |
-with corrected full-sized expression matrix. |
|
24 |
+The input \linkS4class{SingleCellExperiment} object with |
|
25 |
+\code{assay(inSCE, assayName)} updated. |
|
26 | 26 |
} |
27 | 27 |
\description{ |
28 |
-Limma's batch effect removal function fits a linear model to the data, then |
|
28 |
+Limma's batch effect removal function fits a linear model to the data, then |
|
29 | 29 |
removes the component due to the batch effects. |
30 | 30 |
} |
31 | 31 |
\examples{ |
32 |
- |
|
33 | 32 |
data('sceBatches', package = 'singleCellTK') |
34 | 33 |
sceCorr <- runLimmaBC(sceBatches) |
35 | 34 |
} |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
% Please edit documentation in R/runMNNCorrect.R |
3 | 3 |
\name{runMNNCorrect} |
4 | 4 |
\alias{runMNNCorrect} |
5 |
-\title{Apply the mutual nearest neighbors (MNN) batch effect correction method to |
|
5 |
+\title{Apply the mutual nearest neighbors (MNN) batch effect correction method to |
|
6 | 6 |
SingleCellExperiment object} |
7 | 7 |
\usage{ |
8 | 8 |
runMNNCorrect( |
... | ... |
@@ -10,50 +10,50 @@ runMNNCorrect( |
10 | 10 |
useAssay = "logcounts", |
11 | 11 |
batch = "batch", |
12 | 12 |
assayName = "MNN", |
13 |
- k = 20, |
|
13 |
+ k = 20L, |
|
14 | 14 |
sigma = 0.1 |
15 | 15 |
) |
16 | 16 |
} |
17 | 17 |
\arguments{ |
18 |
-\item{inSCE}{SingleCellExperiment object. An object that stores your dataset |
|
19 |
-and analysis procedures.} |
|
18 |
+\item{inSCE}{\linkS4class{SingleCellExperiment} inherited object. Required.} |
|
20 | 19 |
|
21 |
-\item{useAssay}{character, default `"logcounts"`. A string indicating the name |
|
22 |
-of the assay requiring batch correction in "inSCE", should exist in |
|
23 |
-`assayNames(inSCE)`.} |
|
20 |
+\item{useAssay}{A single character indicating the name of the assay requiring |
|
21 |
+batch correction. Default \code{"logcounts"}.} |
|
24 | 22 |
|
25 |
-\item{batch}{character, default `"batch"`. A string indicating the |
|
26 |
-field of `colData(inSCE)` that defines different batches.} |
|
23 |
+\item{batch}{A single character indicating a field in |
|
24 |
+\code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
25 |
+Default \code{"batch"}.} |
|
27 | 26 |
|
28 |
-\item{assayName}{character, default `"MNN"`. The name for the corrected |
|
29 |
-full-sized expression matrix.} |
|
27 |
+\item{assayName}{A single character. The name for the corrected assay. Will
|
28 |
+be saved to \code{\link[SummarizedExperiment]{assay}}. Default |
|
29 |
+\code{"MNN"}.} |
|
30 | 30 |
|
31 |
-\item{k}{integer, default `20`. Specifies the number of nearest neighbours to |
|
32 |
-consider when defining MNN pairs. This should be interpreted as the minimum |
|
33 |
-frequency of each cell type or state in each batch. Larger values will |
|
34 |
-improve the precision of the correction by increasing the number of MNN |
|
31 |
+\item{k}{An integer. Specifies the number of nearest neighbours to |
|
32 |
+consider when defining MNN pairs. This should be interpreted as the minimum |
|
33 |
+frequency of each cell type or state in each batch. Larger values will |
|
34 |
+improve the precision of the correction by increasing the number of MNN |
|
35 | 35 |
pairs, at the cost of reducing accuracy by allowing MNN pairs to form between |
36 |
-cells of different type.} |
|
36 |
+cells of different type. Default \code{20L}.} |
|
37 | 37 |
|
38 |
-\item{sigma}{Numeric, default `0.1`. Specifies how much information is |
|
39 |
-shared between MNN pairs when computing the batch effect. Larger values will |
|
40 |
-share more information, approaching a global correction for all cells in the |
|
41 |
-same batch. Smaller values allow the correction to vary across cell types, |
|
42 |
-which may be more accurate but comes at the cost of precision.} |
|
38 |
+\item{sigma}{A numeric scalar. Specifies how much information is
|
39 |
+shared between MNN pairs when computing the batch effect. Larger values will |
|
40 |
+share more information, approaching a global correction for all cells in the |
|
41 |
+same batch. Smaller values allow the correction to vary across cell types, |
|
42 |
+which may be more accurate but comes at the cost of precision. Default |
|
43 |
+\code{0.1}.} |
|
43 | 44 |
} |
44 | 45 |
\value{ |
45 |
-SingleCellExperiment object with `reducedDim(inSCE, reducedDimName)` |
|
46 |
-updated with corrected low-dimentional representation. |
|
46 |
+The input \linkS4class{SingleCellExperiment} object with |
|
47 |
+\code{assay(inSCE, assayName)} updated. |
|
47 | 48 |
} |
48 | 49 |
\description{ |
49 |
-MNN is designed for batch correction of single-cell RNA-seq data where the |
|
50 |
-batches are partially confounded with biological conditions of interest. It |
|
51 |
-does so by identifying pairs of MNN in the high-dimensional log-expression |
|
52 |
-space. For each MNN pair, a pairwise correction vector is computed by |
|
50 |
+MNN is designed for batch correction of single-cell RNA-seq data where the |
|
51 |
+batches are partially confounded with biological conditions of interest. It |
|
52 |
+does so by identifying pairs of MNN in the high-dimensional log-expression |
|
53 |
+space. For each MNN pair, a pairwise correction vector is computed by |
|
53 | 54 |
applying a Gaussian smoothing kernel with bandwidth `sigma`. |
54 | 55 |
} |
55 | 56 |
\examples{ |
56 |
- |
|
57 | 57 |
data('sceBatches', package = 'singleCellTK') |
58 | 58 |
sceCorr <- runMNNCorrect(sceBatches) |
59 | 59 |
} |
... | ... |
@@ -9,38 +9,38 @@ runSCANORAMA( |
9 | 9 |
inSCE, |
10 | 10 |
useAssay = "logcounts", |
11 | 11 |
batch = "batch", |
12 |
- assayName = "SCANORAMA", |
|
13 | 12 |
SIGMA = 15, |
14 | 13 |
ALPHA = 0.1, |
15 |
- KNN = 20L |
|
14 |
+ KNN = 20L, |
|
15 |
+ assayName = "SCANORAMA" |
|
16 | 16 |
) |
17 | 17 |
} |
18 | 18 |
\arguments{ |
19 |
-\item{inSCE}{SingleCellExperiment object. An object that stores your dataset |
|
20 |
-and analysis procedures.} |
|
19 |
+\item{inSCE}{\linkS4class{SingleCellExperiment} inherited object. Required.} |
|
21 | 20 |
|
22 |
-\item{useAssay}{character, default `"logcounts"`. A string indicating the name |
|
23 |
-of the assay requiring batch correction in "inSCE", should exist in |
|
24 |
-`assayNames(inSCE)`.} |
|
21 |
+\item{useAssay}{A single character indicating the name of the assay requiring |
|
22 |
+batch correction. Default \code{"logcounts"}.} |
|
25 | 23 |
|
26 |
-\item{batch}{character, default `"batch"`. A string indicating the |
|
27 |
-field of `colData(inSCE)` that defines different batches.} |
|
24 |
+\item{batch}{A single character indicating a field in |
|
25 |
+\code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
26 |
+Default \code{"batch"}.} |
|
28 | 27 |
|
29 |
-\item{assayName}{character, default `"SCANORAMA"`. The name for the |
|
30 |
-corrected full-sized expression matrix.} |
|
28 |
+\item{SIGMA}{A numeric scalar. Algorithmic parameter, correction smoothing |
|
29 |
+parameter on Gaussian kernel. Default \code{15}.} |
|
31 | 30 |
|
32 |
-\item{SIGMA}{numeric, default `15`. Algorithmic parameter, correction |
|
33 |
-smoothing parameter on Gaussian kernel.} |
|
31 |
+\item{ALPHA}{A numeric scalar. Algorithmic parameter, alignment score |
|
32 |
+minimum cutoff. Default \code{0.1}.} |
|
34 | 33 |
|
35 |
-\item{ALPHA}{numeric, default `0.1`. Algorithmic parameter, alignment score |
|
36 |
-minimum cutoff.} |
|
34 |
+\item{KNN}{An integer. Algorithmic parameter, number of nearest neighbors to |
|
35 |
+use for matching. Default \code{20L}.} |
|
37 | 36 |
|
38 |
-\item{KNN}{integer, default `20L`. Algorithmic parameter, number of nearest |
|
39 |
-neighbors to use for matching.} |
|
37 |
+\item{assayName}{A single character. The name for the corrected assay. Will
|
38 |
+be saved to \code{\link[SummarizedExperiment]{assay}}. Default |
|
39 |
+\code{"SCANORAMA"}.} |
|
40 | 40 |
} |
41 | 41 |
\value{ |
42 |
-SingleCellExperiment object with `assay(inSCE, assayName)` updated |
|
43 |
-with corrected full-sized expression matrix. |
|
42 |
+The input \linkS4class{SingleCellExperiment} object with |
|
43 |
+\code{assay(inSCE, assayName)} updated. |
|
44 | 44 |
} |
45 | 45 |
\description{ |
46 | 46 |
SCANORAMA is analogous to computer vision algorithms for panorama stitching |
... | ... |
@@ -9,40 +9,41 @@ runSCGEN( |
9 | 9 |
useAssay = "logcounts", |
10 | 10 |
batch = "batch", |
11 | 11 |
cellType = "cell_type", |
12 |
- assayName = "SCGEN", |
|
13 |
- nEpochs = 50L |
|
12 |
+ nEpochs = 50L, |
|
13 |
+ assayName = "SCGEN" |
|
14 | 14 |
) |
15 | 15 |
} |
16 | 16 |
\arguments{ |
17 |
-\item{inSCE}{SingleCellExperiment object. An object that stores your dataset |
|
18 |
-and analysis procedures.} |
|
17 |
+\item{inSCE}{\linkS4class{SingleCellExperiment} inherited object. Required.} |
|
19 | 18 |
|
20 |
-\item{useAssay}{character, default `"logcounts"`. A string indicating the name |
|
21 |
-of the assay requiring batch correction in "inSCE", should exist in |
|
22 |
-`assayNames(inSCE)`.} |
|
19 |
+\item{useAssay}{A single character indicating the name of the assay requiring |
|
20 |
+batch correction. Default \code{"logcounts"}.} |
|
23 | 21 |
|
24 |
-\item{batch}{character, default `"batch"`. A string indicating the field |
|
25 |
-of `colData(inSCE)` that defines different batches.} |
|
22 |
+\item{batch}{A single character indicating a field in |
|
23 |
+\code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
24 |
+Default \code{"batch"}.} |
|
26 | 25 |
|
27 |
-\item{cellType}{character, default `"cell_type"`. A string indicating the |
|
28 |
-field of `colData(inSCE)` that defines different cell types.} |
|
26 |
+\item{cellType}{A single character. A string indicating a field in |
|
27 |
+\code{colData(inSCE)} that defines different cell types. Default |
|
28 |
+\code{'cell_type'}.} |
|
29 | 29 |
|
30 |
-\item{assayName}{character, default `"SCGEN"`. The name for the corrected |
|
31 |
-full-sized expression matrix.} |
|
30 |
+\item{nEpochs}{An integer. Algorithmic parameter, the number of epochs to |
|
31 |
+iterate and optimize network weights. Default \code{50L}.} |
|
32 | 32 |
|
33 |
-\item{nEpochs}{integer, default `100L`. Algorithmic parameter, number of |
|
34 |
-epochs to iterate and optimize network weights.} |
|
33 |
+\item{assayName}{A single character. The name for the corrected assay. Will
|
34 |
+be saved to \code{\link[SummarizedExperiment]{assay}}. Default |
|
35 |
+\code{"SCGEN"}.} |
|
36 |
+} |
|
37 |
+\value{ |
|
38 |
+The input \linkS4class{SingleCellExperiment} object with |
|
39 |
+\code{assay(inSCE, assayName)} updated. |
|
35 | 40 |
} |
36 | 41 |
\description{ |
37 |
-scGen is a generative model to predict single-cell perturbation response |
|
38 |
-across cell types, studies and species. It works by combining variational |
|
42 |
+scGen is a generative model to predict single-cell perturbation response |
|
43 |
+across cell types, studies and species. It works by combining variational |
|
39 | 44 |
autoencoders and latent space vector arithmetics for high-dimensional single- |
40 | 45 |
cell gene expression data. |
41 | 46 |
} |
42 |
-\details{ |
|
43 |
-Result does not look fine for now. Time consuming also even it allocates 32 |
|
44 |
-cores. |
|
45 |
-} |
|
46 | 47 |
\examples{ |
47 | 48 |
\dontrun{ |
48 | 49 |
data('sceBatches', package = 'singleCellTK') |
... | ... |
@@ -12,46 +12,49 @@ runSCMerge( |
12 | 12 |
seg = NULL, |
13 | 13 |
kmeansK = NULL, |
14 | 14 |
cellType = "cell_type", |
15 |
- nCores = 1 |
|
15 |
+ nCores = 1L |
|
16 | 16 |
) |
17 | 17 |
} |
18 | 18 |
\arguments{ |
19 |
-\item{inSCE}{SingleCellExperiment object. An object that stores your dataset |
|
20 |
-and analysis procedures.} |
|
19 |
+\item{inSCE}{\linkS4class{SingleCellExperiment} inherited object. Required.} |
|
21 | 20 |
|
22 |
-\item{useAssay}{character, default `"logcounts"`. A string indicating the name |
|
23 |
-of the assay requiring batch correction in "inSCE", should exist in |
|
24 |
-`assayNames(inSCE)`.} |
|
21 |
+\item{useAssay}{A single character indicating the name of the assay requiring |
|
22 |
+batch correction. Default \code{"logcounts"}.} |
|
25 | 23 |
|
26 |
-\item{batch}{character, default `"batch"`. A string indicating the field |
|
27 |
-of `colData(inSCE)` that defines different batches.} |
|
24 |
+\item{batch}{A single character indicating a field in |
|
25 |
+\code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
26 |
+Default \code{"batch"}.} |
|
28 | 27 |
|
29 |
-\item{assayName}{character, default `"scMerge"`. The name for the corrected |
|
30 |
-full-sized expression matrix.} |
|
28 |
+\item{assayName}{A single character. The name for the corrected assay. Will
|
29 |
+be saved to \code{\link[SummarizedExperiment]{assay}}. Default |
|
30 |
+\code{"scMerge"}.} |
|
31 | 31 |
|
32 |
-\item{seg}{array, default `NULL`. An array of gene names or indices that |
|
33 |
-specifies SEG (Stably Expressed Genes) set as negative control. Pre-defined |
|
34 |
-dataset with human and mouse SEG lists is available to user by running |
|
35 |
-`data('SEG')`.} |
|
32 |
+\item{seg}{A vector of gene names or indices that specifies SEG (Stably |
|
33 |
+Expressed Genes) set as negative control. Pre-defined dataset with human and |
|
34 |
+mouse SEG lists is available to user by running \code{data('SEG')}. Default |
|
35 |
+\code{NULL}, and this value will be auto-detected by default with |
|
36 |
+\code{\link[scMerge]{scSEGIndex}}.} |
|
36 | 37 |
|
37 |
-\item{kmeansK}{vector of int, default `NULL`. A vector indicating the |
|
38 |
-kmeans' K-value for each batch, in order to construct pseudo-replicates. The |
|
39 |
-length of `kmeansK` needs to be the same as the number of batches.} |
|
38 |
+\item{kmeansK}{An integer vector. Indicating the kmeans' K-value for each |
|
39 |
+batch (i.e. how many subclusters in each batch should exist), in order to |
|
40 |
+construct pseudo-replicates. The length of \code{kmeansK} needs to be the same
|
41 |
+as the number of batches. Default \code{NULL}, and this value will be |
|
42 |
+auto-detected by default, depending on \code{cellType}.} |
|
40 | 43 |
|
41 |
-\item{cellType}{character, default `"cell_type"`. A string indicating the |
|
42 |
-field of `colData(inSCE)` that defines different cell types.} |
|
44 |
+\item{cellType}{A single character. A string indicating a field in |
|
45 |
+\code{colData(inSCE)} that defines different cell types. Default |
|
46 |
+\code{'cell_type'}.} |
|
43 | 47 |
|
44 |
-\item{nCores}{integer, default `parallel::detectCores()`. The number of |
|
45 |
-cores of processors to allocate for the task. By default it takes all the |
|
46 |
-cores available to the user.} |
|
48 |
+\item{nCores}{An integer. The number of cores of processors to allocate for |
|
49 |
+the task. Default \code{1L}.} |
|
47 | 50 |
} |
48 | 51 |
\value{ |
49 |
-SingleCellExperiment object with `assay(inSCE, assayName)` updated |
|
50 |
-with corrected full-sized expression matrix. |
|
52 |
+The input \linkS4class{SingleCellExperiment} object with |
|
53 |
+\code{assay(inSCE, assayName)} updated. |
|
51 | 54 |
} |
52 | 55 |
\description{ |
53 |
-The scMerge method leverages factor analysis, stably expressed genes (SEGs) |
|
54 |
-and (pseudo-) replicates to remove unwanted variations and merge multiple |
|
56 |
+The scMerge method leverages factor analysis, stably expressed genes (SEGs) |
|
57 |
+and (pseudo-) replicates to remove unwanted variations and merge multiple |
|
55 | 58 |
scRNA-Seq data. |
56 | 59 |
} |
57 | 60 |
\examples{ |
... | ... |
@@ -15,27 +15,29 @@ runSeurat3Integration( |
15 | 15 |
) |
16 | 16 |
} |
17 | 17 |
\arguments{ |
18 |
-\item{inSCE}{SingleCellExperiment object. An object that stores your dataset |
|
19 |
-and analysis procedures.} |
|
18 |
+\item{inSCE}{\linkS4class{SingleCellExperiment} inherited object. Required.} |
|
20 | 19 |
|
21 |
-\item{useAssay}{character, default `"logcounts"`. A string indicating the name |
|
22 |
-of the assay requiring batch correction in "inSCE", should exist in |
|
23 |
-`assayNames(inSCE)`.} |
|
20 |
+\item{useAssay}{A single character indicating the name of the assay requiring |
|
21 |
+batch correction. Default \code{"logcounts"}.} |
|
24 | 22 |
|
25 |
-\item{batch}{character, default `"batch"`. A string indicating the |
|
26 |
-field of `colData(inSCE)` that defines different batches.} |
|
23 |
+\item{batch}{A single character indicating a field in |
|
24 |
+\code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
25 |
+Default \code{"batch"}.} |
|
27 | 26 |
|
28 |
-\item{altExpName}{character, default `"Seurat3Int"`. The name for the |
|
29 |
-corrected full-sized expression matrix. If the number of features returned |
|
30 |
-is smaller the number of total feature, the returned matrix will be saved in |
|
31 |
-`reducedDim(inSCE, assayName)`; if equal, `assay(inSCE, assayName)`.} |
|
27 |
+\item{altExpName}{A single character. The name for the |
|
28 |
+\code{\link[SingleCellExperiment]{altExp}} that stores the corrected assay. |
|
29 |
+The corrected assay is stored under the same name. Default \code{"Seurat3Int"}.}
|
32 | 30 |
|
33 |
-\item{nAnchors}{integer, default `nrow(inSCE)`. The number of features to |
|
34 |
-anchor, and also the final dimensionality of the integrated matrix. Thus |
|
35 |
-default value turns to produce full-sized assay.} |
|
31 |
+\item{nAnchors}{An integer. The number of features to anchor. The final |
|
32 |
+number of the corrected features depends on this value. Default |
|
33 |
+\code{nrow(inSCE)}.} |
|
36 | 34 |
|
37 |
-\item{verbose}{bool, default `TRUE`. Whether to show detail information of |
|
38 |
-the process.} |
|
35 |
+\item{verbose}{A logical scalar. Whether to show detailed information of
|
36 |
+the process. Default \code{TRUE}.} |
|
37 |
+} |
|
38 |
+\value{ |
|
39 |
+The input \linkS4class{SingleCellExperiment} object with |
|
40 |
+\code{altExp(inSCE, altExpName)} updated. |
|
39 | 41 |
} |
40 | 42 |
\description{ |
41 | 43 |
Can get either a full-sized corrected assay or a dimension reduced corrected |
... | ... |
@@ -6,40 +6,46 @@ |
6 | 6 |
\usage{ |
7 | 7 |
runZINBWaVE( |
8 | 8 |
inSCE, |
9 |
- useAssay = "logcounts", |
|
9 |
+ useAssay = "counts", |
|
10 | 10 |
batch = "batch", |
11 |
- reducedDimName = "zinbwave", |
|
12 |
- nHVG = 1000, |
|
13 |
- nComponents = 50, |
|
11 |
+ nHVG = 1000L, |
|
12 |
+ nComponents = 50L, |
|
14 | 13 |
epsilon = 1000, |
15 |
- nIter = 10 |
|
14 |
+ nIter = 10L, |
|
15 |
+ reducedDimName = "zinbwave" |
|
16 | 16 |
) |
17 | 17 |
} |
18 | 18 |
\arguments{ |
19 |
-\item{inSCE}{SingleCellExperiment object. An object that stores your dataset |
|
20 |
-and analysis procedures.} |
|
19 |
+\item{inSCE}{\linkS4class{SingleCellExperiment} inherited object. Required.} |
|
21 | 20 |
|
22 |
-\item{useAssay}{character, default `"logcounts"`. A string indicating the name |
|
23 |
-of the assay requiring batch correction in "inSCE", should exist in |
|
24 |
-`assayNames(inSCE)`.} |
|
21 |
+\item{useAssay}{A single character indicating the name of the assay requiring |
|
22 |
+batch correction. Note that ZINBWaVE works for counts (integer) input rather |
|
23 |
+than logcounts that other methods prefer. Default \code{"counts"}.} |
|
25 | 24 |
|
26 |
-\item{batch}{character, default `"batch"`. A string indicating the |
|
27 |
-field of `colData(inSCE)` that defines different batches.} |
|
25 |
+\item{batch}{A single character indicating a field in |
|
26 |
+\code{\link[SummarizedExperiment]{colData}} that annotates the batches. |
|
27 |
+Default \code{"batch"}.} |
|
28 | 28 |
|
29 |
-\item{reducedDimName}{character, default `"zinbwave"`. The name for the |
|
30 |
-corrected low-dimensional representation.} |
|
29 |
+\item{nHVG}{An integer. Number of highly variable genes to use when fitting |
|
30 |
+the model. Default \code{1000L}.} |
|
31 | 31 |
|
32 |
-\item{nHVG}{integer, default `1000`. Number of highly variable genes to use |
|
33 |
-when fitting the model} |
|
32 |
+\item{nComponents}{An integer. The number of principal components or
|
33 |
+dimensionality to generate in the resulting matrix. Default \code{50L}.} |
|
34 | 34 |
|
35 |
-\item{nComponents}{integer, default `50L`. Number of principle components or |
|
36 |
-dimensionality to generate in the resulting reducedDim.} |
|
35 |
+\item{epsilon}{An integer. Algorithmic parameter. Empirically, a high epsilon |
|
36 |
+is often required to obtain a good low-level representation. Default
|
37 |
+\code{1000}.}
|
37 | 38 |
|
38 |
-\item{epsilon}{integer, default `1000`. Algorithmic parameter, by default, the |
|
39 |
-epsilon parameter is set to the number of genes. We empirically found that a |
|
40 |
-high epsilon is often required to obtained a good low-level representation.} |
|
39 |
+\item{nIter}{An integer. The max number of iterations to perform. Default
|
40 |
+\code{10L}.} |
|
41 | 41 |
|
42 |
-\item{nIter}{integer, default `10`. The max number of iterations to perform.} |
|
42 |
+\item{reducedDimName}{A single character. The name for the corrected |
|
43 |
+low-dimensional representation. Will be saved to \code{reducedDim(inSCE)}. |
|
44 |
+Default \code{"zinbwave"}.} |
|
45 |
+} |
|
46 |
+\value{ |
|
47 |
+The input \linkS4class{SingleCellExperiment} object with |
|
48 |
+\code{reducedDim(inSCE, reducedDimName)} updated. |
|
43 | 49 |
} |
44 | 50 |
\description{ |
45 | 51 |
A general and flexible zero-inflated negative binomial model that can be |
... | ... |
@@ -50,8 +56,8 @@ sizes and optionally for batch effects and/or other covariates. |
50 | 56 |
} |
51 | 57 |
\examples{ |
52 | 58 |
\dontrun{ |
53 |
-data('sceBatches', package = 'singleCellTK') |
|
54 |
-sceCorr <- runZINBWaVE(sceBatches, nIter=5) |
|
59 |
+ data('sceBatches', package = 'singleCellTK') |
|
60 |
+ sceCorr <- runZINBWaVE(sceBatches, nIter = 5) |
|
55 | 61 |
} |
56 | 62 |
} |
57 | 63 |
\references{ |