... | ... |
@@ -49,14 +49,14 @@ |
49 | 49 |
#' @param logfile Character. Messages will be redirected to a file named |
50 | 50 |
#' `logfile`. If NULL, messages will be printed to stdout. Default NULL. |
51 | 51 |
#' @param verbose Logical. Whether to print log messages. Default TRUE. |
52 |
-#' |
|
52 |
+#' |
|
53 | 53 |
#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object with |
54 | 54 |
#' 'decontX_Contamination' and 'decontX_Clusters' added to the |
55 | 55 |
#' \link[SummarizedExperiment]{colData} slot. Additionally, the |
56 | 56 |
#' decontaminated counts will be added as an assay called 'decontXCounts'. |
57 | 57 |
#' @examples |
58 | 58 |
#' data(scExample, package = "singleCellTK") |
59 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
59 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
60 | 60 |
#' sce <- runDecontX(sce) |
61 | 61 |
#' @export |
62 | 62 |
runDecontX <- function(inSCE, |
... | ... |
@@ -84,8 +84,8 @@ runDecontX <- function(inSCE, |
84 | 84 |
|
85 | 85 |
message(paste0(date(), " ... Running 'DecontX'")) |
86 | 86 |
|
87 |
- inSCE <- celda::decontX(x = inSCE, |
|
88 |
- batch = sample, |
|
87 |
+ inSCE <- celda::decontX(x = inSCE, |
|
88 |
+ batch = sample, |
|
89 | 89 |
assayName = useAssay, |
90 | 90 |
z = z, |
91 | 91 |
maxIter = maxIter, |
... | ... |
@@ -100,7 +100,7 @@ runDecontX <- function(inSCE, |
100 | 100 |
verbose = verbose) |
101 | 101 |
|
102 | 102 |
#argsList <- argsList[!names(argsList) %in% ("...")] |
103 |
- |
|
103 |
+ |
|
104 | 104 |
inSCE@metadata$runDecontX <- argsList[-1] |
105 | 105 |
inSCE@metadata$runDecontX$packageVersion <- utils::packageDescription("celda")$Version |
106 | 106 |
|
... | ... |
@@ -330,7 +330,7 @@ |
330 | 330 |
#' 'doublet_finder_doublet_score'. |
331 | 331 |
#' @examples |
332 | 332 |
#' data(scExample, package = "singleCellTK") |
333 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
333 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
334 | 334 |
#' sce <- runDoubletFinder(sce) |
335 | 335 |
#' @export |
336 | 336 |
#' @importFrom SummarizedExperiment colData colData<- |
... | ... |
@@ -1,115 +1,115 @@ |
1 |
-#' Uniform Manifold Approximation and Projection(UMAP) algorithm for |
|
2 |
-#' dimension reduction. |
|
3 |
-#' |
|
4 |
-#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
|
5 |
-#' @param useAssay Indicate which assay to use. The default is "counts". |
|
6 |
-#' @param sample Character vector. Indicates which sample each cell belongs to. |
|
7 |
-#' @param reducedDimName a name to store the results of the dimension reduction |
|
8 |
-#' coordinates obtained from this method. This is stored in the SingleCellExperiment |
|
9 |
-#' object in the reducedDims slot. Default "UMAP". |
|
10 |
-#' @param logNorm Whether the counts will need to be log-normalized prior to |
|
11 |
-#' generating the UMAP via scater::logNormCounts. Default TRUE. |
|
12 |
-#' @param nNeighbors The size of local neighborhood used for |
|
13 |
-#' manifold approximation. Larger values result in more global |
|
14 |
-#' views of the manifold, while smaller values result in more |
|
15 |
-#' local data being preserved. Default 30. |
|
16 |
-#' See `?uwot::umap` for more information. |
|
17 |
-#' @param nIterations number of iterations performed during layout optimization. |
|
18 |
-#' Default is 200. |
|
19 |
-#' @param alpha initial value of "learning rate" of layout optimization. Default is 1. |
|
20 |
-#' @param minDist The effective minimum distance between embedded points. |
|
21 |
-#' Smaller values will result in a more clustered/clumped |
|
22 |
-#' embedding where nearby points on the manifold are drawn |
|
23 |
-#' closer together, while larger values will result on a more |
|
24 |
-#' even dispersal of points. Default 0.01. |
|
25 |
-#' See `?uwot::umap` for more information. |
|
26 |
-#' @param spread The effective scale of embedded points. In combination with |
|
27 |
-#' ‘min_dist’, this determines how clustered/clumped the |
|
28 |
-#' embedded points are. Default 1. |
|
29 |
-#' See `?uwot::umap` for more information. |
|
30 |
-#' @param pca Logical. Whether to perform dimensionality reduction with PCA |
|
31 |
-#' before UMAP. |
|
32 |
-#' @param initialDims Number of dimensions from PCA to use as |
|
33 |
-#' input in UMAP. Default 50. |
|
34 |
-#' |
|
35 |
-#' @return A \linkS4class{SingleCellExperiment} object with the reduced |
|
36 |
-#' dimensions updated under reducedDimName specified. |
|
37 |
-#' @export |
|
38 |
-#' |
|
39 |
-#' @examples |
|
40 |
-#' data(scExample, package = "singleCellTK") |
|
41 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
42 |
-#' umap_res <- getUMAP(inSCE = sce, useAssay = "counts", |
|
43 |
-#' reducedDimName = "UMAP", logNorm = TRUE, |
|
44 |
-#' nNeighbors = 30, alpha = 1, |
|
45 |
-#' nIterations = 200, spread = 1, pca = TRUE, |
|
46 |
-#' initialDims = 50) |
|
47 |
-#' reducedDims(umap_res) |
|
48 |
- |
|
49 |
-getUMAP <- function(inSCE, useAssay = "counts", |
|
50 |
- sample = NULL, |
|
51 |
- reducedDimName = "UMAP", |
|
52 |
- logNorm = TRUE, |
|
53 |
- nNeighbors = 30, |
|
54 |
- nIterations = 200, |
|
55 |
- alpha = 1, |
|
56 |
- minDist = 0.01, |
|
57 |
- spread = 1, |
|
58 |
- pca = TRUE, |
|
59 |
- initialDims = 50) { |
|
60 |
- if (!inherits(inSCE, "SingleCellExperiment")){ |
|
61 |
- stop("Please use a SingleCellExperiment object") |
|
62 |
- } |
|
63 |
- #test for assay existing |
|
64 |
- if (!all(useAssay %in% names(assays(inSCE)))){ |
|
65 |
- stop("assay '", useAssay, "' does not exist.") |
|
66 |
- } |
|
67 |
- |
|
68 |
- if(!is.null(sample)) { |
|
69 |
- if(length(sample) != ncol(inSCE)) { |
|
70 |
- stop("'sample' must be the same length as the number of columns in 'inSCE'") |
|
71 |
- } |
|
72 |
- } else { |
|
73 |
- sample = rep(1, ncol(inSCE)) |
|
74 |
- } |
|
75 |
- samples <- unique(sample) |
|
76 |
- umapDims = matrix(nrow = ncol(inSCE), ncol = 2) |
|
77 |
- for (i in seq_len(length(samples))){ |
|
78 |
- useAssayTemp = useAssay |
|
79 |
- sceSampleInd <- sample == samples[i] |
|
80 |
- sceSample <- inSCE[, sceSampleInd] |
|
81 |
- if(logNorm){ |
|
82 |
- sceSample <- scater_logNormCounts(sceSample, useAssay = useAssay) |
|
83 |
- useAssayTemp = "ScaterLogNormCounts" |
|
84 |
- } |
|
85 |
- |
|
86 |
- matColData <- SummarizedExperiment::assay(sceSample, useAssayTemp) |
|
87 |
- matColData <- as.matrix(matColData) |
|
88 |
- |
|
89 |
- if (isTRUE(pca)) { |
|
90 |
- if(initialDims > ncol(matColData)){ |
|
91 |
- doPCA <- ncol(matColData) |
|
92 |
- }else{ |
|
93 |
- doPCA <- initialDims |
|
94 |
- } |
|
95 |
- } else { |
|
96 |
- doPCA <- NULL |
|
97 |
- } |
|
98 |
- if(nNeighbors > ncol(matColData)){ |
|
99 |
- nNeighbors <- ncol(matColData) |
|
100 |
- } |
|
101 |
- |
|
102 |
- umapRes <- uwot::umap(t(matColData), n_neighbors = nNeighbors, |
|
103 |
- learning_rate = alpha, |
|
104 |
- min_dist = minDist, spread = spread, |
|
105 |
- n_sgd_threads = 1, pca = doPCA, |
|
106 |
- n_epochs = nIterations) |
|
107 |
- if (is.null(rownames(sceSample))) { |
|
108 |
- rownames(umapRes) <- colnames(sceSample) |
|
109 |
- } |
|
110 |
- umapDims[sceSampleInd, ] = umapRes |
|
111 |
- } |
|
112 |
- colnames(umapDims) <- c("UMAP1", "UMAP2") |
|
113 |
- SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- umapDims |
|
114 |
- return(inSCE) |
|
115 |
-} |
|
1 |
+#' Uniform Manifold Approximation and Projection(UMAP) algorithm for |
|
2 |
+#' dimension reduction. |
|
3 |
+#' |
|
4 |
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
|
5 |
+#' @param useAssay Indicate which assay to use. The default is "counts". |
|
6 |
+#' @param sample Character vector. Indicates which sample each cell belongs to. |
|
7 |
+#' @param reducedDimName a name to store the results of the dimension reduction |
|
8 |
+#' coordinates obtained from this method. This is stored in the SingleCellExperiment |
|
9 |
+#' object in the reducedDims slot. Default "UMAP". |
|
10 |
+#' @param logNorm Whether the counts will need to be log-normalized prior to |
|
11 |
+#' generating the UMAP via scater::logNormCounts. Default TRUE. |
|
12 |
+#' @param nNeighbors The size of local neighborhood used for |
|
13 |
+#' manifold approximation. Larger values result in more global |
|
14 |
+#' views of the manifold, while smaller values result in more |
|
15 |
+#' local data being preserved. Default 30. |
|
16 |
+#' See `?uwot::umap` for more information. |
|
17 |
+#' @param nIterations number of iterations performed during layout optimization. |
|
18 |
+#' Default is 200. |
|
19 |
+#' @param alpha initial value of "learning rate" of layout optimization. Default is 1. |
|
20 |
+#' @param minDist The effective minimum distance between embedded points. |
|
21 |
+#' Smaller values will result in a more clustered/clumped |
|
22 |
+#' embedding where nearby points on the manifold are drawn |
|
23 |
+#' closer together, while larger values will result on a more |
|
24 |
+#' even dispersal of points. Default 0.01. |
|
25 |
+#' See `?uwot::umap` for more information. |
|
26 |
+#' @param spread The effective scale of embedded points. In combination with |
|
27 |
+#' ‘min_dist’, this determines how clustered/clumped the |
|
28 |
+#' embedded points are. Default 1. |
|
29 |
+#' See `?uwot::umap` for more information. |
|
30 |
+#' @param pca Logical. Whether to perform dimensionality reduction with PCA |
|
31 |
+#' before UMAP. |
|
32 |
+#' @param initialDims Number of dimensions from PCA to use as |
|
33 |
+#' input in UMAP. Default 50. |
|
34 |
+#' |
|
35 |
+#' @return A \linkS4class{SingleCellExperiment} object with the reduced |
|
36 |
+#' dimensions updated under reducedDimName specified. |
|
37 |
+#' @export |
|
38 |
+#' |
|
39 |
+#' @examples |
|
40 |
+#' data(scExample, package = "singleCellTK") |
|
41 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
42 |
+#' umap_res <- getUMAP(inSCE = sce, useAssay = "counts", |
|
43 |
+#' reducedDimName = "UMAP", logNorm = TRUE, |
|
44 |
+#' nNeighbors = 30, alpha = 1, |
|
45 |
+#' nIterations = 200, spread = 1, pca = TRUE, |
|
46 |
+#' initialDims = 50) |
|
47 |
+#' reducedDims(umap_res) |
|
48 |
+ |
|
49 |
+getUMAP <- function(inSCE, useAssay = "counts", |
|
50 |
+ sample = NULL, |
|
51 |
+ reducedDimName = "UMAP", |
|
52 |
+ logNorm = TRUE, |
|
53 |
+ nNeighbors = 30, |
|
54 |
+ nIterations = 200, |
|
55 |
+ alpha = 1, |
|
56 |
+ minDist = 0.01, |
|
57 |
+ spread = 1, |
|
58 |
+ pca = TRUE, |
|
59 |
+ initialDims = 50) { |
|
60 |
+ if (!inherits(inSCE, "SingleCellExperiment")){ |
|
61 |
+ stop("Please use a SingleCellExperiment object") |
|
62 |
+ } |
|
63 |
+ #test for assay existing |
|
64 |
+ if (!all(useAssay %in% names(assays(inSCE)))){ |
|
65 |
+ stop("assay '", useAssay, "' does not exist.") |
|
66 |
+ } |
|
67 |
+ |
|
68 |
+ if(!is.null(sample)) { |
|
69 |
+ if(length(sample) != ncol(inSCE)) { |
|
70 |
+ stop("'sample' must be the same length as the number of columns in 'inSCE'") |
|
71 |
+ } |
|
72 |
+ } else { |
|
73 |
+ sample = rep(1, ncol(inSCE)) |
|
74 |
+ } |
|
75 |
+ samples <- unique(sample) |
|
76 |
+ umapDims = matrix(nrow = ncol(inSCE), ncol = 2) |
|
77 |
+ for (i in seq_len(length(samples))){ |
|
78 |
+ useAssayTemp = useAssay |
|
79 |
+ sceSampleInd <- sample == samples[i] |
|
80 |
+ sceSample <- inSCE[, sceSampleInd] |
|
81 |
+ if(logNorm){ |
|
82 |
+ sceSample <- scater_logNormCounts(sceSample, useAssay = useAssay) |
|
83 |
+ useAssayTemp = "ScaterLogNormCounts" |
|
84 |
+ } |
|
85 |
+ |
|
86 |
+ matColData <- SummarizedExperiment::assay(sceSample, useAssayTemp) |
|
87 |
+ matColData <- as.matrix(matColData) |
|
88 |
+ |
|
89 |
+ if (isTRUE(pca)) { |
|
90 |
+ if(initialDims > ncol(matColData)){ |
|
91 |
+ doPCA <- ncol(matColData) |
|
92 |
+ }else{ |
|
93 |
+ doPCA <- initialDims |
|
94 |
+ } |
|
95 |
+ } else { |
|
96 |
+ doPCA <- NULL |
|
97 |
+ } |
|
98 |
+ if(nNeighbors > ncol(matColData)){ |
|
99 |
+ nNeighbors <- ncol(matColData) |
|
100 |
+ } |
|
101 |
+ |
|
102 |
+ umapRes <- uwot::umap(t(matColData), n_neighbors = nNeighbors, |
|
103 |
+ learning_rate = alpha, |
|
104 |
+ min_dist = minDist, spread = spread, |
|
105 |
+ n_sgd_threads = 1, pca = doPCA, |
|
106 |
+ n_epochs = nIterations) |
|
107 |
+ if (is.null(rownames(sceSample))) { |
|
108 |
+ rownames(umapRes) <- colnames(sceSample) |
|
109 |
+ } |
|
110 |
+ umapDims[sceSampleInd, ] = umapRes |
|
111 |
+ } |
|
112 |
+ colnames(umapDims) <- c("UMAP1", "UMAP2") |
|
113 |
+ SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- umapDims |
|
114 |
+ return(inSCE) |
|
115 |
+} |
... | ... |
@@ -47,7 +47,7 @@ reportDropletQC <- function(inSCE, output_file = NULL, |
47 | 47 |
#' @return .html file |
48 | 48 |
#' @examples |
49 | 49 |
#' data(scExample, package = "singleCellTK") |
50 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
50 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
51 | 51 |
#' \dontrun{ |
52 | 52 |
#' sce <- runCellQC(sce) |
53 | 53 |
#' reportCellQC(inSCE = sce) |
... | ... |
@@ -84,7 +84,7 @@ reportCellQC <- function(inSCE, output_file = NULL, |
84 | 84 |
#' @return .html file |
85 | 85 |
#' @examples |
86 | 86 |
#' data(scExample, package = "singleCellTK") |
87 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
87 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
88 | 88 |
#' \donttest{ |
89 | 89 |
#' sce <- runDecontX(sce) |
90 | 90 |
#' sce <- getUMAP(sce) |
... | ... |
@@ -165,12 +165,6 @@ reportQCTool <- function(inSCE, algorithm=c("BarcodeRankDrops", |
165 | 165 |
#' @param output_dir name of the output directory to save the rendered file. If |
166 | 166 |
#' \code{NULL} the file is stored to the current working directory. |
167 | 167 |
#' Default \code{NULL}. |
168 |
-#' @examples |
|
169 |
-#' data(scExample, package = "singleCellTK") |
|
170 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
171 |
-#' sce <- runDEAnalysis(inSCE = sce, groupName1 = "Sample1", method = "DESeq2", |
|
172 |
-#' groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, analysisName = "DESeq2") |
|
173 |
-#' reportDiffExp(sce, study = "DESeq2", output_file = "DESeq2_res") |
|
174 | 168 |
#' @return .html file |
175 | 169 |
#' @export |
176 | 170 |
reportDiffExp <- function(inSCE, study, |
... | ... |
@@ -262,7 +262,7 @@ plotDEGRegression <- function(inSCE, useResult, threshP = FALSE, labelBy = NULL, |
262 | 262 |
#' @examples |
263 | 263 |
#' data(scExample, package = "singleCellTK") |
264 | 264 |
#' \dontrun{ |
265 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
265 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
266 | 266 |
#' sce <- runDEAnalysis(inSCE = sce, groupName1 = "Sample1", method = "DESeq2", |
267 | 267 |
#' groupName2 = "Sample2", index1 = 1:100, index2 = 101:190, analysisName = "DESeq2") |
268 | 268 |
#' plotDEGHeatmap(sce, useResult = "DESeq2", fdrThreshold = 1) |
... | ... |
@@ -177,7 +177,7 @@ dataAnnotationColor <- function(inSCE, axis = NULL, |
177 | 177 |
#' @param ... Other arguments passed to \code{\link[ComplexHeatmap]{Heatmap}}. |
178 | 178 |
#' @examples |
179 | 179 |
#' data(scExample, package = "singleCellTK") |
180 |
-#' plotSCEHeatmap(sce[1:3,1:3]) |
|
180 |
+#' plotSCEHeatmap(sce[1:3,1:3], useAssay = "counts") |
|
181 | 181 |
#' @return A \code{\link[ComplexHeatmap]{Heatmap}} object |
182 | 182 |
#' @export |
183 | 183 |
#' @author Yichen Wang |
... | ... |
@@ -15,7 +15,7 @@ |
15 | 15 |
#' |
16 | 16 |
#' @examples |
17 | 17 |
#' data(scExample, package = "singleCellTK") |
18 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
18 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
19 | 19 |
#' sce <- getUMAP(inSCE = sce, useAssay = "counts", reducedDimName = "UMAP") |
20 | 20 |
#' plotUMAP(sce, shape = "No Shape", reducedDimName = "UMAP", |
21 | 21 |
#' runUMAP = TRUE, useAssay = "counts") |
... | ... |
@@ -127,7 +127,7 @@ |
127 | 127 |
#' \code{\link{runANOVA}} |
128 | 128 |
#' @examples |
129 | 129 |
#' data(scExample, package = "singleCellTK") |
130 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
130 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
131 | 131 |
#' sce <- runDEAnalysis(inSCE = sce, groupName1 = "Sample1", method = "DESeq2", |
132 | 132 |
#' groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, analysisName = "DESeq2") |
133 | 133 |
#' @return Input SCE object with \code{metadata(inSCE)} updated with name |
... | ... |
@@ -190,7 +190,7 @@ runDEAnalysis <- function(method = 'MAST', ...){ |
190 | 190 |
#' Default \code{FALSE}. |
191 | 191 |
#' @examples |
192 | 192 |
#' data(scExample, package = "singleCellTK") |
193 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
193 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
194 | 194 |
#' sce <- runDESeq2(inSCE = sce, groupName1 = "Sample1", |
195 | 195 |
#' groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, analysisName = "DESeq2") |
196 | 196 |
#' |
... | ... |
@@ -317,7 +317,7 @@ runDESeq2 <- function(inSCE, useAssay = 'counts', index1 = NULL, |
317 | 317 |
#' Default \code{FALSE}. |
318 | 318 |
#' @examples |
319 | 319 |
#' data(scExample, package = "singleCellTK") |
320 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
320 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
321 | 321 |
#' sce@assays@data$logcounts <- log10(counts(sce) + 1) |
322 | 322 |
#' sce <- runLimmaDE(inSCE = sce, groupName1 = "Sample1", |
323 | 323 |
#' groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, analysisName = "Limma") |
... | ... |
@@ -440,7 +440,7 @@ runLimmaDE <- function(inSCE, useAssay = 'logcounts', index1 = NULL, |
440 | 440 |
#' Default \code{FALSE}. |
441 | 441 |
#' @examples |
442 | 442 |
#' data(scExample, package = "singleCellTK") |
443 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
443 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
444 | 444 |
#' sce@assays@data$logcounts <- log10(counts(sce) + 1) |
445 | 445 |
#' sce <- runANOVA(inSCE = sce, groupName1 = "Sample1", |
446 | 446 |
#' groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, |
... | ... |
@@ -586,7 +586,7 @@ runANOVA <- function(inSCE, useAssay = 'logcounts', index1 = NULL, |
586 | 586 |
#' Default \code{FALSE}. |
587 | 587 |
#' @examples |
588 | 588 |
#' data(scExample, package = "singleCellTK") |
589 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
589 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
590 | 590 |
#' sce@assays@data$logcounts <- log10(counts(sce) + 1) |
591 | 591 |
#' sce <- runMAST(inSCE = sce, groupName1 = "Sample1", |
592 | 592 |
#' groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, analysisName = "MAST") |
... | ... |
@@ -20,7 +20,7 @@ |
20 | 20 |
#' of \code{inSCE}. |
21 | 21 |
#' @examples |
22 | 22 |
#' data(scExample, package = "singleCellTK") |
23 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
23 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
24 | 24 |
#' \donttest{ |
25 | 25 |
#' sce <- runCellQC(sce) |
26 | 26 |
#' } |
... | ... |
@@ -118,7 +118,7 @@ |
118 | 118 |
#' QC stats stored in the inSCE. |
119 | 119 |
#' @examples |
120 | 120 |
#' data(scExample, package = "singleCellTK") |
121 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
121 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
122 | 122 |
#' sampleSummaryStats(sce, simple = TRUE) |
123 | 123 |
#' @importFrom magrittr %>% |
124 | 124 |
#' @export |
... | ... |
@@ -23,7 +23,7 @@ |
23 | 23 |
#' Please refer to the documentation of \link[scds]{cxds} for details. |
24 | 24 |
#' @examples |
25 | 25 |
#' data(scExample, package = "singleCellTK") |
26 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
26 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
27 | 27 |
#' sce <- runCxds(sce) |
28 | 28 |
#' @export |
29 | 29 |
#' @importFrom SummarizedExperiment colData colData<- |
... | ... |
@@ -31,9 +31,9 @@ |
31 | 31 |
runCxds <- function(inSCE, |
32 | 32 |
sample = NULL, |
33 | 33 |
seed = 12345, |
34 |
- ntop = 500, |
|
34 |
+ ntop = 500, |
|
35 | 35 |
binThresh = 0, |
36 |
- verb = FALSE, |
|
36 |
+ verb = FALSE, |
|
37 | 37 |
retRes = FALSE, |
38 | 38 |
estNdbl = FALSE, |
39 | 39 |
useAssay = "counts") { |
... | ... |
@@ -48,7 +48,7 @@ runCxds <- function(inSCE, |
48 | 48 |
} |
49 | 49 |
|
50 | 50 |
message(paste0(date(), " ... Running 'cxds'")) |
51 |
- |
|
51 |
+ |
|
52 | 52 |
## Getting current arguments |
53 | 53 |
#argsList <- as.list(formals(fun = sys.function(sys.parent()), envir = parent.frame())) |
54 | 54 |
argsList <- mget(names(formals()),sys.frame(sys.nframe())) |
... | ... |
@@ -71,19 +71,19 @@ runCxds <- function(inSCE, |
71 | 71 |
|
72 | 72 |
mat <- SummarizedExperiment::assay(sceSample, i = useAssay) |
73 | 73 |
counts(sceSample) <- .convertToMatrix(mat) |
74 |
- |
|
74 |
+ |
|
75 | 75 |
result <- NULL |
76 | 76 |
nGene <- 500 |
77 | 77 |
while(!inherits(result, "SingleCellExperiment") & nGene > 0) { |
78 | 78 |
try({result <- withr::with_seed(seed, scds::cxds(sce = sceSample, |
79 |
- ntop = nGene, |
|
80 |
- binThresh = binThresh, |
|
81 |
- verb = verb, |
|
79 |
+ ntop = nGene, |
|
80 |
+ binThresh = binThresh, |
|
81 |
+ verb = verb, |
|
82 | 82 |
retRes = retRes, |
83 | 83 |
estNdbl = estNdbl))}, silent = TRUE) |
84 | 84 |
nGene <- nGene - 100 |
85 |
- } |
|
86 |
- |
|
85 |
+ } |
|
86 |
+ |
|
87 | 87 |
if (!inherits(result, "try-error") & !is.null(result)) { |
88 | 88 |
if ("cxds_call" %in% colnames(SummarizedExperiment::colData(result))) { |
89 | 89 |
output[sceSampleInd, ] <- SummarizedExperiment::colData(result)[, |
... | ... |
@@ -95,12 +95,12 @@ runCxds <- function(inSCE, |
95 | 95 |
} else { |
96 | 96 |
output[sceSampleInd, ] <- NA |
97 | 97 |
warning(paste0("'cxds' from package 'scds' did not complete successfully for sample", samples[i])) |
98 |
- } |
|
98 |
+ } |
|
99 | 99 |
} |
100 | 100 |
|
101 | 101 |
colnames(output) <- paste0("scds_", colnames(output)) |
102 | 102 |
colData(inSCE) = cbind(colData(inSCE), output) |
103 |
- |
|
103 |
+ |
|
104 | 104 |
inSCE@metadata$runCxds <- argsList[-1] |
105 | 105 |
inSCE@metadata$runCxds$packageVersion <- utils::packageDescription("scds")$Version |
106 | 106 |
|
... | ... |
@@ -120,7 +120,7 @@ runCxds <- function(inSCE, |
120 | 120 |
#' separately. If NULL, then all cells will be processed together. |
121 | 121 |
#' Default NULL. |
122 | 122 |
#' @param seed Seed for the random number generator. Default 12345. |
123 |
-#' @param ntop See \link[scds]{bcds} for more information. Default \code{500}. |
|
123 |
+#' @param ntop See \link[scds]{bcds} for more information. Default \code{500}. |
|
124 | 124 |
#' @param srat See \link[scds]{bcds} for more information. Default \code{1}. |
125 | 125 |
#' @param verb See \link[scds]{bcds} for more information. Default \code{FALSE}. |
126 | 126 |
#' @param retRes See \link[scds]{bcds} for more information. Default \code{FALSE}. |
... | ... |
@@ -135,7 +135,7 @@ runCxds <- function(inSCE, |
135 | 135 |
#' Please refer to the documentation of \link[scds]{bcds} for details. |
136 | 136 |
#' @examples |
137 | 137 |
#' data(scExample, package = "singleCellTK") |
138 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
138 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
139 | 139 |
#' sce <- runBcds(sce) |
140 | 140 |
#' @export |
141 | 141 |
#' @importFrom SummarizedExperiment colData colData<- |
... | ... |
@@ -143,11 +143,11 @@ runCxds <- function(inSCE, |
143 | 143 |
runBcds <- function(inSCE, |
144 | 144 |
sample = NULL, |
145 | 145 |
seed = 12345, |
146 |
- ntop = 500, |
|
147 |
- srat = 1, |
|
146 |
+ ntop = 500, |
|
147 |
+ srat = 1, |
|
148 | 148 |
verb = FALSE, |
149 | 149 |
retRes = FALSE, |
150 |
- nmax = "tune", |
|
150 |
+ nmax = "tune", |
|
151 | 151 |
varImp = FALSE, |
152 | 152 |
estNdbl = FALSE, |
153 | 153 |
useAssay = "counts" |
... | ... |
@@ -163,7 +163,7 @@ runBcds <- function(inSCE, |
163 | 163 |
} |
164 | 164 |
|
165 | 165 |
message(paste0(date(), " ... Running 'bcds'")) |
166 |
- |
|
166 |
+ |
|
167 | 167 |
## Getting current arguments |
168 | 168 |
#argsList <- as.list(formals(fun = sys.function(sys.parent()), envir = parent.frame())) |
169 | 169 |
argsList <- mget(names(formals()),sys.frame(sys.nframe())) |
... | ... |
@@ -186,22 +186,22 @@ runBcds <- function(inSCE, |
186 | 186 |
|
187 | 187 |
mat <- SummarizedExperiment::assay(sceSample, i = useAssay) |
188 | 188 |
counts(sceSample) <- .convertToMatrix(mat) |
189 |
- |
|
189 |
+ |
|
190 | 190 |
result <- NULL |
191 | 191 |
nGene <- 500 |
192 | 192 |
while(!inherits(result, "SingleCellExperiment") & nGene > 0) { |
193 | 193 |
try({result <- withr::with_seed(seed, |
194 | 194 |
scds::bcds(sce = sceSample, |
195 | 195 |
ntop = nGene, |
196 |
- srat = srat, |
|
196 |
+ srat = srat, |
|
197 | 197 |
verb = verb, |
198 | 198 |
retRes = retRes, |
199 |
- nmax = nmax, |
|
199 |
+ nmax = nmax, |
|
200 | 200 |
varImp = varImp, |
201 | 201 |
estNdbl = estNdbl |
202 | 202 |
))}, silent = TRUE) |
203 | 203 |
nGene <- nGene - 100 |
204 |
- } |
|
204 |
+ } |
|
205 | 205 |
|
206 | 206 |
if (!inherits(result, "try-error") & !is.null(result)) { |
207 | 207 |
if ("bcds_call" %in% colnames(SummarizedExperiment::colData(result))) { |
... | ... |
@@ -214,13 +214,13 @@ runBcds <- function(inSCE, |
214 | 214 |
} else { |
215 | 215 |
output[sceSampleInd, ] <- NA |
216 | 216 |
warning(paste0("'bcds' from package 'scds' did not complete successfully for sample", samples[i])) |
217 |
- } |
|
218 |
- |
|
217 |
+ } |
|
218 |
+ |
|
219 | 219 |
} |
220 | 220 |
|
221 | 221 |
colnames(output) <- paste0("scds_", colnames(output)) |
222 | 222 |
colData(inSCE) = cbind(colData(inSCE), output) |
223 |
- |
|
223 |
+ |
|
224 | 224 |
inSCE@metadata$runBcds <- argsList[-1] |
225 | 225 |
inSCE@metadata$runBcds$packageVersion <- utils::packageDescription("scds")$Version |
226 | 226 |
|
... | ... |
@@ -241,7 +241,7 @@ runBcds <- function(inSCE, |
241 | 241 |
#' Default NULL. |
242 | 242 |
#' @param seed Seed for the random number generator. Default 12345. |
243 | 243 |
#' @param nTop The number of top varialbe genes to consider. Used in both \code{csds} |
244 |
-#' and \code{bcds}. Default \code{500}. |
|
244 |
+#' and \code{bcds}. Default \code{500}. |
|
245 | 245 |
#' @param cxdsArgs See \link[scds]{cxds_bcds_hybrid} for more information. Default \code{NULL}. |
246 | 246 |
#' @param bcdsArgs See \link[scds]{cxds_bcds_hybrid} for more information. Default \code{NULL}. |
247 | 247 |
#' @param verb See \link[scds]{cxds_bcds_hybrid} for more information. Default \code{FALSE}. |
... | ... |
@@ -256,7 +256,7 @@ runBcds <- function(inSCE, |
256 | 256 |
#' details. |
257 | 257 |
#' @examples |
258 | 258 |
#' data(scExample, package = "singleCellTK") |
259 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
259 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
260 | 260 |
#' sce <- runCxdsBcdsHybrid(sce) |
261 | 261 |
#' @export |
262 | 262 |
#' @importFrom SummarizedExperiment colData colData<- |
... | ... |
@@ -266,7 +266,7 @@ runCxdsBcdsHybrid <- function(inSCE, |
266 | 266 |
seed = 12345, |
267 | 267 |
nTop = 500, |
268 | 268 |
cxdsArgs = list(), |
269 |
- bcdsArgs = list(), |
|
269 |
+ bcdsArgs = list(), |
|
270 | 270 |
verb = FALSE, |
271 | 271 |
estNdbl = FALSE, |
272 | 272 |
force = FALSE, |
... | ... |
@@ -309,14 +309,14 @@ runCxdsBcdsHybrid <- function(inSCE, |
309 | 309 |
result <- NULL |
310 | 310 |
nGene <- 500 |
311 | 311 |
while(!inherits(result, "SingleCellExperiment") & nGene > 0) { |
312 |
- try({result <- withr::with_seed(seed, scds::cxds_bcds_hybrid(sce = sceSample, |
|
313 |
- cxdsArgs=c(list(ntop = nGene), cxdsArgs), |
|
314 |
- bcdsArgs=c(list(ntop = nGene), bcdsArgs), |
|
312 |
+ try({result <- withr::with_seed(seed, scds::cxds_bcds_hybrid(sce = sceSample, |
|
313 |
+ cxdsArgs=c(list(ntop = nGene), cxdsArgs), |
|
314 |
+ bcdsArgs=c(list(ntop = nGene), bcdsArgs), |
|
315 | 315 |
verb = verb, |
316 | 316 |
estNdbl = estNdbl, |
317 | 317 |
force = force))}, silent = TRUE) |
318 | 318 |
nGene <- nGene - 100 |
319 |
- } |
|
319 |
+ } |
|
320 | 320 |
|
321 | 321 |
if (!inherits(result, "try-error") & !is.null(result)) { |
322 | 322 |
if ("hybrid_call" %in% colnames(SummarizedExperiment::colData(result))) { |
... | ... |
@@ -329,12 +329,12 @@ runCxdsBcdsHybrid <- function(inSCE, |
329 | 329 |
} else { |
330 | 330 |
output[sceSampleInd, ] <- NA |
331 | 331 |
warning(paste0("'cxds_bcds_hybrid' from package 'scds' did not complete successfully for sample", samples[i])) |
332 |
- } |
|
332 |
+ } |
|
333 | 333 |
} |
334 | 334 |
|
335 | 335 |
colnames(output) <- paste0("scds_", colnames(output)) |
336 | 336 |
colData(inSCE) = cbind(colData(inSCE), output) |
337 |
- |
|
337 |
+ |
|
338 | 338 |
inSCE@metadata$runCxdsBcdsHybrid <- argsList[-1] |
339 | 339 |
inSCE@metadata$runCxdsBcdsHybrid$packageVersion <- utils::packageDescription("scds")$Version |
340 | 340 |
|
... | ... |
@@ -88,7 +88,7 @@ |
88 | 88 |
#' @seealso \link[scran]{doubletCells} |
89 | 89 |
#' @examples |
90 | 90 |
#' data(scExample, package = "singleCellTK") |
91 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
91 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
92 | 92 |
#' sce <- runDoubletCells(sce) |
93 | 93 |
#' @export |
94 | 94 |
#' @importFrom SummarizedExperiment colData colData<- |
... | ... |
@@ -1,231 +1,231 @@ |
1 |
-#' @title Find doublets using \code{scrublet}. |
|
2 |
-#' @description A wrapper function that calls \code{scrub_doublets} from python |
|
3 |
-#' module \code{scrublet}. Simulates doublets from the observed data and uses |
|
4 |
-#' a k-nearest-neighbor classifier to calculate a continuous |
|
5 |
-#' \code{scrublet_score} (between 0 and 1) for each transcriptome. The score |
|
6 |
-#' is automatically thresholded to generate \code{scrublet_call}, a boolean |
|
7 |
-#' array that is \code{TRUE} for predicted doublets and \code{FALSE} |
|
8 |
-#' otherwise. |
|
9 |
-#' @param inSCE A \link[SingleCellExperiment]{SingleCellExperiment} object. |
|
10 |
-#' Needs \code{counts} in assays slot. |
|
11 |
-#' @param sample Character vector. Indicates which sample each cell belongs to. |
|
12 |
-#' Scrublet will be run on cells from each sample separately. If NULL, then |
|
13 |
-#' all cells will be processed together. Default \code{NULL}. |
|
14 |
-#' @param useAssay A string specifying which assay in the SCE to use. Default |
|
15 |
-#' 'counts'. |
|
16 |
-#' @param simDoubletRatio Numeric. Number of doublets to simulate relative to |
|
17 |
-#' the number of observed transcriptomes. Default 2.0. |
|
18 |
-#' @param nNeighbors Integer. Number of neighbors used to construct the KNN |
|
19 |
-#' graph of observed transcriptomes and simulated doublets. If \code{NULL}, |
|
20 |
-#' this is set to \code{round(0.5 * sqrt(n_cells))}. Default \code{NULL}. |
|
21 |
-#' @param minDist Float Determines how tightly UMAP packs points together. If \code{NULL}, |
|
22 |
-#' this is set to 0.1. Default \code{NULL}. |
|
23 |
-#' @param expectedDoubletRate The estimated doublet rate for the experiment. |
|
24 |
-#' Default 0.1. |
|
25 |
-#' @param stdevDoubletRate Uncertainty in the expected doublet rate. |
|
26 |
-#' Default 0.02. |
|
27 |
-#' @param syntheticDoubletUmiSubsampling Numeric. Rate for sampling UMIs |
|
28 |
-#' when creating synthetic doublets. If 1.0, each doublet is created by simply |
|
29 |
-#' adding the UMIs from two randomly sampled observed transcriptomes. For |
|
30 |
-#' values less than 1, the UMI counts are added and then randomly sampled at |
|
31 |
-#' the specified rate. Defuault: 1.0. |
|
32 |
-#' @param useApproxNeighbors Boolean. Use approximate nearest neighbor method |
|
33 |
-#' (\code{annoy}) for the KNN classifier. Default \code{TRUE}. |
|
34 |
-#' @param distanceMetric Character. Distance metric used when finding nearest |
|
35 |
-#' neighbors. |
|
36 |
-#' For list of valid values, see the documentation for \code{annoy} (if |
|
37 |
-#' \code{useApproxNeighbors} is \code{TRUE}) or |
|
38 |
-#' \code{sklearn.neighbors.NearestNeighbors} (if \code{useApproxNeighbors} is |
|
39 |
-#' \code{FALSE}). Default "euclidean". |
|
40 |
-#' @param getDoubletNeighborParents Boolean. If \code{TRUE}, return the |
|
41 |
-#' parent transcriptomes that generated the doublet neighbors of each |
|
42 |
-#' observed transcriptome. This information can be used to infer the cell |
|
43 |
-#' states that generated a given doublet state. Default \code{FALSE}. |
|
44 |
-#' @param minCounts Numeric. Used for gene filtering prior to PCA. Genes |
|
45 |
-#' expressed at fewer than \code{minCounts} in fewer than \code{minCells} |
|
46 |
-#' (see below) are excluded. Default 3. |
|
47 |
-#' @param minCells Integer. Used for gene filtering prior to PCA. Genes |
|
48 |
-#' expressed at fewer than \code{minCounts} (see above) in fewer than |
|
49 |
-#' \code{minCells} are excluded. Default 3. |
|
50 |
-#' @param minGeneVariabilityPctl Numeric. Used for gene filtering prior to |
|
51 |
-#' PCA. Keep the most highly variable genes (in the top |
|
52 |
-#' minGeneVariabilityPctl percentile), as measured by the v-statistic |
|
53 |
-#' (\emph{Klein et al., Cell 2015}). Default 85. |
|
54 |
-#' @param logTransform Boolean. If \code{TRUE}, log-transform the counts matrix |
|
55 |
-#' (log10(1+TPM)). \code{sklearn.decomposition.TruncatedSVD} will be used for |
|
56 |
-#' dimensionality reduction, unless \code{meanCenter} is \code{TRUE}. |
|
57 |
-#' Default \code{FALSE}. |
|
58 |
-#' @param meanCenter If \code{TRUE}, center the data such that each gene has a |
|
59 |
-#' mean of 0. \code{sklearn.decomposition.PCA} will be used for |
|
60 |
-#' dimensionality reduction. Default \code{TRUE}. |
|
61 |
-#' @param normalizeVariance Boolean. If \code{TRUE}, normalize the data such |
|
62 |
-#' that each gene has a variance of 1. |
|
63 |
-#' \code{sklearn.decomposition.TruncatedSVD} will be used for dimensionality |
|
64 |
-#' reduction, unless \code{meanCenter} is \code{TRUE}. Default \code{TRUE}. |
|
65 |
-#' @param nPrinComps Integer. Number of principal components used to embed |
|
66 |
-#' the transcriptomes prior to k-nearest-neighbor graph construction. |
|
67 |
-#' Default 30. |
|
68 |
-#' @param tsneAngle Float. Determines angular size of a distant node as measured |
|
69 |
-#' from a point in the t-SNE plot. If default, it is set to 0.5 Default \code{NULL}. |
|
70 |
-#' @param tsnePerplexity Integer. The number of nearest neighbors that |
|
71 |
-#' is used in other manifold learning algorithms. |
|
72 |
-#' If default, it is set to 30. Default \code{NULL}. |
|
73 |
-#' @param verbose Boolean. If \code{TRUE}, print progress updates. Default |
|
74 |
-#' \code{TRUE}. |
|
75 |
-#' @param seed Seed for the random number generator. Default 12345. |
|
76 |
-#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object with |
|
77 |
-#' \code{scrub_doublets} output appended to the |
|
78 |
-#' \link[SummarizedExperiment]{colData} slot. The columns include |
|
79 |
-#' \emph{scrublet_score} and \emph{scrublet_call}. |
|
80 |
-#' @examples |
|
81 |
-#' data(scExample, package = "singleCellTK") |
|
82 |
-#' sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
83 |
-#' sce <- runScrublet(sce) |
|
84 |
-#' @export |
|
85 |
-#' @importFrom reticulate py_module_available py_set_seed import |
|
86 |
-#' @importFrom SummarizedExperiment colData colData<- |
|
87 |
-#' @importFrom SingleCellExperiment reducedDim |
|
88 |
-runScrublet <- function(inSCE, |
|
89 |
- sample = NULL, |
|
90 |
- useAssay = "counts", |
|
91 |
- simDoubletRatio = 2.0, |
|
92 |
- nNeighbors = NULL, |
|
93 |
- minDist = NULL, |
|
94 |
- expectedDoubletRate = 0.1, |
|
95 |
- stdevDoubletRate = 0.02, |
|
96 |
- syntheticDoubletUmiSubsampling = 1.0, |
|
97 |
- useApproxNeighbors = TRUE, |
|
98 |
- distanceMetric = "euclidean", |
|
99 |
- getDoubletNeighborParents = FALSE, |
|
100 |
- minCounts = 3, |
|
101 |
- minCells = 3L, |
|
102 |
- minGeneVariabilityPctl = 85, |
|
103 |
- logTransform = FALSE, |
|
104 |
- meanCenter = TRUE, |
|
105 |
- normalizeVariance = TRUE, |
|
106 |
- nPrinComps = 30L, |
|
107 |
- tsneAngle = NULL, |
|
108 |
- tsnePerplexity = NULL, |
|
109 |
- verbose = TRUE, |
|
110 |
- seed = 12345) { |
|
111 |
- |
|
112 |
- if (!reticulate::py_module_available(module = "scrublet")) { |
|
113 |
- warning("Cannot find python module 'scrublet', please install Conda and", |
|
114 |
- " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().", |
|
115 |
- "If one of these have been previously run to install the modules,", |
|
116 |
- "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),", |
|
117 |
- " respectively, if R has been restarted since the module installation.", |
|
118 |
- " Alternatively, Scrublet can be installed on the local machine", |
|
119 |
- "with pip (e.g. pip install scrublet) and then the 'use_python()'", |
|
120 |
- " function from the 'reticulate' package can be used to select the", |
|
121 |
- " correct Python environment.") |
|
122 |
- return(inSCE) |
|
123 |
- } |
|
124 |
- |
|
125 |
- if (!is.null(seed)) { |
|
126 |
- reticulate::py_set_seed(seed = seed) |
|
127 |
- } |
|
128 |
- |
|
129 |
- if (!is.null(sample)) { |
|
130 |
- if (length(sample) != ncol(inSCE)) { |
|
131 |
- stop("'sample' must be the same length as the number of", |
|
132 |
- " columns in 'inSCE'") |
|
133 |
- } |
|
134 |
- } else { |
|
135 |
- sample = rep(1, ncol(inSCE)) |
|
136 |
- } |
|
137 |
- |
|
138 |
- message(paste0(date(), " ... Running 'scrublet'")) |
|
139 |
- |
|
140 |
- ## Getting current arguments values |
|
141 |
- #argsList <- as.list(formals(fun = sys.function(sys.parent()), envir = parent.frame())) |
|
142 |
- argsList <- mget(names(formals()),sys.frame(sys.nframe())) |
|
143 |
- |
|
144 |
- ## Define result matrix for all samples |
|
145 |
- output <- S4Vectors::DataFrame(row.names = colnames(inSCE), |
|
146 |
- scrublet_score = numeric(ncol(inSCE)), |
|
147 |
- scrublet_call = logical(ncol(inSCE))) |
|
148 |
- |
|
149 |
- ## Loop through each sample and run scrublet |
|
150 |
- error <- try({ |
|
151 |
- samples <- unique(sample) |
|
152 |
- umapDims <- matrix(ncol = 2, |
|
153 |
- nrow = ncol(inSCE)) |
|
154 |
- rownames(umapDims) = colnames(inSCE) |
|
155 |
- colnames(umapDims) = c("UMAP_1", "UMAP_2") |
|
156 |
- |
|
157 |
- tsneDims <- matrix(ncol = 2, |
|
158 |
- nrow = ncol(inSCE)) |
|
159 |
- rownames(tsneDims) = colnames(inSCE) |
|
160 |
- colnames(tsneDims) = c("TSNE_1", "TSNE_2") |
|
161 |
- |
|
162 |
- for (i in seq_len(length(samples))) { |
|
163 |
- sceSampleInd <- sample == samples[i] |
|
164 |
- sceSample <- inSCE[, sceSampleInd] |
|
165 |
- |
|
166 |
- mat <- SummarizedExperiment::assay(sceSample, i = useAssay) |
|
167 |
- mat <- .convertToMatrix(mat) |
|
168 |
- |
|
169 |
- scr <- scrublet$Scrublet(counts_matrix = t(mat), |
|
170 |
- sim_doublet_ratio = simDoubletRatio, |
|
171 |
- n_neighbors = nNeighbors, |
|
172 |
- expected_doublet_rate = expectedDoubletRate, |
|
173 |
- stdev_doublet_rate = stdevDoubletRate) |
|
174 |
- |
|
175 |
- result <- scr$scrub_doublets( |
|
176 |
- synthetic_doublet_umi_subsampling = syntheticDoubletUmiSubsampling, |
|
177 |
- use_approx_neighbors = useApproxNeighbors, |
|
178 |
- distance_metric = distanceMetric, |
|
179 |
- get_doublet_neighbor_parents = getDoubletNeighborParents, |
|
180 |
- min_counts = minCounts, |
|
181 |
- min_cells = as.integer(minCells), |
|
182 |
- min_gene_variability_pctl = minGeneVariabilityPctl, |
|
183 |
- log_transform = logTransform, |
|
184 |
- mean_center = meanCenter, |
|
185 |
- normalize_variance = normalizeVariance, |
|
186 |
- n_prin_comps = as.integer(nPrinComps), |
|
187 |
- verbose = verbose) |
|
188 |
- |
|
189 |
- output[sceSampleInd, "scrublet_score"] <- result[[1]] |
|
190 |
- output[sceSampleInd, "scrublet_call"] <- result[[2]] |
|
191 |
- |
|
192 |
- ## Extract UMAP and TSNE coordinates |
|
193 |
- if (is.null(nNeighbors) && is.null(minDist)){ |
|
194 |
- umap_coordinates <- scrublet$get_umap(scr$manifold_obs_) |
|
195 |
- }else { |
|
196 |
- umap_coordinates <- scrublet$get_umap(scr$manifold_obs_, |
|
197 |
- n_neighbors=as.integer(nNeighbors), |
|
198 |
- min_dist=minDist) |
|
199 |
- } |
|
200 |
- umapDims[sceSampleInd, ] <- umap_coordinates |
|
201 |
- |
|
202 |
- if (is.null(tsneAngle) && is.null(tsnePerplexity)){ |
|
203 |
- tsne_coordinates <- scrublet$get_tsne(scr$manifold_obs_) |
|
204 |
- }else { |
|
205 |
- tsne_coordinates <- scrublet$get_tsne(scr$manifold_obs_, |
|
206 |
- angle=tsneAngle, |
|
207 |
- perplexity=as.integer(tsnePerplexity)) |
|
208 |
- } |
|
209 |
- tsneDims[sceSampleInd, ] <- tsne_coordinates |
|
210 |
- |
|
211 |
- } |
|
212 |
- |
|
213 |
- colData(inSCE) = cbind(colData(inSCE), output) |
|
214 |
- }, silent = TRUE) |
|
215 |
- |
|
216 |
- if (inherits(error, "try-error")) { |
|
217 |
- warning("Scrublet did not complete successfully. Returning SCE without", |
|
218 |
- " making any changes. Error given by Scrublet: \n\n", error) |
|
219 |
- } |
|
220 |
- |
|
221 |
- inSCE@metadata$runScrublet <- argsList[-1] |
|
222 |
- |
|
223 |
- ## add scrublet version to metadata |
|
224 |
- version <- pkg_resources$require("scrublet")[[1]] |
|
225 |
- inSCE@metadata$scrublet$packageVersion <- version |
|
226 |
- reducedDim(inSCE,'scrublet_TSNE') <- tsneDims |
|
227 |
- reducedDim(inSCE,'scrublet_UMAP') <- umapDims |
|
228 |
- |
|
229 |
- return(inSCE) |
|
230 |
-} |
|
231 |
- |
|
1 |
+#' @title Find doublets using \code{scrublet}. |
|
2 |
+#' @description A wrapper function that calls \code{scrub_doublets} from python |
|
3 |
+#' module \code{scrublet}. Simulates doublets from the observed data and uses |
|
4 |
+#' a k-nearest-neighbor classifier to calculate a continuous |
|
5 |
+#' \code{scrublet_score} (between 0 and 1) for each transcriptome. The score |
|
6 |
+#' is automatically thresholded to generate \code{scrublet_call}, a boolean |
|
7 |
+#' array that is \code{TRUE} for predicted doublets and \code{FALSE} |
|
8 |
+#' otherwise. |
|
9 |
+#' @param inSCE A \link[SingleCellExperiment]{SingleCellExperiment} object. |
|
10 |
+#' Needs \code{counts} in assays slot. |
|
11 |
+#' @param sample Character vector. Indicates which sample each cell belongs to. |
|
12 |
+#' Scrublet will be run on cells from each sample separately. If NULL, then |
|
13 |
+#' all cells will be processed together. Default \code{NULL}. |
|
14 |
+#' @param useAssay A string specifying which assay in the SCE to use. Default |
|
15 |
+#' 'counts'. |
|
16 |
+#' @param simDoubletRatio Numeric. Number of doublets to simulate relative to |
|
17 |
+#' the number of observed transcriptomes. Default 2.0. |
|
18 |
+#' @param nNeighbors Integer. Number of neighbors used to construct the KNN |
|
19 |
+#' graph of observed transcriptomes and simulated doublets. If \code{NULL}, |
|
20 |
+#' this is set to \code{round(0.5 * sqrt(n_cells))}. Default \code{NULL}. |
|
21 |
+#' @param minDist Float Determines how tightly UMAP packs points together. If \code{NULL}, |
|
22 |
+#' this is set to 0.1. Default \code{NULL}. |
|
23 |
+#' @param expectedDoubletRate The estimated doublet rate for the experiment. |
|
24 |
+#' Default 0.1. |
|
25 |
+#' @param stdevDoubletRate Uncertainty in the expected doublet rate. |
|
26 |
+#' Default 0.02. |
|
27 |
+#' @param syntheticDoubletUmiSubsampling Numeric. Rate for sampling UMIs |
|
28 |
+#' when creating synthetic doublets. If 1.0, each doublet is created by simply |
|
29 |
+#' adding the UMIs from two randomly sampled observed transcriptomes. For |
|
30 |
+#' values less than 1, the UMI counts are added and then randomly sampled at |
|
31 |
+#' the specified rate. Defuault: 1.0. |
|
32 |
+#' @param useApproxNeighbors Boolean. Use approximate nearest neighbor method |
|
33 |
+#' (\code{annoy}) for the KNN classifier. Default \code{TRUE}. |
|
34 |
+#' @param distanceMetric Character. Distance metric used when finding nearest |
|
35 |
+#' neighbors. |
|
36 |
+#' For list of valid values, see the documentation for \code{annoy} (if |
|
37 |
+#' \code{useApproxNeighbors} is \code{TRUE}) or |
|
38 |
+#' \code{sklearn.neighbors.NearestNeighbors} (if \code{useApproxNeighbors} is |
|
39 |
+#' \code{FALSE}). Default "euclidean". |
|
40 |
+#' @param getDoubletNeighborParents Boolean. If \code{TRUE}, return the |
|
41 |
+#' parent transcriptomes that generated the doublet neighbors of each |
|
42 |
+#' observed transcriptome. This information can be used to infer the cell |
|
43 |
+#' states that generated a given doublet state. Default \code{FALSE}. |
|
44 |
+#' @param minCounts Numeric. Used for gene filtering prior to PCA. Genes |
|
45 |
+#' expressed at fewer than \code{minCounts} in fewer than \code{minCells} |
|
46 |
+#' (see below) are excluded. Default 3. |
|
47 |
+#' @param minCells Integer. Used for gene filtering prior to PCA. Genes |
|
48 |
+#' expressed at fewer than \code{minCounts} (see above) in fewer than |
|
49 |
+#' \code{minCells} are excluded. Default 3. |
|
50 |
+#' @param minGeneVariabilityPctl Numeric. Used for gene filtering prior to |
|
51 |
+#' PCA. Keep the most highly variable genes (in the top |
|
52 |
+#' minGeneVariabilityPctl percentile), as measured by the v-statistic |
|
53 |
+#' (\emph{Klein et al., Cell 2015}). Default 85. |
|
54 |
+#' @param logTransform Boolean. If \code{TRUE}, log-transform the counts matrix |
|
55 |
+#' (log10(1+TPM)). \code{sklearn.decomposition.TruncatedSVD} will be used for |
|
56 |
+#' dimensionality reduction, unless \code{meanCenter} is \code{TRUE}. |
|
57 |
+#' Default \code{FALSE}. |
|
58 |
+#' @param meanCenter If \code{TRUE}, center the data such that each gene has a |
|
59 |
+#' mean of 0. \code{sklearn.decomposition.PCA} will be used for |
|
60 |
+#' dimensionality reduction. Default \code{TRUE}. |
|
61 |
+#' @param normalizeVariance Boolean. If \code{TRUE}, normalize the data such |
|
62 |
+#' that each gene has a variance of 1. |
|
63 |
+#' \code{sklearn.decomposition.TruncatedSVD} will be used for dimensionality |
|
64 |
+#' reduction, unless \code{meanCenter} is \code{TRUE}. Default \code{TRUE}. |
|
65 |
+#' @param nPrinComps Integer. Number of principal components used to embed |
|
66 |
+#' the transcriptomes prior to k-nearest-neighbor graph construction. |
|
67 |
+#' Default 30. |
|
68 |
+#' @param tsneAngle Float. Determines angular size of a distant node as measured |
|
69 |
+#' from a point in the t-SNE plot. If default, it is set to 0.5 Default \code{NULL}. |
|
70 |
+#' @param tsnePerplexity Integer. The number of nearest neighbors that |
|
71 |
+#' is used in other manifold learning algorithms. |
|
72 |
+#' If default, it is set to 30. Default \code{NULL}. |
|
73 |
+#' @param verbose Boolean. If \code{TRUE}, print progress updates. Default |
|
74 |
+#' \code{TRUE}. |
|
75 |
+#' @param seed Seed for the random number generator. Default 12345. |
|
76 |
+#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object with |
|
77 |
+#' \code{scrub_doublets} output appended to the |
|
78 |
+#' \link[SummarizedExperiment]{colData} slot. The columns include |
|
79 |
+#' \emph{scrublet_score} and \emph{scrublet_call}. |
|
80 |
+#' @examples |
|
81 |
+#' data(scExample, package = "singleCellTK") |
|
82 |
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
83 |
+#' sce <- runScrublet(sce) |
|
84 |
+#' @export |
|
85 |
+#' @importFrom reticulate py_module_available py_set_seed import |
|
86 |
+#' @importFrom SummarizedExperiment colData colData<- |
|
87 |
+#' @importFrom SingleCellExperiment reducedDim |
|
88 |
+runScrublet <- function(inSCE, |
|
89 |
+ sample = NULL, |
|
90 |
+ useAssay = "counts", |
|
91 |
+ simDoubletRatio = 2.0, |
|
92 |
+ nNeighbors = NULL, |
|
93 |
+ minDist = NULL, |
|
94 |
+ expectedDoubletRate = 0.1, |
|
95 |
+ stdevDoubletRate = 0.02, |
|
96 |
+ syntheticDoubletUmiSubsampling = 1.0, |
|
97 |
+ useApproxNeighbors = TRUE, |
|
98 |
+ distanceMetric = "euclidean", |
|
99 |
+ getDoubletNeighborParents = FALSE, |
|
100 |
+ minCounts = 3, |
|
101 |
+ minCells = 3L, |
|
102 |
+ minGeneVariabilityPctl = 85, |
|
103 |
+ logTransform = FALSE, |
|
104 |
+ meanCenter = TRUE, |
|
105 |
+ normalizeVariance = TRUE, |
|
106 |
+ nPrinComps = 30L, |
|
107 |
+ tsneAngle = NULL, |
|
108 |
+ tsnePerplexity = NULL, |
|
109 |
+ verbose = TRUE, |
|
110 |
+ seed = 12345) { |
|
111 |
+ |
|
112 |
+ if (!reticulate::py_module_available(module = "scrublet")) { |
|
113 |
+ warning("Cannot find python module 'scrublet', please install Conda and", |
|
114 |
+ " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().", |
|
115 |
+ "If one of these have been previously run to install the modules,", |
|
116 |
+ "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),", |
|
117 |
+ " respectively, if R has been restarted since the module installation.", |
|
118 |
+ " Alternatively, Scrublet can be installed on the local machine", |
|
119 |
+ "with pip (e.g. pip install scrublet) and then the 'use_python()'", |
|
120 |
+ " function from the 'reticulate' package can be used to select the", |
|
121 |
+ " correct Python environment.") |
|
122 |
+ return(inSCE) |
|
123 |
+ } |
|
124 |
+ |
|
125 |
+ if (!is.null(seed)) { |
|
126 |
+ reticulate::py_set_seed(seed = seed) |
|
127 |
+ } |
|
128 |
+ |
|
129 |
+ if (!is.null(sample)) { |
|
130 |
+ if (length(sample) != ncol(inSCE)) { |
|
131 |
+ stop("'sample' must be the same length as the number of", |
|
132 |
+ " columns in 'inSCE'") |
|
133 |
+ } |
|
134 |
+ } else { |
|
135 |
+ sample = rep(1, ncol(inSCE)) |
|
136 |
+ } |
|
137 |
+ |
|
138 |
+ message(paste0(date(), " ... Running 'scrublet'")) |
|
139 |
+ |
|
140 |
+ ## Getting current arguments values |
|
141 |
+ #argsList <- as.list(formals(fun = sys.function(sys.parent()), envir = parent.frame())) |
|
142 |
+ argsList <- mget(names(formals()),sys.frame(sys.nframe())) |
|
143 |
+ |
|
144 |
+ ## Define result matrix for all samples |
|
145 |
+ output <- S4Vectors::DataFrame(row.names = colnames(inSCE), |
|
146 |
+ scrublet_score = numeric(ncol(inSCE)), |
|
147 |
+ scrublet_call = logical(ncol(inSCE))) |
|
148 |
+ |
|
149 |
+ ## Loop through each sample and run scrublet |
|
150 |
+ error <- try({ |
|
151 |
+ samples <- unique(sample) |
|
152 |
+ umapDims <- matrix(ncol = 2, |
|
153 |
+ nrow = ncol(inSCE)) |
|
154 |
+ rownames(umapDims) = colnames(inSCE) |
|
155 |
+ colnames(umapDims) = c("UMAP_1", "UMAP_2") |
|
156 |
+ |
|
157 |
+ tsneDims <- matrix(ncol = 2, |
|
158 |
+ nrow = ncol(inSCE)) |
|
159 |
+ rownames(tsneDims) = colnames(inSCE) |
|
160 |
+ colnames(tsneDims) = c("TSNE_1", "TSNE_2") |
|
161 |
+ |
|
162 |
+ for (i in seq_len(length(samples))) { |
|
163 |
+ sceSampleInd <- sample == samples[i] |
|
164 |
+ sceSample <- inSCE[, sceSampleInd] |
|
165 |
+ |
|
166 |
+ mat <- SummarizedExperiment::assay(sceSample, i = useAssay) |
|
167 |
+ mat <- .convertToMatrix(mat) |
|
168 |
+ |
|
169 |
+ scr <- scrublet$Scrublet(counts_matrix = t(mat), |
|
170 |
+ sim_doublet_ratio = simDoubletRatio, |
|
171 |
+ n_neighbors = nNeighbors, |
|
172 |
+ expected_doublet_rate = expectedDoubletRate, |
|
173 |
+ stdev_doublet_rate = stdevDoubletRate) |
|
174 |
+ |
|
175 |
+ result <- scr$scrub_doublets( |
|
176 |
+ synthetic_doublet_umi_subsampling = syntheticDoubletUmiSubsampling, |
|
177 |
+ use_approx_neighbors = useApproxNeighbors, |
|
178 |
+ distance_metric = distanceMetric, |
|
179 |
+ get_doublet_neighbor_parents = getDoubletNeighborParents, |
|
180 |
+ min_counts = minCounts, |
|
181 |
+ min_cells = as.integer(minCells), |
|
182 |
+ min_gene_variability_pctl = minGeneVariabilityPctl, |
|
183 |
+ log_transform = logTransform, |
|
184 |
+ mean_center = meanCenter, |
|
185 |
+ normalize_variance = normalizeVariance, |
|
186 |
+ n_prin_comps = as.integer(nPrinComps), |
|
187 |
+ verbose = verbose) |
|
188 |
+ |
|
189 |
+ output[sceSampleInd, "scrublet_score"] <- result[[1]] |
|
190 |
+ output[sceSampleInd, "scrublet_call"] <- result[[2]] |
|
191 |
+ |
|
192 |
+ ## Extract UMAP and TSNE coordinates |
|
193 |
+ if (is.null(nNeighbors) && is.null(minDist)){ |
|
194 |
+ umap_coordinates <- scrublet$get_umap(scr$manifold_obs_) |
|
195 |
+ }else { |
|
196 |
+ umap_coordinates <- scrublet$get_umap(scr$manifold_obs_, |
|
197 |
+ n_neighbors=as.integer(nNeighbors), |
|
198 |
+ min_dist=minDist) |
|
199 |
+ } |
|
200 |
+ umapDims[sceSampleInd, ] <- umap_coordinates |
|
201 |
+ |
|
202 |
+ if (is.null(tsneAngle) && is.null(tsnePerplexity)){ |
|
203 |
+ tsne_coordinates <- scrublet$get_tsne(scr$manifold_obs_) |
|
204 |
+ }else { |
|
205 |
+ tsne_coordinates <- scrublet$get_tsne(scr$manifold_obs_, |
|
206 |
+ angle=tsneAngle, |
|
207 |
+ perplexity=as.integer(tsnePerplexity)) |
|
208 |
+ } |
|
209 |
+ tsneDims[sceSampleInd, ] <- tsne_coordinates |
|
210 |
+ |
|
211 |
+ } |
|
212 |
+ |
|
213 |
+ colData(inSCE) = cbind(colData(inSCE), output) |
|
214 |
+ }, silent = TRUE) |
|
215 |
+ |
|
216 |
+ if (inherits(error, "try-error")) { |
|
217 |
+ warning("Scrublet did not complete successfully. Returning SCE without", |
|
218 |
+ " making any changes. Error given by Scrublet: \n\n", error) |
|
219 |
+ } |
|
220 |
+ |
|
221 |
+ inSCE@metadata$runScrublet <- argsList[-1] |
|
222 |
+ |
|
223 |
+ ## add scrublet version to metadata |
|
224 |
+ version <- pkg_resources$require("scrublet")[[1]] |
|
225 |
+ inSCE@metadata$scrublet$packageVersion <- version |
|
226 |
+ reducedDim(inSCE,'scrublet_TSNE') <- tsneDims |
|
227 |
+ reducedDim(inSCE,'scrublet_UMAP') <- umapDims |
|
228 |
+ |
|
229 |
+ return(inSCE) |
|
230 |
+} |
|
231 |
+ |
... | ... |
@@ -73,7 +73,7 @@ dimension reduction. |
73 | 73 |
} |
74 | 74 |
\examples{ |
75 | 75 |
data(scExample, package = "singleCellTK") |
76 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
76 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
77 | 77 |
umap_res <- getUMAP(inSCE = sce, useAssay = "counts", |
78 | 78 |
reducedDimName = "UMAP", logNorm = TRUE, |
79 | 79 |
nNeighbors = 30, alpha = 1, |
... | ... |
@@ -141,7 +141,7 @@ Plot heatmap of using data stored in SingleCellExperiment Object |
141 | 141 |
} |
142 | 142 |
\examples{ |
143 | 143 |
data(scExample, package = "singleCellTK") |
144 |
-plotSCEHeatmap(sce[1:3,1:3]) |
|
144 |
+plotSCEHeatmap(sce[1:3,1:3], useAssay = "counts") |
|
145 | 145 |
} |
146 | 146 |
\author{ |
147 | 147 |
Yichen Wang |
... | ... |
@@ -37,7 +37,7 @@ Plot UMAP results either on already run results or run first and then plot. |
37 | 37 |
} |
38 | 38 |
\examples{ |
39 | 39 |
data(scExample, package = "singleCellTK") |
40 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
40 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
41 | 41 |
sce <- getUMAP(inSCE = sce, useAssay = "counts", reducedDimName = "UMAP") |
42 | 42 |
plotUMAP(sce, shape = "No Shape", reducedDimName = "UMAP", |
43 | 43 |
runUMAP = TRUE, useAssay = "counts") |
... | ... |
@@ -32,7 +32,7 @@ A function to generate .html Rmarkdown report containing the visualizations of |
32 | 32 |
} |
33 | 33 |
\examples{ |
34 | 34 |
data(scExample, package = "singleCellTK") |
35 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
35 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
36 | 36 |
\dontrun{ |
37 | 37 |
sce <- runCellQC(sce) |
38 | 38 |
reportCellQC(inSCE = sce) |
... | ... |
@@ -28,10 +28,3 @@ Default \code{NULL}.} |
28 | 28 |
A function to generate .html Rmarkdown report containing the |
29 | 29 |
visualizations of the \code{\link{runDEAnalysis}} function output |
30 | 30 |
} |
31 |
-\examples{ |
|
32 |
-data(scExample, package = "singleCellTK") |
|
33 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
34 |
-sce <- runDEAnalysis(inSCE = sce, groupName1 = "Sample1", method = "DESeq2", |
|
35 |
- groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, analysisName = "DESeq2") |
|
36 |
-reportDiffExp(sce, study = "DESeq2", output_file = "DESeq2_res") |
|
37 |
-} |
... | ... |
@@ -32,7 +32,7 @@ A function to generate .html Rmarkdown report for the specified QC algorithm ou |
32 | 32 |
} |
33 | 33 |
\examples{ |
34 | 34 |
data(scExample, package = "singleCellTK") |
35 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
35 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
36 | 36 |
\donttest{ |
37 | 37 |
sce <- runDecontX(sce) |
38 | 38 |
sce <- getUMAP(sce) |
... | ... |
@@ -94,7 +94,7 @@ only. |
94 | 94 |
} |
95 | 95 |
\examples{ |
96 | 96 |
data(scExample, package = "singleCellTK") |
97 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
97 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
98 | 98 |
sce@assays@data$logcounts <- log10(counts(sce) + 1) |
99 | 99 |
sce <- runANOVA(inSCE = sce, groupName1 = "Sample1", |
100 | 100 |
groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, |
... | ... |
@@ -60,6 +60,6 @@ A wrapper function for \link[scds]{bcds}. Annotate |
60 | 60 |
} |
61 | 61 |
\examples{ |
62 | 62 |
data(scExample, package = "singleCellTK") |
63 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
63 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
64 | 64 |
sce <- runBcds(sce) |
65 | 65 |
} |
... | ... |
@@ -53,6 +53,6 @@ A wrapper function for \link[scds]{cxds}. Annotate |
53 | 53 |
} |
54 | 54 |
\examples{ |
55 | 55 |
data(scExample, package = "singleCellTK") |
56 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
56 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
57 | 57 |
sce <- runCxds(sce) |
58 | 58 |
} |
... | ... |
@@ -59,6 +59,6 @@ A wrapper function for \link[scds]{cxds_bcds_hybrid}. Annotate |
59 | 59 |
} |
60 | 60 |
\examples{ |
61 | 61 |
data(scExample, package = "singleCellTK") |
62 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
62 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
63 | 63 |
sce <- runCxdsBcdsHybrid(sce) |
64 | 64 |
} |
... | ... |
@@ -26,7 +26,7 @@ Method supported: 'MAST', 'DESeq2', 'Limma', 'ANOVA' |
26 | 26 |
} |
27 | 27 |
\examples{ |
28 | 28 |
data(scExample, package = "singleCellTK") |
29 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
29 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
30 | 30 |
sce <- runDEAnalysis(inSCE = sce, groupName1 = "Sample1", method = "DESeq2", |
31 | 31 |
groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, analysisName = "DESeq2") |
32 | 32 |
} |
... | ... |
@@ -94,7 +94,7 @@ used. |
94 | 94 |
} |
95 | 95 |
\examples{ |
96 | 96 |
data(scExample, package = "singleCellTK") |
97 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
97 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
98 | 98 |
sce <- runDESeq2(inSCE = sce, groupName1 = "Sample1", |
99 | 99 |
groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, analysisName = "DESeq2") |
100 | 100 |
|
... | ... |
@@ -96,6 +96,6 @@ A wrapper function for \link[celda]{decontX}. Identify |
96 | 96 |
} |
97 | 97 |
\examples{ |
98 | 98 |
data(scExample, package = "singleCellTK") |
99 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
99 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
100 | 100 |
sce <- runDecontX(sce) |
101 | 101 |
} |
... | ... |
@@ -93,7 +93,7 @@ This function is a wrapper function for \link[scran]{doubletCells}. |
93 | 93 |
} |
94 | 94 |
\examples{ |
95 | 95 |
data(scExample, package = "singleCellTK") |
96 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
96 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
97 | 97 |
sce <- runDoubletCells(sce) |
98 | 98 |
} |
99 | 99 |
\references{ |
... | ... |
@@ -54,6 +54,6 @@ Uses doubletFinder to determine cells within the dataset |
54 | 54 |
} |
55 | 55 |
\examples{ |
56 | 56 |
data(scExample, package = "singleCellTK") |
57 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
57 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
58 | 58 |
sce <- runDoubletFinder(sce) |
59 | 59 |
} |
... | ... |
@@ -90,7 +90,7 @@ used. |
90 | 90 |
} |
91 | 91 |
\examples{ |
92 | 92 |
data(scExample, package = "singleCellTK") |
93 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
93 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
94 | 94 |
sce@assays@data$logcounts <- log10(counts(sce) + 1) |
95 | 95 |
sce <- runLimmaDE(inSCE = sce, groupName1 = "Sample1", |
96 | 96 |
groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, analysisName = "Limma") |
... | ... |
@@ -90,7 +90,7 @@ used. |
90 | 90 |
} |
91 | 91 |
\examples{ |
92 | 92 |
data(scExample, package = "singleCellTK") |
93 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
93 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
94 | 94 |
sce@assays@data$logcounts <- log10(counts(sce) + 1) |
95 | 95 |
sce <- runMAST(inSCE = sce, groupName1 = "Sample1", |
96 | 96 |
groupName2 = "Sample2", index1 = 1:20, index2 = 21:40, analysisName = "MAST") |
... | ... |
@@ -138,6 +138,6 @@ A wrapper function that calls \code{scrub_doublets} from python |
138 | 138 |
} |
139 | 139 |
\examples{ |
140 | 140 |
data(scExample, package = "singleCellTK") |
141 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
141 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
142 | 142 |
sce <- runScrublet(sce) |
143 | 143 |
} |
... | ... |
@@ -24,6 +24,6 @@ Plot QC metrics generated from QC algorithms via either kable or csv file. |
24 | 24 |
} |
25 | 25 |
\examples{ |
26 | 26 |
data(scExample, package = "singleCellTK") |
27 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
27 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
28 | 28 |
sampleSummaryStats(sce, simple = TRUE) |
29 | 29 |
} |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
library(singleCellTK) |
3 | 3 |
context("Testing decontamination algorithms") |
4 | 4 |
data(scExample, package = "singleCellTK") |
5 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
5 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
6 | 6 |
|
7 | 7 |
test_that(desc = "Testing runDecontX", { |
8 | 8 |
sceres <- runDecontX(sce) |
... | ... |
@@ -3,16 +3,16 @@ context("Testing mergeSCColData") |
3 | 3 |
|
4 | 4 |
data(scExample, package = "singleCellTK") |
5 | 5 |
|
6 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
6 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
7 | 7 |
|
8 | 8 |
colData(sce)$column_name = rownames(colData(sce)) |
9 | 9 |
test_that(desc = "Testing mergeSCEColData", { |
10 | 10 |
sce2 <- sce |
11 | 11 |
colData(sce2)$test <- 0 |
12 |
- |
|
12 |
+ |
|
13 | 13 |
#test again |
14 | 14 |
mergedsce <- mergeSCEColData(sce, sce2) |
15 |
- |
|
15 |
+ |
|
16 | 16 |
expect_equal(ncol(colData(sce)) + 1, ncol(colData(mergedsce))) |
17 | 17 |
}) |
18 | 18 |
|
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
context("misc functions") |
2 | 2 |
|
3 | 3 |
data(scExample, package = "singleCellTK") |
4 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
4 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
5 | 5 |
|
6 | 6 |
test_that("summarizeSCE", { |
7 | 7 |
ta <- summarizeSCE(sce, sample = NULL) |
... | ... |
@@ -3,7 +3,7 @@ library(singleCellTK) |
3 | 3 |
context("Testing dimensionality reduction algorithms") |
4 | 4 |
data(scExample, package = "singleCellTK") |
5 | 5 |
sceDroplet <- sce |
6 |
-sce <- sce[, colData(sce)$type != 'EmptyDroplet'] |
|
6 |
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
7 | 7 |
sampleVector <- c(rep("Sample1", 100), rep("Sample2", 95)) |
8 | 8 |
sceres <- getUMAP(inSCE = sce, useAssay = "counts", logNorm = TRUE, sample = sampleVector, nNeighbors = 10, reducedDimName = "UMAP", |
9 | 9 |
nIterations = 20, alpha = 1, minDist = 0.01, pca = TRUE, initialDims = 20) |