Browse code

Switch from the now-deprecated scran doubletCells to scDblFinder and update the dependent code, documentation, and tests accordingly.

unknown authored on 23/10/2020 16:38:53
Showing 7 changed files

... ...
@@ -105,7 +105,8 @@ Imports:
105 105
     yaml,
106 106
     rmarkdown,
107 107
     magrittr,
108
-    kableExtra
108
+    kableExtra,
109
+    scDblFinder
109 110
 RoxygenNote: 7.1.1
110 111
 Suggests:
111 112
     testthat,
... ...
@@ -1116,7 +1116,6 @@ plotDoubletFinderResults <- function(inSCE,
1116 1116
 #'  Default TRUE.
1117 1117
 #' @param dots Boolean. If TRUE, will plot dots for each violin plot.
1118 1118
 #'  Default TRUE.
1119
-#' @param logScore Boolean. If TRUE, the log normalized doublet score will be used.
1120 1119
 #' @param reducedDimName Saved dimension reduction name in the
1121 1120
 #' \linkS4class{SingleCellExperiment} object. Required.
1122 1121
 #' @param xlab Character vector. Label for x-axis. Default NULL.
... ...
@@ -1173,7 +1172,6 @@ plotDoubletCellsResults <- function(inSCE,
1173 1172
                                     violin=TRUE,
1174 1173
                                     boxplot=FALSE,
1175 1174
                                     dots=TRUE,
1176
-                                    logScore=TRUE,
1177 1175
                                     reducedDimName=NULL,
1178 1176
                                     xlab=NULL,
1179 1177
                                     ylab=NULL,
... ...
@@ -1212,13 +1210,9 @@ plotDoubletCellsResults <- function(inSCE,
1212 1210
   }
1213 1211
   sampleVector <- sample
1214 1212
 
1215
-  if (logScore) {
1216
-    coldata = "scran_doubletCells_score_log10"
1217
-    titleDoubletCells <- "DoubletCells Doublet Score, log10"
1218
-  } else {
1219
-    coldata = "scran_doubletCells_score"
1220
-    titleDoubletCells <- "DoubletCells Doublet Score"
1221
-  }
1213
+
1214
+  coldata = "scran_doubletCells_score"
1215
+  titleDoubletCells <- "DoubletCells Doublet Score"
1222 1216
 
1223 1217
   samples <- unique(sample)
1224 1218
   if (length(samples) > 1) {
... ...
@@ -1,81 +1,60 @@
1
-.runDoubletCells <- function(cell.matrix = cell.matrix,
2
-                              k = k,
3
-                              nIters = nIters,
4
-                              size.factors.norm = NULL,
5
-                              size.factors.content = NULL,
6
-                              subset.row = NULL,
7
-                              block = 10000,
8
-                              d = 50,
9
-                              force.match=FALSE,
10
-                              force.k=20,
11
-                              force.ndist=3,
12
-                              BNPARAM=BNPARAM,
13
-                              BSPARAM=BSPARAM,
14
-                              BPPARAM=BPPARAM
15
-                              ) {
16
-
17
-  cell.matrix <- .convertToMatrix(cell.matrix)
18
-
19
-  scores <- matrix(scran::doubletCells(cell.matrix, k = k,
20
-                                       niters = nIters,
21
-                                       size.factors.norm = NULL,
22
-                                       size.factors.content = NULL,
23
-                                       subset.row = NULL,
24
-                                       block = 10000,
25
-                                       d = 50,
26
-                                       force.match=FALSE,
27
-                                       force.k=20,
28
-                                       force.ndist=3,
29
-                                       BNPARAM=BNPARAM,
30
-                                       BSPARAM=BSPARAM,
31
-                                       BPPARAM=BPPARAM
32
-                                       ), ncol=1)
33
-  scores <- cbind(scores,log10(scores[,1]+1))
34
-  colnames(scores) <- c("scran_doubletCells_score", "scran_doubletCells_score_log10")
35
-
36
-
37
-  return(scores)
38
-}
39
-
40
-
41
-#' @title Detect doublet cells using \link[scran]{doubletCells}.
42
-#' @description A wrapper function for \link[scran]{doubletCells}. Identify
1
+# .runDoubletCells <- function(cell.matrix = cell.matrix,
2
+#                               k = k,
3
+#                               nIters = nIters,
4
+#                               size.factors.norm = NULL,
5
+#                               size.factors.content = NULL,
6
+#                               subset.row = NULL,
7
+#                               block = 10000,
8
+#                               d = 50,
9
+#                               force.match=FALSE,
10
+#                               force.k=20,
11
+#                               force.ndist=3,
12
+#                               BNPARAM=BNPARAM,
13
+#                               BSPARAM=BSPARAM,
14
+#                               BPPARAM=BPPARAM
15
+#                               ) {
16
+# 
17
+#   cell.matrix <- .convertToMatrix(cell.matrix)
18
+# 
19
+#   scores <- matrix(scran::doubletCells(cell.matrix, k = k,
20
+#                                        niters = nIters,
21
+#                                        size.factors.norm = NULL,
22
+#                                        size.factors.content = NULL,
23
+#                                        subset.row = NULL,
24
+#                                        block = 10000,
25
+#                                        d = 50,
26
+#                                        force.match=FALSE,
27
+#                                        force.k=20,
28
+#                                        force.ndist=3,
29
+#                                        BNPARAM=BNPARAM,
30
+#                                        BSPARAM=BSPARAM,
31
+#                                        BPPARAM=BPPARAM
32
+#                                        ), ncol=1)
33
+#   scores <- cbind(scores,log10(scores[,1]+1))
34
+#   colnames(scores) <- c("scran_doubletCells_score", "scran_doubletCells_score_log10")
35
+# 
36
+# 
37
+#   return(scores)
38
+# }
39
+# 
40
+
41
+#' @title Detect doublet cells using \link[scDblFinder]{scDblFinder}.
42
+#' @description A wrapper function for \link[scDblFinder]{scDblFinder}. Identify
43 43
 #'  potential doublet cells based on simulations of putative doublet expression
44 44
 #'  profiles. Generate a doublet score for each cell.
45 45
 #' @param inSCE A \link[SingleCellExperiment]{SingleCellExperiment} object.
46 46
 #' @param sample Character vector. Indicates which sample each cell belongs to.
47
-#'  \link[scran]{doubletCells} will be run on cells from each sample separately.
47
+#'  \link[scDblFinder]{scDblFinder} will be run on cells from each sample separately.
48 48
 #' @param useAssay  A string specifying which assay in the SCE to use.
49 49
 #' @param nNeighbors Number of nearest neighbors used to calculate density for
50 50
 #'  doublet detection. Default 50.
51 51
 #' @param simDoublets Number of simulated doublets created for doublet
52 52
 #'  detection. Default 10000.
53 53
 #' @param seed Seed for the random number generator. Default 12345.
54
-#' @param size.factors.norm A numeric vector of size factors for normalization
55
-#'  of \code{x} prior to PCA and distance calculations. If \code{NULL}, defaults
56
-#'  to size factors derived from the library sizes of \code{x}. For the SingleCellExperiment
57
-#'  method, the default values are taken from \code{\link{sizeFactors}(x)}, if they are available.
58
-#' @param size.factors.content A numeric vector of size factors for RNA content
59
-#'  normalization of \code{x} prior to simulating doublets. #' This is orthogonal to
60
-#'  the values in \code{size.factors.norm}
61
-#' @param subset.row See \code{?"\link{scran-gene-selection}"}.
62
-#' @param block An integer scalar controlling the rate of doublet generation,
63
-#'  to keep memory usage low.
64
-#' @param d An integer scalar specifying the number of components to retain after the PCA.
65
-#' @param force.match A logical scalar indicating whether remapping of simulated
66
-#'  doublets to original cells should be performed.
67
-#' @param force.k An integer scalar specifying the number of neighbours to use for
68
-#'  remapping if \code{force.match=TRUE}.
69
-#' @param force.ndist A numeric scalar specifying the bandwidth for remapping
70
-#'  if \code{force.match=TRUE}.
71
-#' @param BNPARAM A \code{\link[BiocNeighbors]{BiocNeighborParam}} object specifying the nearest neighbor algorithm.
72
-#' This should be an algorithm supported by \code{\link[BiocNeighbors]{findNeighbors}}.
73
-#' @param BSPARAM A \code{\link[BiocSingular]{BiocSingularParam}} object specifying the algorithm to
74
-#'  use for PCA, if \code{d} is not \code{NA}.
75 54
 #' @param BPPARAM A \code{\link{BiocParallelParam}} object specifying whether the
76 55
 #'  neighbour searches should be parallelized.
77
-#' @details This function is a wrapper function for \link[scran]{doubletCells}.
78
-#'  \code{runDoubletCells} runs \link[scran]{doubletCells} for each
56
+#' @details This function is a wrapper function for \link[scDblFinder]{scDblFinder}.
57
+#'  \code{runDoubletCells} runs \link[scDblFinder]{scDblFinder} for each
79 58
 #'  \code{sample} within \code{inSCE} iteratively. The
80 59
 #'  resulting doublet scores for all cells will be appended to the
81 60
 #'  \link{colData} of \code{inSCE}.
... ...
@@ -98,16 +77,16 @@ runDoubletCells <- function(inSCE,
98 77
     nNeighbors = 50,
99 78
     simDoublets = max(10000, ncol(inSCE)),
100 79
     seed = 12345,
101
-    size.factors.norm = NULL,
102
-    size.factors.content = NULL,
103
-    subset.row = NULL,
104
-    block = 10000,
105
-    d = 50,
106
-    force.match=FALSE,
107
-    force.k=20,
108
-    force.ndist=3,
109
-    BNPARAM=BiocNeighbors::KmknnParam(),
110
-    BSPARAM=BiocSingular::bsparam(),
80
+    # size.factors.norm = NULL,
81
+    # size.factors.content = NULL,
82
+    # subset.row = NULL,
83
+    # block = 10000,
84
+    # d = 50,
85
+    # force.match=FALSE,
86
+    # force.k=20,
87
+    # force.ndist=3,
88
+    # BNPARAM=BiocNeighbors::KmknnParam(),
89
+    # BSPARAM=BiocSingular::bsparam(),
111 90
     BPPARAM=BiocParallel::SerialParam()
112 91
 ) {
113 92
   #argsList <- as.list(formals(fun = sys.function(sys.parent()), envir = parent.frame()))
... ...
@@ -124,45 +103,54 @@ runDoubletCells <- function(inSCE,
124 103
   message(paste0(date(), " ... Running 'doubletCells'"))
125 104
 
126 105
   ## Define result matrix for all samples
127
-  output <- S4Vectors::DataFrame(row.names = colnames(inSCE),
128
-            scran_doubletCells_score = numeric(ncol(inSCE)),
129
-            scran_doubletCells_score_log10 = numeric(ncol(inSCE)))
106
+  # output <- S4Vectors::DataFrame(row.names = colnames(inSCE),
107
+  #           scran_doubletCells_score = numeric(ncol(inSCE)),
108
+  #           scran_doubletCells_score_log10 = numeric(ncol(inSCE)))
130 109
 
131 110
   ## Loop through each sample and run barcodeRank
132
-  samples <- unique(sample)
133
-  for (i in seq_len(length(samples))) {
134
-    sceSampleInd <- sample == samples[i]
135
-    sceSample <- inSCE[, sceSampleInd]
136
-
137
-    mat <- SummarizedExperiment::assay(sceSample, i = useAssay)
138
-
139
-    result <- withr::with_seed(seed,
140
-              .runDoubletCells(cell.matrix = mat,
141
-                               k = nNeighbors,
142
-                               nIters = simDoublets,
143
-                               size.factors.norm = NULL,
144
-                               size.factors.content = NULL,
145
-                               subset.row = NULL,
146
-                               block = 10000,
147
-                               d = 50,
148
-                               force.match=FALSE,
149
-                               force.k=20,
150
-                               force.ndist=3,
151
-                               BNPARAM=BNPARAM,
152
-                               BSPARAM=BSPARAM,
153
-                               BPPARAM=BPPARAM
154
-                               ))
155
-
156
-    output[sceSampleInd, ] <- result
157
-  }
111
+  #samples <- unique(sample)
112
+  
113
+  inSCE <- withr::with_seed(seed,
114
+                            scDblFinder::scDblFinder(sce = inSCE,
115
+                            samples = sample,
116
+                            artificialDoublets = simDoublets,
117
+                            k = nNeighbors,
118
+                            verbose = FALSE
119
+                            ))
120
+  names(SummarizedExperiment::colData(inSCE)) <- gsub(pattern = "scDblFinder\\.",
121
+                                                      "scran_doubletCells_",
122
+                                                      names(SummarizedExperiment::colData(inSCE)))
123
+  
124
+  # for (i in seq_len(length(samples))) {
125
+  #   sceSampleInd <- sample == samples[i]
126
+  #   sceSample <- inSCE[, sceSampleInd]
127
+  # 
128
+  #   mat <- SummarizedExperiment::assay(sceSample, i = useAssay)
129
+  # 
130
+  #   result <- withr::with_seed(seed,
131
+  #             .runDoubletCells(cell.matrix = mat,
132
+  #                              k = nNeighbors,
133
+  #                              nIters = simDoublets,
134
+  #                              size.factors.norm = NULL,
135
+  #                              size.factors.content = NULL,
136
+  #                              subset.row = NULL,
137
+  #                              block = 10000,
138
+  #                              d = 50,
139
+  #                              force.match=FALSE,
140
+  #                              force.k=20,
141
+  #                              force.ndist=3,
142
+  #                              BNPARAM=BNPARAM,
143
+  #                              BSPARAM=BSPARAM,
144
+  #                              BPPARAM=BPPARAM
145
+  #                              ))
146
+  # 
147
+  #   output[sceSampleInd, ] <- result
148
+  # }
149
+
150
+  argsList <- argsList[!names(argsList) %in% c("BPPARAM")]
158 151
 
159
-  argsList <- argsList[!names(argsList) %in% c("BNPARAM","BSPARAM","BPPARAM")]
160
-  #dotList <- list(...)
161
-  #dotList <- dotList[!names(dotList) %in% c("BNPARAM","BSPARAM","BPPARAM")]
162
-  #argsList <- c(argsList, dotList)
163 152
   inSCE@metadata$runDoubletCells <- argsList[-1]
164
-  inSCE@metadata$runDoubletCells$packageVersion <- utils::packageDescription("scran")$Version
165
-  colData(inSCE) = cbind(colData(inSCE), output)
153
+  inSCE@metadata$runDoubletCells$packageVersion <- utils::packageDescription("scDblFinder")$Version
166 154
 
167 155
   return(inSCE)
168 156
 }
... ...
@@ -359,7 +359,7 @@ description_DoubletCells<- descriptionDoubletCells()
359 359
 i="DoubletCells"
360 360
 cat(paste0('## ', i, ' \n'))
361 361
 
362
-doubletCellData <- c("scran_doubletCells_score_log10")
362
+doubletCellData <- c("scran_doubletCells_score")
363 363
 skipDoubletCell <- any(!doubletCellData %in% names(colData(sce.qc)))
364 364
 
365 365
 if (skipDoubletCell) {
... ...
@@ -13,7 +13,6 @@ plotDoubletCellsResults(
13 13
   violin = TRUE,
14 14
   boxplot = FALSE,
15 15
   dots = TRUE,
16
-  logScore = TRUE,
17 16
   reducedDimName = NULL,
18 17
   xlab = NULL,
19 18
   ylab = NULL,
... ...
@@ -67,8 +66,6 @@ Default TRUE.}
67 66
 \item{dots}{Boolean. If TRUE, will plot dots for each violin plot.
68 67
 Default TRUE.}
69 68
 
70
-\item{logScore}{Boolean. If TRUE, the log normalized doublet score will be used.}
71
-
72 69
 \item{reducedDimName}{Saved dimension reduction name in the
73 70
 \linkS4class{SingleCellExperiment} object. Required.}
74 71
 
... ...
@@ -2,7 +2,7 @@
2 2
 % Please edit documentation in R/scran_doubletCells.R
3 3
 \name{runDoubletCells}
4 4
 \alias{runDoubletCells}
5
-\title{Detect doublet cells using \link[scran]{doubletCells}.}
5
+\title{Detect doublet cells using \link[scDblFinder]{scDblFinder}.}
6 6
 \usage{
7 7
 runDoubletCells(
8 8
   inSCE,
... ...
@@ -11,16 +11,6 @@ runDoubletCells(
11 11
   nNeighbors = 50,
12 12
   simDoublets = max(10000, ncol(inSCE)),
13 13
   seed = 12345,
14
-  size.factors.norm = NULL,
15
-  size.factors.content = NULL,
16
-  subset.row = NULL,
17
-  block = 10000,
18
-  d = 50,
19
-  force.match = FALSE,
20
-  force.k = 20,
21
-  force.ndist = 3,
22
-  BNPARAM = BiocNeighbors::KmknnParam(),
23
-  BSPARAM = BiocSingular::bsparam(),
24 14
   BPPARAM = BiocParallel::SerialParam()
25 15
 )
26 16
 }
... ...
@@ -28,7 +18,7 @@ runDoubletCells(
28 18
 \item{inSCE}{A \link[SingleCellExperiment]{SingleCellExperiment} object.}
29 19
 
30 20
 \item{sample}{Character vector. Indicates which sample each cell belongs to.
31
-\link[scran]{doubletCells} will be run on cells from each sample separately.}
21
+\link[scDblFinder]{scDblFinder} will be run on cells from each sample separately.}
32 22
 
33 23
 \item{useAssay}{A string specifying which assay in the SCE to use.}
34 24
 
... ...
@@ -40,37 +30,6 @@ detection. Default 10000.}
40 30
 
41 31
 \item{seed}{Seed for the random number generator. Default 12345.}
42 32
 
43
-\item{size.factors.norm}{A numeric vector of size factors for normalization
44
-of \code{x} prior to PCA and distance calculations. If \code{NULL}, defaults
45
-to size factors derived from the library sizes of \code{x}. For the SingleCellExperiment
46
-method, the default values are taken from \code{\link{sizeFactors}(x)}, if they are available.}
47
-
48
-\item{size.factors.content}{A numeric vector of size factors for RNA content
49
-normalization of \code{x} prior to simulating doublets. #' This is orthogonal to
50
-the values in \code{size.factors.norm}}
51
-
52
-\item{subset.row}{See \code{?"\link{scran-gene-selection}"}.}
53
-
54
-\item{block}{An integer scalar controlling the rate of doublet generation,
55
-to keep memory usage low.}
56
-
57
-\item{d}{An integer scalar specifying the number of components to retain after the PCA.}
58
-
59
-\item{force.match}{A logical scalar indicating whether remapping of simulated
60
-doublets to original cells should be performed.}
61
-
62
-\item{force.k}{An integer scalar specifying the number of neighbours to use for
63
-remapping if \code{force.match=TRUE}.}
64
-
65
-\item{force.ndist}{A numeric scalar specifying the bandwidth for remapping
66
-if \code{force.match=TRUE}.}
67
-
68
-\item{BNPARAM}{A \code{\link[BiocNeighbors]{BiocNeighborParam}} object specifying the nearest neighbor algorithm.
69
-This should be an algorithm supported by \code{\link[BiocNeighbors]{findNeighbors}}.}
70
-
71
-\item{BSPARAM}{A \code{\link[BiocSingular]{BiocSingularParam}} object specifying the algorithm to
72
-use for PCA, if \code{d} is not \code{NA}.}
73
-
74 33
 \item{BPPARAM}{A \code{\link{BiocParallelParam}} object specifying whether the
75 34
 neighbour searches should be parallelized.}
76 35
 }
... ...
@@ -80,13 +39,13 @@ A \link[SingleCellExperiment]{SingleCellExperiment} object with the
80 39
  \link{colData} slot.
81 40
 }
82 41
 \description{
83
-A wrapper function for \link[scran]{doubletCells}. Identify
42
+A wrapper function for \link[scDblFinder]{scDblFinder}. Identify
84 43
  potential doublet cells based on simulations of putative doublet expression
85 44
  profiles. Generate a doublet score for each cell.
86 45
 }
87 46
 \details{
88
-This function is a wrapper function for \link[scran]{doubletCells}.
89
- \code{runDoubletCells} runs \link[scran]{doubletCells} for each
47
+This function is a wrapper function for \link[scDblFinder]{scDblFinder}.
48
+ \code{runDoubletCells} runs \link[scDblFinder]{scDblFinder} for each
90 49
  \code{sample} within \code{inSCE} iteratively. The
91 50
  resulting doublet scores for all cells will be appended to the
92 51
  \link{colData} of \code{inSCE}.
... ...
@@ -51,7 +51,7 @@ test_that(desc = "Testing plotSCEViolin functions", {
51 51
 sceres <- sceres[, colData(sceres)$type != 'EmptyDroplet']
52 52
 sceres <- runCellQC(sceres, algorithms = c("QCMetrics", "cxds", "bcds", "cxds_bcds_hybrid",
53 53
                                               "doubletFinder", "decontX"))
54
-sceres <- runDoubletCells(sceres, size.factors.norm = rep(1, ncol(sceres)))
54
+sceres <- runDoubletCells(sceres)
55 55
 
56 56
 
57 57
 context("Testing QC functions")