Browse code

Edits to sampleSummaryStats function, add getter/setters

Yusuke Koga authored on 23/01/2022 16:15:55
Showing 9 changed files

... ...
@@ -36,6 +36,7 @@ export(getDEGTopTable)
36 36
 export(getGenesetNamesFromCollection)
37 37
 export(getMSigDBTable)
38 38
 export(getPathwayResultNames)
39
+export(getSampleSummaryStats)
39 40
 export(getSceParams)
40 41
 export(getTSNE)
41 42
 export(getTopHVG)
... ...
@@ -2,7 +2,7 @@
2 2
                                 simple = TRUE){
3 3
 
4 4
     metrics <- c("Number of Cells")
5
-    values <- c(ncol(inSCE))
5
+    values <- c(as.integer(ncol(inSCE)))
6 6
 
7 7
     if ("sum" %in% colnames(SummarizedExperiment::colData(inSCE))) {
8 8
         metrics <- c(metrics, "Mean counts", "Median counts")
... ...
@@ -107,7 +107,8 @@
107 107
 
108 108
 #' @title Generate table of SCTK QC outputs.
109 109
 #' @description  Creates a table of QC metrics generated from
110
-#'  QC algorithms via either kable or csv file.
110
+#'  QC algorithms, which is stored within the metadata slot of the
111
+#'  input SingleCellExperiment object.
111 112
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object with saved
112 113
 #' \link{assay} data and/or \link{colData} data. Required.
113 114
 #' @param sample Character vector. Indicates which sample each cell belongs to.
... ...
@@ -116,11 +117,13 @@
116 117
 #' @param simple Boolean. Indicates whether to generate a table of only
117 118
 #' basic QC stats (ex. library size), or to generate a summary table of all
118 119
 #' QC stats stored in the inSCE.
119
-#' @return A matrix/array object.
120
+#' @return A SingleCellExperiment object with a summary table for QC statistics
121
+#' in the `sampleSummary` slot of metadata.
120 122
 #' @examples
121 123
 #' data(scExample, package = "singleCellTK")
122 124
 #' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'")
123
-#' sampleSummaryStats(sce, simple = TRUE)
125
+#' sce <- sampleSummaryStats(sce, simple = TRUE)
126
+#' getSampleSummaryStats(sce)
124 127
 #' @importFrom magrittr %>%
125 128
 #' @export
126 129
 sampleSummaryStats <- function(inSCE,
... ...
@@ -179,5 +182,60 @@ sampleSummaryStats <- function(inSCE,
179 182
         return((signif(x,5)))
180 183
     })
181 184
 
182
-    return(dfTableRes)
185
+    inSCE <- setSampleSummaryStats(inSCE, slot = "sctk_qc",
186
+                                   stats = dfTableRes)
187
+    return(inSCE)
183 188
 }
189
+
190
+
191
+#' @title Store table of SCTK QC outputs to metadata. Executed within
192
+#' `sampleSummaryStats` function.
193
+#' @description  Stores a table of QC metrics generated from
194
+#'  QC algorithms within the metadata slot of the SingleCellExperiment object.
195
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object with saved
196
+#' \link{assay} data and/or \link{colData} data. Required.
197
+#' @param slot A \code{character} value which specifies the
198
+#' desired slot to store the stats table within the metadata of
199
+#' the SingleCellExperiment object. Required.
200
+#' @param stats Input stats table that will be stored within the
201
+#' SingleCellExperiment object. Required.
202
+#' @return A SingleCellExperiment object with a summary table for QC statistics
203
+#' in the `sampleSummary` slot of metadata.
204
+setSampleSummaryStats <- function(inSCE,
205
+                                  slot,
206
+                                  stats){
207
+    inSCE@metadata$sctk$sampleSummary[[slot]] <- stats
208
+    return(inSCE)
209
+}
210
+
211
+#' @title Retrieve table of SCTK QC outputs from metadata.
212
+#' @description  Retrieves the table of QC metrics generated from
213
+#'  QC algorithms within the metadata slot of the SingleCellExperiment object.
214
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object with saved
215
+#' \link{assay} data and/or \link{colData} data. Required.
216
+#' @param slot A \code{character} value indicating the slot
217
+#' that stores the stats table within the metadata of the
218
+#' SingleCellExperiment object. If missing, the first table is returned.
219
+#' @return A matrix/array object. Contains a summary table for QC statistics
220
+#' generated from SingleCellTK.
221
+#' @export
222
+#' @examples
223
+#' data(scExample, package = "singleCellTK")
224
+#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'")
225
+#' sce <- runCellQC(sce)
226
+#' sce <- sampleSummaryStats(sce, simple = FALSE)
227
+#' getSampleSummaryStats(sce, slot = "sctk_qc")
228
+getSampleSummaryStats <- function(inSCE, slot){
229
+    if(missing(slot)){ # If slot parameter is missing, then return first element.
230
+        return(inSCE@metadata$sctk$sampleSummary[[1]])
231
+    }
232
+
233
+    if(!is.null(inSCE@metadata$sctk$sampleSummary[[slot]])){
234
+        return(inSCE@metadata$sctk$sampleSummary[[slot]])
235
+    }
236
+    else{
237
+        #Return error with message
238
+        stop("Please run the `sampleSummaryStats` function first to generate
239
+             the QC statistics summary table.")
240
+    }
241
+}
184 242
\ No newline at end of file
... ...
@@ -559,7 +559,7 @@ for(i in seq_along(process)) {
559 559
             for (name in names(metadata(mergedFilteredSCE))) {
560 560
                 if (name != "assayType") {
561 561
                     metadata(mergedFilteredSCE)[[name]] <- list(metadata(mergedFilteredSCE)[[name]])
562
-                    names(metadata(mergedFilteredSCE)[[name]]) <- samplename                    
562
+                    names(metadata(mergedFilteredSCE)[[name]]) <- samplename
563 563
                 }
564 564
             }
565 565
         }
... ...
@@ -596,7 +596,8 @@ for(i in seq_along(process)) {
596 596
             reportCellQC(inSCE = mergedFilteredSCE, output_dir = directory, output_file = paste0("SCTK_", samplename,'_cellQC.html'), subTitle = subTitle, studyDesign = studyDesign)
597 597
 
598 598
             ## generate QC metrics table for mergedFilteredSCE
599
-            QCsummary <- sampleSummaryStats(mergedFilteredSCE, simple=FALSE, sample = colData(mergedFilteredSCE)$sample) #colData(cellSCE)$Study_ID
599
+            mergedFilteredSCE <- sampleSummaryStats(mergedFilteredSCE, simple=FALSE, sample = colData(mergedFilteredSCE)$sample) #colData(cellSCE)$Study_ID
600
+            QCsummary <- getSampleSummaryStats(mergedFilteredSCE, slot = "sctk_qc")
600 601
             write.csv(QCsummary, file.path(directory,
601 602
                                            samplename,
602 603
                                            paste0("SCTK_", samplename,'_cellQC_summary.csv')))
... ...
@@ -647,7 +648,8 @@ for(i in seq_along(process)) {
647 648
                          samplename = samplename, writeYAML = TRUE,
648 649
                          skip = c("scrublet", "runDecontX", "runBarcodeRanksMetaOutput"))
649 650
 
650
-            QCsummary <- sampleSummaryStats(mergedFilteredSCE, simple=FALSE, sample = colData(mergedFilteredSCE)$sample) #colData(cellSCE)$Study_ID
651
+            mergedFilteredSCE <- sampleSummaryStats(mergedFilteredSCE, simple=FALSE, sample = colData(mergedFilteredSCE)$sample) #colData(cellSCE)$Study_ID
652
+            QCsummary <- getSampleSummaryStats(mergedFilteredSCE, slot = "sctk_qc")
651 653
             write.csv(QCsummary, file.path(directory,
652 654
                                            samplename,
653 655
                                            paste0("SCTK_", samplename,'_cellQC_summary.csv')))
... ...
@@ -688,7 +690,7 @@ if (!isTRUE(split)) {
688 690
                 if (name != "assayType") {
689 691
                     names(metadata(cellSCE)[[name]]) <- sample
690 692
                 }
691
-            }            
693
+            }
692 694
         }
693 695
 
694 696
         exportSCE(inSCE = dropletSCE, samplename = samplename, directory = directory, type = "Droplets", format=formats)
... ...
@@ -719,7 +721,8 @@ if (!isTRUE(split)) {
719 721
         }
720 722
 
721 723
         ## generate QC summary
722
-        QCsummary <- sampleSummaryStats(cellSCE, simple=FALSE, sample = colData(cellSCE)$sample)
724
+        cellSCE <- sampleSummaryStats(cellSCE, simple=FALSE, sample = colData(cellSCE)$sample)
725
+        QCsummary <- getSampleSummaryStats(cellSCE, slot = "sctk_qc")
723 726
         write.csv(QCsummary, file.path(directory,
724 727
                                        samplename,
725 728
                                        paste0("SCTK_", samplename,'_cellQC_summary.csv')))
... ...
@@ -733,7 +736,7 @@ if (!isTRUE(split)) {
733 736
             for (name in names(metadata(cellSCE))) {
734 737
               if (name != "assayType") {
735 738
                   metadata(cellSCE)[[name]] <- list(metadata(cellSCE)[[name]])
736
-                  names(metadata(cellSCE)[[name]]) <- samplename                
739
+                  names(metadata(cellSCE)[[name]]) <- samplename
737 740
               }
738 741
             }
739 742
         } else {
... ...
@@ -744,7 +747,7 @@ if (!isTRUE(split)) {
744 747
                 if (name != "assayType") { ### not important and hard to force name. Skipped
745 748
                     names(metadata(cellSCE)[[name]]) <- sample
746 749
                 }
747
-            }            
750
+            }
748 751
         }
749 752
 
750 753
         exportSCE(inSCE = cellSCE, samplename = samplename, directory = directory, type = "Cells", format=formats)
... ...
@@ -767,7 +770,8 @@ if (!isTRUE(split)) {
767 770
         reportCellQC(inSCE = cellSCE, output_dir = directory, output_file = paste0("SCTK_", samplename,'_cellQC.html'), subTitle = subTitle, studyDesign = studyDesign)
768 771
         getSceParams(inSCE = cellSCE, directory = directory, samplename = samplename, writeYAML = TRUE)
769 772
 
770
-        QCsummary <- sampleSummaryStats(cellSCE, simple=FALSE, sample = colData(cellSCE)$sample)
773
+        cellSCE <- sampleSummaryStats(cellSCE, simple=FALSE, sample = colData(cellSCE)$sample)
774
+        QCsummary <- getSampleSummaryStats(cellSCE, slot = "sctk_qc")
771 775
         write.csv(QCsummary, file.path(directory,
772 776
                                        samplename,
773 777
                                        paste0("SCTK_", samplename,'_cellQC_summary.csv')))
... ...
@@ -139,10 +139,11 @@ description_runPerCellQC <- descriptionRunPerCellQC()
139 139
 ```{r "SummaryStats", echo = FALSE, results="asis", fig.align="center", warning=FALSE, message=FALSE}
140 140
 cat("\n")
141 141
 
142
-summaryTable <- sampleSummaryStats(inSCE = sce.qc, 
142
+sce.qc <- sampleSummaryStats(inSCE = sce.qc, 
143 143
                                    sample = sceSample,
144 144
                                    simple = FALSE)
145 145
 
146
+summaryTable <- getSampleSummaryStats(sce.qc, slot = "sctk_qc")
146 147
 cat(apply(summaryTable, 1:2, as.character) %>% 
147 148
     knitr::kable(format = "html") %>%
148 149
       kableExtra::kable_styling() %>%
149 150
new file mode 100644
... ...
@@ -0,0 +1,31 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/sampleSummaryStats.R
3
+\name{getSampleSummaryStats}
4
+\alias{getSampleSummaryStats}
5
+\title{Retrieve table of SCTK QC outputs from metadata.}
6
+\usage{
7
+getSampleSummaryStats(inSCE, slot)
8
+}
9
+\arguments{
10
+\item{inSCE}{Input \linkS4class{SingleCellExperiment} object with saved
11
+\link{assay} data and/or \link{colData} data. Required.}
12
+
13
+\item{slot}{A \code{character} value indicating the slot
14
+that stores the stats table within the metadata of the
15
+SingleCellExperiment object. If missing, the first table is returned.}
16
+}
17
+\value{
18
+A matrix/array object. Contains a summary table for QC statistics
19
+generated from SingleCellTK.
20
+}
21
+\description{
22
+Retrieves the table of QC metrics generated from
23
+ QC algorithms within the metadata slot of the SingleCellExperiment object.
24
+}
25
+\examples{
26
+data(scExample, package = "singleCellTK")
27
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'")
28
+sce <- runCellQC(sce)
29
+sce <- sampleSummaryStats(sce, simple = FALSE)
30
+getSampleSummaryStats(sce, slot = "sctk_qc")
31
+}
... ...
@@ -20,14 +20,17 @@ basic QC stats (ex. library size), or to generate a summary table of all
20 20
 QC stats stored in the inSCE.}
21 21
 }
22 22
 \value{
23
-A matrix/array object.
23
+A SingleCellExperiment object with a summary table for QC statistics
24
+in the `sampleSummary` slot of metadata.
24 25
 }
25 26
 \description{
26 27
 Creates a table of QC metrics generated from
27
- QC algorithms via either kable or csv file.
28
+ QC algorithms, which is stored within the metadata slot of the
29
+ input SingleCellExperiment object.
28 30
 }
29 31
 \examples{
30 32
 data(scExample, package = "singleCellTK")
31 33
 sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'")
32
-sampleSummaryStats(sce, simple = TRUE)
34
+sce <- sampleSummaryStats(sce, simple = TRUE)
35
+getSampleSummaryStats(sce)
33 36
 }
34 37
new file mode 100644
... ...
@@ -0,0 +1,28 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/sampleSummaryStats.R
3
+\name{setSampleSummaryStats}
4
+\alias{setSampleSummaryStats}
5
+\title{Store table of SCTK QC outputs to metadata. Executed within
6
+`sampleSummaryStats` function.}
7
+\usage{
8
+setSampleSummaryStats(inSCE, slot, stats)
9
+}
10
+\arguments{
11
+\item{inSCE}{Input \linkS4class{SingleCellExperiment} object with saved
12
+\link{assay} data and/or \link{colData} data. Required.}
13
+
14
+\item{slot}{A \code{character} value which specifies the
15
+desired slot to store the stats table within the metadata of
16
+the SingleCellExperiment object. Required.}
17
+
18
+\item{stats}{Input stats table that will be stored within the
19
+SingleCellExperiment object. Required.}
20
+}
21
+\value{
22
+A SingleCellExperiment object with a summary table for QC statistics
23
+in the `sampleSummary` slot of metadata.
24
+}
25
+\description{
26
+Stores a table of QC metrics generated from
27
+ QC algorithms within the metadata slot of the SingleCellExperiment object.
28
+}
... ...
@@ -13,8 +13,12 @@ test_that("summarizeSCE", {
13 13
 
14 14
 test_that(desc = "Testing sampleSummaryStats", {
15 15
   data(scExample)
16
-  stats <- sampleSummaryStats(sce, simple = FALSE)
17
-  expect_true("matrix" %in% class(stats))
16
+  sce <- sampleSummaryStats(sce, simple = FALSE)
17
+  expect_true("sctk" %in% names(metadata(sce)))
18
+  expect_is(metadata(sce)$sctk$sampleSummary$sctk_qc,
19
+            "matrix")
20
+  expect_is(getSampleSummaryStats(sce, slot = "sctk_qc"),
21
+            "matrix")
18 22
 })
19 23
 
20 24
 
... ...
@@ -131,16 +131,18 @@ reducedDims(pbmc.combined)
131 131
 
132 132
 #### Generating a summary statistic table
133 133
 
134
-The function `sampleSummaryStats()` may be used to generate a table containing the mean and median of the data per sample. 
134
+The function `sampleSummaryStats()` may be used to generate a table containing the mean and median of the data per sample. The table is stored in the `metadata` of the object under the `sctk_qc` slot, and can then be retrieved using `getSampleSummaryStats()`.
135 135
 
136 136
 ```{r sampleSummaryStats}
137
-sampleSummaryStats(pbmc.combined, sample = sample.vector)
137
+pbmc.combined <- sampleSummaryStats(pbmc.combined, sample = sample.vector)
138
+getSampleSummaryStats(pbmc.combined, slot = "sctk_qc")
138 139
 ```
139 140
 
140 141
 If users choose to generate a table for all QC metrics generated through `runCellQC()`, they may set the `simple` parameter to `FALSE`.
141 142
 
142 143
 ```{r sampleSummaryStatsQC}
143
-sampleSummaryStats(pbmc.combined, sample = sample.vector, simple = FALSE)
144
+pbmc.combined <- sampleSummaryStats(pbmc.combined, sample = sample.vector, simple = FALSE)
145
+getSampleSummaryStats(pbmc.combined, slot = "sctk_qc")
144 146
 ```
145 147
 
146 148
 ### Running individual QC methods