Browse code

New function: diffAbundanceFET, plotClusterAbundance

Yichen Wang authored on 13/05/2021 21:09:54
Showing 5 changed files

... ...
@@ -10,6 +10,7 @@ export(convertGeneIDs)
10 10
 export(convertSCEToSeurat)
11 11
 export(convertSeuratToSCE)
12 12
 export(dedupRowNames)
13
+export(diffAbundanceFET)
13 14
 export(discreteColorPalette)
14 15
 export(distinctColors)
15 16
 export(downSampleCells)
... ...
@@ -63,6 +64,7 @@ export(plotBarcodeRankScatter)
63 64
 export(plotBatchVariance)
64 65
 export(plotBcdsResults)
65 66
 export(plotBiomarker)
67
+export(plotClusterAbundance)
66 68
 export(plotCxdsResults)
67 69
 export(plotDEGHeatmap)
68 70
 export(plotDEGRegression)
69 71
new file mode 100644
... ...
@@ -0,0 +1,173 @@
1
+#' Calculate Differential Abundance with FET
2
+#' @details This function will calculate the cell counting and fraction by
3
+#' dividing all cells to groups specified by the arguments, together with
4
+#' statistical summary by performing Fisher Exact Tests (FET).
5
+#' @param inSCE A \code{\link[SingleCellExperiment]{SingleCellExperiment}}
6
+#' object.
7
+#' @param cluster A single \code{character}, specifying the name to store the
8
+#' cluster label in \code{\link{colData}}.
9
+#' @param variable A single \code{character}, specifying the name to store the
10
+#' phenotype labels in \code{\link{colData}}.
11
+#' @param control \code{character}. Specifying one or more categories that can
12
+#' be found in the vector specified by \code{variable}.
13
+#' @param case \code{character}. Specifying one or more categories that can
14
+#' be found in the vector specified by \code{variable}.
15
+#' @param analysisName A single \code{character}. Will be used for naming the
16
+#' result table, which will be saved in metadata slot.
17
+#' @return The original \code{\link[SingleCellExperiment]{SingleCellExperiment}}
18
+#' object with \code{metadata(inSCE)} updated with a list
19
+#' \code{diffAbundanceFET}, containing a new \code{data.frame} for the analysis
20
+#' result, named by \code{analysisName}. The \code{data.frame} contains columns
21
+#' for number and fraction of cells that belong to different cases, as well as
22
+#' "Odds_Ratio", "PValue" and "FDR".
23
+#' @export
24
+#' @examples
25
+#' data("mouseBrainSubsetSCE", package = "singleCellTK")
26
+#' mouseBrainSubsetSCE <- diffAbundanceFET(inSCE = mouseBrainSubsetSCE,
27
+#'                                                 cluster = "tissue",
28
+#'                                                 variable = "level1class",
29
+#'                                                 case = "oligodendrocytes",
30
+#'                                                 control = "microglia",
31
+#'                                                 analysisName = "diffAbundFET")
32
+diffAbundanceFET <- function(inSCE, cluster, variable, control, case,
33
+                             analysisName) {
34
+  if (!inherits(inSCE, "SingleCellExperiment")) {
35
+    stop("'inSCE' should be a SingleCellExperiment object.")
36
+  }
37
+  if (is.null(cluster) ||
38
+      !cluster %in% names(SummarizedExperiment::colData(inSCE))) {
39
+    stop("Argument 'cluster' should be a column name in colData(inSCE).")
40
+  }
41
+  if (is.null(variable) ||
42
+      !variable %in% names(SummarizedExperiment::colData(inSCE))) {
43
+    stop("Argument 'variable' should be a column name in colData(inSCE).")
44
+  }
45
+
46
+  cluster <- SummarizedExperiment::colData(inSCE)[, cluster]
47
+  variable <- SummarizedExperiment::colData(inSCE)[, variable]
48
+  if (!all(control %in% variable)) {
49
+    nf1 <- control[which(!control %in% variable)]
50
+    stop("Given 'control' not all found in 'variable': ",
51
+         paste(nf1, collapse = ", "))
52
+  }
53
+  if (!all(case %in% variable)) {
54
+    nf2 <- case[which(!case %in% variable)]
55
+    stop("Given 'case' not all found in 'variable': ",
56
+         paste(nf2, collapse = ", "))
57
+  }
58
+  if (any(control %in% case)) {
59
+    warning("Overlapping variables found in 'control' and 'case'")
60
+  }
61
+
62
+  control.lab <- paste(control, collapse=",")
63
+  case.lab <- paste(case, collapse=",")
64
+  label <- rep(NA, length(variable))
65
+  label[variable %in% case] <- case.lab
66
+  label[variable %in% control] <- control.lab
67
+  label <- factor(label, levels=c(control.lab, case.lab))
68
+
69
+  res <- matrix(NA, nrow=length(unique(cluster)), ncol=10)
70
+  cluster.label <- sort(unique(cluster))
71
+  for(i in seq_along(cluster.label)) {
72
+    cluster.factor <- factor(ifelse(cluster %in% cluster.label[i],
73
+                                    cluster.label[i], "Other"),
74
+                             levels=c(i, "Other"))
75
+
76
+    ta <- table(cluster.factor, label)
77
+    ta.p <- prop.table(ta, 2)
78
+    fet <- stats::fisher.test(ta)
79
+    res[i,] <- c(ta, ta.p, fet$estimate, fet$p.value)
80
+  }
81
+  colnames(res) <- c(paste0("Number of cells in cluster and in ",
82
+                            control.lab),
83
+                     paste0("Number of cells NOT in cluster and in ",
84
+                            control.lab),
85
+                     paste0("Number of cells in cluster and in ",
86
+                            case.lab),
87
+                     paste0("Number of cells NOT in cluster and in ",
88
+                            case.lab),
89
+                     paste0("Fraction of cells in cluster and in ",
90
+                            control.lab),
91
+                     paste0("Fraction of cells NOT in cluster and in ",
92
+                            control.lab),
93
+                     paste0("Fraction of cells in cluster and in ",
94
+                            case.lab),
95
+                     paste0("Fraction of cells NOT in cluster and in ",
96
+                            case.lab),
97
+                     "Odds_Ratio", "Pvalue")
98
+  res <- data.frame(Cluster=cluster.label, res,
99
+                    FDR=stats::p.adjust(res[,"Pvalue"], 'fdr'),
100
+                    check.names=FALSE)
101
+  if (!"diffAbundanceFET" %in% names(S4Vectors::metadata(inSCE))) {
102
+    S4Vectors::metadata(inSCE)$diffAbundanceFET <- list()
103
+  }
104
+  S4Vectors::metadata(inSCE)$diffAbundanceFET[[analysisName]] <- res
105
+
106
+  return(inSCE)
107
+}
108
+
109
+#' Plot the differential Abundance
110
+#' @details This function will visualize the differential abundance in two given
111
+#' variables, by making bar plots that presents the cell counting and fraction
112
+#' in different cases.
113
+#' @param inSCE A \code{\link[SingleCellExperiment]{SingleCellExperiment}}
114
+#' object.
115
+#' @param cluster A single \code{character}, specifying the name to store the
116
+#' cluster label in \code{\link{colData}}.
117
+#' @param variable A single \code{character}, specifying the name to store the
118
+#' phenotype labels in \code{\link{colData}}.
119
+#' @return A \code{list} with 4 \code{ggplot} objects.
120
+#' @export
121
+#' @examples
122
+#' data("mouseBrainSubsetSCE", package = "singleCellTK")
123
+#' plotClusterAbundance(inSCE = mouseBrainSubsetSCE,
124
+#'                      cluster = "tissue",
125
+#'                      variable = "level1class")
126
+plotClusterAbundance <- function(inSCE, cluster, variable) {
127
+  if (!inherits(inSCE, "SingleCellExperiment")) {
128
+    stop("'inSCE' should be a SingleCellExperiment object.")
129
+  }
130
+  if (is.null(cluster) ||
131
+      !cluster %in% names(SummarizedExperiment::colData(inSCE))) {
132
+    stop("Argument 'cluster' should be a column name in colData(inSCE).")
133
+  }
134
+  if (is.null(variable) ||
135
+      !variable %in% names(SummarizedExperiment::colData(inSCE))) {
136
+    stop("Argument 'variable' should be a column name in colData(inSCE).")
137
+  }
138
+  cluster <- SummarizedExperiment::colData(inSCE)[, cluster]
139
+  color_palette <- distinctColors(length(unique(cluster)))
140
+
141
+  label <- SummarizedExperiment::colData(inSCE)[, variable]
142
+
143
+  cluster.color <- color_palette
144
+  df <- data.frame(Cluster=as.factor(cluster), Sample=as.factor(label))
145
+
146
+  g1 <- ggplot2::ggplot(df, ggplot2::aes_string("Cluster")) +
147
+    ggplot2::geom_bar(ggplot2::aes_string(fill="Sample")) +
148
+    ggplot2::theme_classic() +
149
+    ggplot2::scale_y_continuous(expand = c(0, 0))
150
+
151
+  g2 <- ggplot2::ggplot(df, ggplot2::aes_string("Cluster")) +
152
+    ggplot2::geom_bar(ggplot2::aes_string(fill="Sample"), position="fill") +
153
+    ggplot2::theme_classic() +
154
+    ggplot2::scale_y_continuous(expand = c(0, 0)) +
155
+    ggplot2:: ylab("Fraction")
156
+
157
+  g3 <- ggplot2::ggplot(df, ggplot2::aes_string("Sample")) +
158
+    ggplot2::geom_bar(ggplot2::aes_string(fill="Cluster")) +
159
+    ggplot2::theme_classic() +
160
+    ggplot2::scale_y_continuous(expand = c(0, 0)) +
161
+    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
162
+    ggplot2::scale_fill_manual(values=cluster.color)
163
+
164
+  g4 <- ggplot2::ggplot(df, ggplot2::aes_string("Sample")) +
165
+    ggplot2::geom_bar(ggplot2::aes_string(fill="Cluster"), position="fill") +
166
+    ggplot2::theme_classic() +
167
+    ggplot2::scale_y_continuous(expand = c(0, 0)) +
168
+    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
169
+    ggplot2::scale_fill_manual(values=cluster.color) +
170
+    ggplot2::ylab("Fraction")
171
+  g <- list(g1, g2, g3, g4)
172
+  return(g)
173
+}
0 174
\ No newline at end of file
... ...
@@ -58,7 +58,7 @@ runScranSNN <- function(inSCE, useAssay = NULL, useReducedDim = NULL,
58 58
                                       "leadingEigen")) {
59 59
   # TODO: parallele parameter
60 60
   if (!inherits(inSCE, "SingleCellExperiment")) {
61
-    stop("SCE")
61
+    stop("'inSCE' should be a SingleCellExperiment object.")
62 62
   }
63 63
   if (is.null(useAssay) + is.null(useReducedDim) + is.null(useAltExp) != 2) {
64 64
     stop("Scran SNN clustering requires one and only one of 'useAssay', ",
... ...
@@ -148,7 +148,7 @@ runKMeans <- function(inSCE, useReducedDim = "PCA",
148 148
                       nStart = 1, seed = 12345,
149 149
                       algorithm = c("Hartigan-Wong", "Lloyd", "MacQueen")){
150 150
   if (!inherits(inSCE, "SingleCellExperiment")) {
151
-    stop("SCE")
151
+    stop("'inSCE' should be a SingleCellExperiment object.")
152 152
   }
153 153
   if (is.null(useReducedDim)) {
154 154
     stop("runKMeans requires 'useReducedDim' input.")
155 155
new file mode 100644
... ...
@@ -0,0 +1,52 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/abundance.R
3
+\name{diffAbundanceFET}
4
+\alias{diffAbundanceFET}
5
+\title{Calculate Differential Abundance with FET}
6
+\usage{
7
+diffAbundanceFET(inSCE, cluster, variable, control, case, analysisName)
8
+}
9
+\arguments{
10
+\item{inSCE}{A \code{\link[SingleCellExperiment]{SingleCellExperiment}}
11
+object.}
12
+
13
+\item{cluster}{A single \code{character}, specifying the name to store the
14
+cluster label in \code{\link{colData}}.}
15
+
16
+\item{variable}{A single \code{character}, specifying the name to store the
17
+phenotype labels in \code{\link{colData}}.}
18
+
19
+\item{control}{\code{character}. Specifying one or more categories that can
20
+be found in the vector specified by \code{variable}.}
21
+
22
+\item{case}{\code{character}. Specifying one or more categories that can
23
+be found in the vector specified by \code{variable}.}
24
+
25
+\item{analysisName}{A single \code{character}. Will be used for naming the
26
+result table, which will be saved in metadata slot.}
27
+}
28
+\value{
29
+The original \code{\link[SingleCellExperiment]{SingleCellExperiment}}
30
+object with \code{metadata(inSCE)} updated with a list
31
+\code{diffAbundanceFET}, containing a new \code{data.frame} for the analysis
32
+result, named by \code{analysisName}. The \code{data.frame} contains columns
33
+for number and fraction of cells that belong to different cases, as well as
34
+"Odds_Ratio", "PValue" and "FDR".
35
+}
36
+\description{
37
+Calculate Differential Abundance with FET
38
+}
39
+\details{
40
+This function will calculate the cell counting and fraction by
41
+dividing all cells to groups specified by the arguments, together with
42
+statistical summary by performing Fisher Exact Tests (FET).
43
+}
44
+\examples{
45
+data("mouseBrainSubsetSCE", package = "singleCellTK")
46
+mouseBrainSubsetSCE <- diffAbundanceFET(inSCE = mouseBrainSubsetSCE,
47
+                                                cluster = "tissue",
48
+                                                variable = "level1class",
49
+                                                case = "oligodendrocytes",
50
+                                                control = "microglia",
51
+                                                analysisName = "diffAbundFET")
52
+}
0 53
new file mode 100644
... ...
@@ -0,0 +1,35 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/abundance.R
3
+\name{plotClusterAbundance}
4
+\alias{plotClusterAbundance}
5
+\title{Plot the differential Abundance}
6
+\usage{
7
+plotClusterAbundance(inSCE, cluster, variable)
8
+}
9
+\arguments{
10
+\item{inSCE}{A \code{\link[SingleCellExperiment]{SingleCellExperiment}}
11
+object.}
12
+
13
+\item{cluster}{A single \code{character}, specifying the name to store the
14
+cluster label in \code{\link{colData}}.}
15
+
16
+\item{variable}{A single \code{character}, specifying the name to store the
17
+phenotype labels in \code{\link{colData}}.}
18
+}
19
+\value{
20
+A \code{list} with 4 \code{ggplot} objects.
21
+}
22
+\description{
23
+Plot the differential Abundance
24
+}
25
+\details{
26
+This function will visualize the differential abundance in two given
27
+variables, by making bar plots that presents the cell counting and fraction
28
+in different cases.
29
+}
30
+\examples{
31
+data("mouseBrainSubsetSCE", package = "singleCellTK")
32
+plotClusterAbundance(inSCE = mouseBrainSubsetSCE,
33
+                     cluster = "tissue",
34
+                     variable = "level1class")
35
+}