... | ... |
@@ -19,6 +19,14 @@ |
19 | 19 |
#' larger than this value. Default \code{NULL} |
20 | 20 |
#' @param fdrThreshold Only out put DEGs with FDR value smaller than this |
21 | 21 |
#' value. Default \code{1} |
22 |
+#' @param minClustExprPerc A numeric scalar. The minimum cutoff of the |
|
23 |
+#' percentage of cells in the cluster of interests that expressed the marker |
|
24 |
+#' gene. Default \code{0.7}. |
|
25 |
+#' @param maxCtrlExprPerc A numeric scalar. The maximum cutoff of the |
|
26 |
+#' percentage of cells out of the cluster (control group) that expressed the |
|
27 |
+#' marker gene. Default \code{0.4}. |
|
28 |
+#' @param minMeanExpr A numeric scalar. The minimum cutoff of the mean |
|
29 |
+#' expression value of the marker in the cluster of interests. Default \code{1}. |
|
22 | 30 |
#' @return The input \linkS4class{SingleCellExperiment} object with |
23 | 31 |
#' \code{metadata(inSCE)$findMarker} updated with a data.table of the up- |
24 | 32 |
#' regulated DEGs for each cluster. |
... | ... |
@@ -27,7 +35,9 @@ |
27 | 35 |
findMarkerDiffExp <- function(inSCE, useAssay = 'logcounts', |
28 | 36 |
method = c('MAST', "DESeq2", "Limma"), |
29 | 37 |
cluster = 'cluster', covariates = NULL, |
30 |
- log2fcThreshold = 0.25, fdrThreshold = 0.05){ |
|
38 |
+ log2fcThreshold = 0.25, fdrThreshold = 0.05, |
|
39 |
+ minClustExprPerc = 0.6, maxCtrlExprPerc = 0.4, |
|
40 |
+ minMeanExpr = 0.5){ |
|
31 | 41 |
# Input checks |
32 | 42 |
if(!inherits(inSCE, "SingleCellExperiment")){ |
33 | 43 |
stop('"inSCE" should be a SingleCellExperiment inherited Object.') |
... | ... |
@@ -92,6 +102,50 @@ findMarkerDiffExp <- function(inSCE, useAssay = 'logcounts', |
92 | 102 |
} |
93 | 103 |
degFull <- degFull[stats::complete.cases(degFull),] |
94 | 104 |
attr(degFull, "useAssay") <- useAssay |
105 |
+ degFull <- .calcMarkerExpr(degFull, inSCE, clusterName, |
|
106 |
+ c(minClustExprPerc, maxCtrlExprPerc, |
|
107 |
+ minMeanExpr)) |
|
95 | 108 |
S4Vectors::metadata(inSCE)$findMarker <- degFull |
96 | 109 |
return(inSCE) |
97 | 110 |
} |
111 |
+ |
|
112 |
+.calcMarkerExpr <- function(markerTable, inSCE, clusterName, params) { |
|
113 |
+ genes <- markerTable$Gene |
|
114 |
+ cellLabels <- SummarizedExperiment::colData(inSCE)[[clusterName]] |
|
115 |
+ useAssay <- attr(markerTable, "useAssay") |
|
116 |
+ cells.in.col <- c() |
|
117 |
+ cells.out.col <- c() |
|
118 |
+ exprs.avg <- c() |
|
119 |
+ toRemove <- integer() |
|
120 |
+ for (i in seq_len(nrow(markerTable))) { |
|
121 |
+ gene <- genes[i] |
|
122 |
+ cluster <- markerTable[[clusterName]][i] |
|
123 |
+ if (gene %in% rownames(inSCE)) { |
|
124 |
+ # Logical indices |
|
125 |
+ cells.in <- cellLabels == cluster |
|
126 |
+ cells.out <- cellLabels != cluster |
|
127 |
+ # cells.in expressed percentage |
|
128 |
+ cells.in.expr <- SummarizedExperiment::assay(inSCE, useAssay)[gene, cells.in] |
|
129 |
+ cells.in.perc <- sum(cells.in.expr > 0) / length(which(cells.in)) |
|
130 |
+ # cells.out expressed percentage |
|
131 |
+ cells.out.expr <- SummarizedExperiment::assay(inSCE, useAssay)[gene, cells.out] |
|
132 |
+ cells.out.perc <- sum(cells.out.expr > 0) / length(which(cells.out)) |
|
133 |
+ # Average Expression in the cluster |
|
134 |
+ cells.in.mean <- mean(cells.in.expr) |
|
135 |
+ if (cells.in.perc >= params[1] && |
|
136 |
+ cells.out.perc <= params[2] && |
|
137 |
+ cells.in.mean >= params[3]) { |
|
138 |
+ cells.in.col <- c(cells.in.col, cells.in.perc) |
|
139 |
+ cells.out.col <- c(cells.out.col, cells.out.perc) |
|
140 |
+ exprs.avg <- c(exprs.avg, cells.in.mean) |
|
141 |
+ } else { |
|
142 |
+ toRemove <- c(toRemove, i) |
|
143 |
+ } |
|
144 |
+ } |
|
145 |
+ } |
|
146 |
+ markerTable <- markerTable[-toRemove,] |
|
147 |
+ markerTable$clusterExprPerc <- cells.in.col |
|
148 |
+ markerTable$ControlExprPerc <- cells.out.col |
|
149 |
+ markerTable$clusterAveExpr <- exprs.avg |
|
150 |
+ return(markerTable) |
|
151 |
+} |
... | ... |
@@ -457,8 +457,8 @@ plotMASTThresholdGenes <- function(inSCE, useAssay="logcounts", doPlot = TRUE, |
457 | 457 |
data_log = isLogged))) |
458 | 458 |
# plotting |
459 | 459 |
plotNRow <- ceiling(length(thres$valleys) / 4) |
460 |
- thres.grob <- as.grob(function(){ |
|
461 |
- par(mfrow = c(plotNRow, 4), mar = c(3, 3, 2, 1), |
|
460 |
+ thres.grob <- ggplotify::as.grob(function(){ |
|
461 |
+ graphics::par(mfrow = c(plotNRow, 4), mar = c(3, 3, 2, 1), |
|
462 | 462 |
mgp = c(2, 0.7, 0), tck = -0.01, new = TRUE) |
463 | 463 |
plot(thres) |
464 | 464 |
}) |
... | ... |
@@ -16,6 +16,14 @@ |
16 | 16 |
#' larger than this value. Default \code{1} |
17 | 17 |
#' @param fdrThreshold Only use DEGs with FDR value smaller than this value. |
18 | 18 |
#' Default \code{0.05} |
19 |
+#' @param minClustExprPerc A numeric scalar. The minimum cutoff of the |
|
20 |
+#' percentage of cells in the cluster of interests that expressed the marker |
|
21 |
+#' gene. Default \code{0.7}. |
|
22 |
+#' @param maxCtrlExprPerc A numeric scalar. The maximum cutoff of the |
|
23 |
+#' percentage of cells out of the cluster (control group) that expressed the |
|
24 |
+#' marker gene. Default \code{0.4}. |
|
25 |
+#' @param minMeanExpr A numeric scalar. The minimum cutoff of the mean |
|
26 |
+#' expression value of the marker in the cluster of interests. Default \code{1}. |
|
19 | 27 |
#' @param topN An integer. Only to plot this number of top markers for each |
20 | 28 |
#' cluster in maximum, in terms of log2FC value. Use \code{NULL} to cancel the |
21 | 29 |
#' top N subscription. Default \code{10}. |
... | ... |
@@ -56,7 +64,8 @@ |
56 | 64 |
#' @author Yichen Wang |
57 | 65 |
#' @export |
58 | 66 |
plotMarkerDiffExp <- function(inSCE, orderBy = 'size', |
59 |
- log2fcThreshold = 1, fdrThreshold = 0.05, topN = 10, decreasing = TRUE, |
|
67 |
+ log2fcThreshold = 1, fdrThreshold = 0.05, minClustExprPerc = 0.7, |
|
68 |
+ maxCtrlExprPerc = 0.4, minMeanExpr = 1, topN = 10, decreasing = TRUE, |
|
60 | 69 |
rowDataName = NULL, colDataName = NULL, featureAnnotations = NULL, |
61 | 70 |
cellAnnotations = NULL, featureAnnotationColor = NULL, |
62 | 71 |
cellAnnotationColor = NULL, |
... | ... |
@@ -101,6 +110,15 @@ plotMarkerDiffExp <- function(inSCE, orderBy = 'size', |
101 | 110 |
if(!is.null(fdrThreshold)){ |
102 | 111 |
degFull <- degFull[degFull$FDR < fdrThreshold,] |
103 | 112 |
} |
113 |
+ if (!is.null(minClustExprPerc)) { |
|
114 |
+ degFull <- degFull[degFull$clusterExprPerc >= minClustExprPerc,] |
|
115 |
+ } |
|
116 |
+ if (!is.null(maxCtrlExprPerc)) { |
|
117 |
+ degFull <- degFull[degFull$ControlExprPerc <= maxCtrlExprPerc,] |
|
118 |
+ } |
|
119 |
+ if (!is.null(minMeanExpr)) { |
|
120 |
+ degFull <- degFull[degFull$clusterAveExpr >= minMeanExpr,] |
|
121 |
+ } |
|
104 | 122 |
# Remove duplicate by assigning the duplicated genes to the cluster where |
105 | 123 |
# their log2FC is the highest. |
106 | 124 |
# Done by keeping all unique genes and the rows with highest Log2FC entry |
... | ... |
@@ -14,7 +14,10 @@ findMarkerDiffExp( |
14 | 14 |
cluster = "cluster", |
15 | 15 |
covariates = NULL, |
16 | 16 |
log2fcThreshold = 0.25, |
17 |
- fdrThreshold = 0.05 |
|
17 |
+ fdrThreshold = 0.05, |
|
18 |
+ minClustExprPerc = 0.6, |
|
19 |
+ maxCtrlExprPerc = 0.4, |
|
20 |
+ minMeanExpr = 0.5 |
|
18 | 21 |
) |
19 | 22 |
} |
20 | 23 |
\arguments{ |
... | ... |
@@ -41,6 +44,17 @@ larger than this value. Default \code{NULL}} |
41 | 44 |
|
42 | 45 |
\item{fdrThreshold}{Only out put DEGs with FDR value smaller than this |
43 | 46 |
value. Default \code{1}} |
47 |
+ |
|
48 |
+\item{minClustExprPerc}{A numeric scalar. The minimum cutoff of the |
|
49 |
+percentage of cells in the cluster of interests that expressed the marker |
|
50 |
+gene. Default \code{0.7}.} |
|
51 |
+ |
|
52 |
+\item{maxCtrlExprPerc}{A numeric scalar. The maximum cutoff of the |
|
53 |
+percentage of cells out of the cluster (control group) that expressed the |
|
54 |
+marker gene. Default \code{0.4}.} |
|
55 |
+ |
|
56 |
+\item{minMeanExpr}{A numeric scalar. The minimum cutoff of the mean |
|
57 |
+expression value of the marker in the cluster of interests. Default \code{1}.} |
|
44 | 58 |
} |
45 | 59 |
\value{ |
46 | 60 |
The input \linkS4class{SingleCellExperiment} object with |
... | ... |
@@ -9,6 +9,9 @@ plotMarkerDiffExp( |
9 | 9 |
orderBy = "size", |
10 | 10 |
log2fcThreshold = 1, |
11 | 11 |
fdrThreshold = 0.05, |
12 |
+ minClustExprPerc = 0.7, |
|
13 |
+ maxCtrlExprPerc = 0.4, |
|
14 |
+ minMeanExpr = 1, |
|
12 | 15 |
topN = 10, |
13 | 16 |
decreasing = TRUE, |
14 | 17 |
rowDataName = NULL, |
... | ... |
@@ -36,6 +39,17 @@ larger than this value. Default \code{1}} |
36 | 39 |
\item{fdrThreshold}{Only use DEGs with FDR value smaller than this value. |
37 | 40 |
Default \code{0.05}} |
38 | 41 |
|
42 |
+\item{minClustExprPerc}{A numeric scalar. The minimum cutoff of the |
|
43 |
+percentage of cells in the cluster of interests that expressed the marker |
|
44 |
+gene. Default \code{0.7}.} |
|
45 |
+ |
|
46 |
+\item{maxCtrlExprPerc}{A numeric scalar. The maximum cutoff of the |
|
47 |
+percentage of cells out of the cluster (control group) that expressed the |
|
48 |
+marker gene. Default \code{0.4}.} |
|
49 |
+ |
|
50 |
+\item{minMeanExpr}{A numeric scalar. The minimum cutoff of the mean |
|
51 |
+expression value of the marker in the cluster of interests. Default \code{1}.} |
|
52 |
+ |
|
39 | 53 |
\item{topN}{An integer. Only to plot this number of top markers for each |
40 | 54 |
cluster in maximum, in terms of log2FC value. Use \code{NULL} to cancel the |
41 | 55 |
top N subscription. Default \code{10}.} |