Browse code

findMarker update

Yichen Wang authored on 10/11/2020 21:21:44
Showing 5 changed files

... ...
@@ -19,6 +19,14 @@
19 19
 #' larger than this value. Default \code{NULL}
20 20
 #' @param fdrThreshold Only out put DEGs with FDR value smaller than this
21 21
 #' value. Default \code{1}
22
+#' @param minClustExprPerc A numeric scalar. The minimum cutoff of the
23
+#' percentage of cells in the cluster of interests that expressed the marker
24
+#' gene. Default \code{0.7}.
25
+#' @param maxCtrlExprPerc A numeric scalar. The maximum cutoff of the
26
+#' percentage of cells out of the cluster (control group) that expressed the
27
+#' marker gene. Default \code{0.4}.
28
+#' @param minMeanExpr A numeric scalar. The minimum cutoff of the mean
29
+#' expression value of the marker in the cluster of interests. Default \code{1}.
22 30
 #' @return The input \linkS4class{SingleCellExperiment} object with
23 31
 #' \code{metadata(inSCE)$findMarker} updated with a data.table of the up-
24 32
 #' regulated DEGs for each cluster.
... ...
@@ -27,7 +35,9 @@
27 35
 findMarkerDiffExp <- function(inSCE, useAssay = 'logcounts',
28 36
                               method = c('MAST', "DESeq2", "Limma"),
29 37
                               cluster = 'cluster', covariates = NULL,
30
-                              log2fcThreshold = 0.25, fdrThreshold = 0.05){
38
+                              log2fcThreshold = 0.25, fdrThreshold = 0.05,
39
+                              minClustExprPerc = 0.6, maxCtrlExprPerc = 0.4,
40
+                              minMeanExpr = 0.5){
31 41
     # Input checks
32 42
     if(!inherits(inSCE, "SingleCellExperiment")){
33 43
         stop('"inSCE" should be a SingleCellExperiment inherited Object.')
... ...
@@ -92,6 +102,50 @@ findMarkerDiffExp <- function(inSCE, useAssay = 'logcounts',
92 102
     }
93 103
     degFull <- degFull[stats::complete.cases(degFull),]
94 104
     attr(degFull, "useAssay") <- useAssay
105
+    degFull <- .calcMarkerExpr(degFull, inSCE, clusterName,
106
+                               c(minClustExprPerc, maxCtrlExprPerc,
107
+                                 minMeanExpr))
95 108
     S4Vectors::metadata(inSCE)$findMarker <- degFull
96 109
     return(inSCE)
97 110
 }
111
+
112
+.calcMarkerExpr <- function(markerTable, inSCE, clusterName, params) {
113
+    genes <- markerTable$Gene
114
+    cellLabels <- SummarizedExperiment::colData(inSCE)[[clusterName]]
115
+    useAssay <- attr(markerTable, "useAssay")
116
+    cells.in.col <- c()
117
+    cells.out.col <- c()
118
+    exprs.avg <- c()
119
+    toRemove <- integer()
120
+    for (i in seq_len(nrow(markerTable))) {
121
+        gene <- genes[i]
122
+        cluster <- markerTable[[clusterName]][i]
123
+        if (gene %in% rownames(inSCE)) {
124
+            # Logical indices
125
+            cells.in <- cellLabels == cluster
126
+            cells.out <- cellLabels != cluster
127
+            # cells.in expressed percentage
128
+            cells.in.expr <- SummarizedExperiment::assay(inSCE, useAssay)[gene, cells.in]
129
+            cells.in.perc <- sum(cells.in.expr > 0) / length(which(cells.in))
130
+            # cells.out expressed percentage
131
+            cells.out.expr <- SummarizedExperiment::assay(inSCE, useAssay)[gene, cells.out]
132
+            cells.out.perc <- sum(cells.out.expr > 0) / length(which(cells.out))
133
+            # Average Expression in the cluster
134
+            cells.in.mean <- mean(cells.in.expr)
135
+            if (cells.in.perc >= params[1] &&
136
+                cells.out.perc <= params[2] &&
137
+                cells.in.mean >= params[3]) {
138
+                cells.in.col <- c(cells.in.col, cells.in.perc)
139
+                cells.out.col <- c(cells.out.col, cells.out.perc)
140
+                exprs.avg <- c(exprs.avg, cells.in.mean)
141
+            } else {
142
+                toRemove <- c(toRemove, i)
143
+            }
144
+        }
145
+    }
146
+    markerTable <- markerTable[-toRemove,]
147
+    markerTable$clusterExprPerc <- cells.in.col
148
+    markerTable$ControlExprPerc <- cells.out.col
149
+    markerTable$clusterAveExpr <- exprs.avg
150
+    return(markerTable)
151
+}
... ...
@@ -457,8 +457,8 @@ plotMASTThresholdGenes <- function(inSCE, useAssay="logcounts", doPlot = TRUE,
457 457
     data_log = isLogged)))
458 458
   # plotting
459 459
   plotNRow <- ceiling(length(thres$valleys) / 4)
460
-  thres.grob <- as.grob(function(){
461
-    par(mfrow = c(plotNRow, 4), mar = c(3, 3, 2, 1),
460
+  thres.grob <- ggplotify::as.grob(function(){
461
+    graphics::par(mfrow = c(plotNRow, 4), mar = c(3, 3, 2, 1),
462 462
         mgp = c(2, 0.7, 0), tck = -0.01, new = TRUE)
463 463
     plot(thres)
464 464
   })
... ...
@@ -16,6 +16,14 @@
16 16
 #' larger than this value. Default \code{1}
17 17
 #' @param fdrThreshold Only use DEGs with FDR value smaller than this value.
18 18
 #' Default \code{0.05}
19
+#' @param minClustExprPerc A numeric scalar. The minimum cutoff of the
20
+#' percentage of cells in the cluster of interests that expressed the marker
21
+#' gene. Default \code{0.7}.
22
+#' @param maxCtrlExprPerc A numeric scalar. The maximum cutoff of the
23
+#' percentage of cells out of the cluster (control group) that expressed the
24
+#' marker gene. Default \code{0.4}.
25
+#' @param minMeanExpr A numeric scalar. The minimum cutoff of the mean
26
+#' expression value of the marker in the cluster of interests. Default \code{1}.
19 27
 #' @param topN An integer. Only to plot this number of top markers for each
20 28
 #' cluster in maximum, in terms of log2FC value. Use \code{NULL} to cancel the
21 29
 #' top N subscription. Default \code{10}.
... ...
@@ -56,7 +64,8 @@
56 64
 #' @author Yichen Wang
57 65
 #' @export
58 66
 plotMarkerDiffExp <- function(inSCE, orderBy = 'size',
59
-    log2fcThreshold = 1, fdrThreshold = 0.05, topN = 10, decreasing = TRUE,
67
+    log2fcThreshold = 1, fdrThreshold = 0.05, minClustExprPerc = 0.7,
68
+    maxCtrlExprPerc = 0.4, minMeanExpr = 1, topN = 10, decreasing = TRUE,
60 69
     rowDataName = NULL, colDataName = NULL, featureAnnotations = NULL,
61 70
     cellAnnotations = NULL, featureAnnotationColor = NULL,
62 71
     cellAnnotationColor = NULL,
... ...
@@ -101,6 +110,15 @@ plotMarkerDiffExp <- function(inSCE, orderBy = 'size',
101 110
     if(!is.null(fdrThreshold)){
102 111
         degFull <- degFull[degFull$FDR < fdrThreshold,]
103 112
     }
113
+    if (!is.null(minClustExprPerc)) {
114
+      degFull <- degFull[degFull$clusterExprPerc >= minClustExprPerc,]
115
+    }
116
+    if (!is.null(maxCtrlExprPerc)) {
117
+      degFull <- degFull[degFull$ControlExprPerc <= maxCtrlExprPerc,]
118
+    }
119
+    if (!is.null(minMeanExpr)) {
120
+      degFull <- degFull[degFull$clusterAveExpr >= minMeanExpr,]
121
+    }
104 122
     # Remove duplicate by assigning the duplicated genes to the cluster where
105 123
     # their log2FC is the highest.
106 124
     # Done by keeping all unique genes and the rows  with highest Log2FC entry
... ...
@@ -14,7 +14,10 @@ findMarkerDiffExp(
14 14
   cluster = "cluster",
15 15
   covariates = NULL,
16 16
   log2fcThreshold = 0.25,
17
-  fdrThreshold = 0.05
17
+  fdrThreshold = 0.05,
18
+  minClustExprPerc = 0.6,
19
+  maxCtrlExprPerc = 0.4,
20
+  minMeanExpr = 0.5
18 21
 )
19 22
 }
20 23
 \arguments{
... ...
@@ -41,6 +44,17 @@ larger than this value. Default \code{NULL}}
41 44
 
42 45
 \item{fdrThreshold}{Only out put DEGs with FDR value smaller than this
43 46
 value. Default \code{1}}
47
+
48
+\item{minClustExprPerc}{A numeric scalar. The minimum cutoff of the
49
+percentage of cells in the cluster of interests that expressed the marker
50
+gene. Default \code{0.7}.}
51
+
52
+\item{maxCtrlExprPerc}{A numeric scalar. The maximum cutoff of the
53
+percentage of cells out of the cluster (control group) that expressed the
54
+marker gene. Default \code{0.4}.}
55
+
56
+\item{minMeanExpr}{A numeric scalar. The minimum cutoff of the mean
57
+expression value of the marker in the cluster of interests. Default \code{1}.}
44 58
 }
45 59
 \value{
46 60
 The input \linkS4class{SingleCellExperiment} object with
... ...
@@ -9,6 +9,9 @@ plotMarkerDiffExp(
9 9
   orderBy = "size",
10 10
   log2fcThreshold = 1,
11 11
   fdrThreshold = 0.05,
12
+  minClustExprPerc = 0.7,
13
+  maxCtrlExprPerc = 0.4,
14
+  minMeanExpr = 1,
12 15
   topN = 10,
13 16
   decreasing = TRUE,
14 17
   rowDataName = NULL,
... ...
@@ -36,6 +39,17 @@ larger than this value. Default \code{1}}
36 39
 \item{fdrThreshold}{Only use DEGs with FDR value smaller than this value.
37 40
 Default \code{0.05}}
38 41
 
42
+\item{minClustExprPerc}{A numeric scalar. The minimum cutoff of the
43
+percentage of cells in the cluster of interests that expressed the marker
44
+gene. Default \code{0.7}.}
45
+
46
+\item{maxCtrlExprPerc}{A numeric scalar. The maximum cutoff of the
47
+percentage of cells out of the cluster (control group) that expressed the
48
+marker gene. Default \code{0.4}.}
49
+
50
+\item{minMeanExpr}{A numeric scalar. The minimum cutoff of the mean
51
+expression value of the marker in the cluster of interests. Default \code{1}.}
52
+
39 53
 \item{topN}{An integer. Only to plot this number of top markers for each
40 54
 cluster in maximum, in terms of log2FC value. Use \code{NULL} to cancel the
41 55
 top N subscription. Default \code{10}.}