Browse code

enable marker plotting with topN markers

Yichen Wang authored on 28/08/2020 16:47:53
Showing 2 changed files

... ...
@@ -18,6 +18,9 @@
18 18
 #' larger than this value. Default \code{1}
19 19
 #' @param fdrThreshold Only use DEGs with FDR value smaller than this value.
20 20
 #' Default \code{0.05}
21
+#' @param topN An integer. Plot at most this number of top markers for each
22
+#' cluster, ranked by log2FC value. Use \code{NULL} to disable the top-N
23
+#' filtering. Default \code{10}.
21 24
 #' @param orderBy The ordering method of the clusters on the split heatmap.
22 25
 #' Can be chosen from \code{"size"} or \code{"name"}, specified with vector of
23 26
 #' ordered unique cluster labels, or set as \code{NULL} for unsplit heatmap.
... ...
@@ -55,7 +58,7 @@
55 58
 #' @author Yichen Wang
56 59
 #' @export
57 60
 plotMarkerDiffExp <- function(inSCE, useAssay = 'logcounts', orderBy = 'size',
58
-    log2fcThreshold = 1, fdrThreshold = 0.05, decreasing = TRUE,
61
+    log2fcThreshold = 1, fdrThreshold = 0.05, topN = 10, decreasing = TRUE,
59 62
     rowDataName = NULL, colDataName = NULL, featureAnnotations = NULL,
60 63
     cellAnnotations = NULL, featureAnnotationColor = NULL,
61 64
     cellAnnotationColor = NULL,
... ...
@@ -87,6 +90,10 @@ plotMarkerDiffExp <- function(inSCE, useAssay = 'logcounts', orderBy = 'size',
87 90
     if(!all(c("Gene", "Pvalue", "Log2_FC", "FDR") %in% colnames(degFull)[1:4])){
88 91
         stop('"findMarker" result cannot be interpreted properly')
89 92
     }
93
+    if(length(which(!degFull$Gene %in% rownames(inSCE))) > 0){
94
+      # Remove genes that appear in the DEG table but not in the SCE object.
95
+      degFull <- degFull[-which(!degFull$Gene %in% rownames(inSCE)),]
96
+    }
90 97
     if(!is.null(log2fcThreshold)){
91 98
         degFull <- degFull[degFull$Log2_FC > log2fcThreshold,]
92 99
     }
... ...
@@ -105,11 +112,23 @@ plotMarkerDiffExp <- function(inSCE, useAssay = 'logcounts', orderBy = 'size',
105 112
         toRemove <- which(deg.gix)[-toKeep]
106 113
         degFull <- degFull[-toRemove,]
107 114
     }
108
-    if(length(which(!degFull$Gene %in% rownames(inSCE))) > 0){
109
-      degFull <- degFull[-which(!degFull$Gene %in% rownames(inSCE)),]
115
+    clusterName <- colnames(degFull)[5]
116
+    selected <- character()
117
+    if (!is.null(topN)) {
118
+      for (c in unique(degFull[[clusterName]])) {
119
+        deg.cluster <- degFull[degFull[[clusterName]] == c,]
120
+        deg.cluster <- deg.cluster[order(deg.cluster$Log2_FC, decreasing = TRUE),]
121
+        if (dim(deg.cluster)[1] > topN) {
122
+          deg.cluster <- deg.cluster[1:topN,]
123
+        }
124
+        selected <- c(selected, deg.cluster$Gene)
125
+      }
126
+    } else {
127
+      selected <- degFull$Gene
110 128
     }
129
+    degFull <- degFull[degFull$Gene %in% selected,]
111 130
     inSCE <- inSCE[degFull$Gene,]
112
-    clusterName <- colnames(degFull)[5]
131
+
113 132
     z <- SummarizedExperiment::colData(inSCE)[[clusterName]]
114 133
     if(is.factor(z)){
115 134
         z.order <- levels(z)
... ...
@@ -10,6 +10,7 @@ plotMarkerDiffExp(
10 10
   orderBy = "size",
11 11
   log2fcThreshold = 1,
12 12
   fdrThreshold = 0.05,
13
+  topN = 10,
13 14
   decreasing = TRUE,
14 15
   rowDataName = NULL,
15 16
   colDataName = NULL,
... ...
@@ -39,6 +40,10 @@ larger than this value. Default \code{1}}
39 40
 \item{fdrThreshold}{Only use DEGs with FDR value smaller than this value.
40 41
 Default \code{0.05}}
41 42
 
43
+\item{topN}{An integer. Plot at most this number of top markers for each
44
+cluster, ranked by log2FC value. Use \code{NULL} to disable the top-N
45
+filtering. Default \code{10}.}
46
+
42 47
 \item{decreasing}{Order the cluster decreasingly. Default \code{TRUE}.}
43 48
 
44 49
 \item{rowDataName}{character. The column name(s) in \code{rowData} that need