... | ... |
@@ -271,6 +271,7 @@ import(GSVAdata) |
271 | 271 |
import(SingleCellExperiment) |
272 | 272 |
import(eds) |
273 | 273 |
importFrom(BiocParallel,SerialParam) |
274 |
+importFrom(ComplexHeatmap,anno_barplot) |
|
274 | 275 |
importFrom(S4Vectors,"metadata<-") |
275 | 276 |
importFrom(S4Vectors,metadata) |
276 | 277 |
importFrom(SingleCellExperiment,"counts<-") |
... | ... |
@@ -317,8 +318,11 @@ importFrom(stats,prcomp) |
317 | 318 |
importFrom(stats,quantile) |
318 | 319 |
importFrom(stringr,str_c) |
319 | 320 |
importFrom(stringr,str_replace_all) |
321 |
+importFrom(tibble,column_to_rownames) |
|
322 |
+importFrom(tibble,remove_rownames) |
|
320 | 323 |
importFrom(tibble,tibble) |
321 | 324 |
importFrom(tidyr,spread) |
325 |
+importFrom(tidyr,unite) |
|
322 | 326 |
importFrom(tools,file_ext) |
323 | 327 |
importFrom(utils,head) |
324 | 328 |
importFrom(utils,packageVersion) |
... | ... |
@@ -25,6 +25,11 @@ |
25 | 25 |
#' @param cellIndexBy A single character specifying a column name of |
26 | 26 |
#' \code{colData(inSCE)}, or a vector of the same length as \code{ncol(inSCE)}, |
27 | 27 |
#' where we search for the non-rowname cell indices. Default \code{"rownames"}. |
28 |
+#' @param cluster_columns A logical scalar that turns on/off |
|
29 |
+#' clustering of columns. Default \code{FALSE}. Clustering columns should be turned off when using a reduced dimension |
|
30 |
+#' for plotting, as it will be sorted by PCs. |
|
31 |
+#' @param cluster_rows A logical scalar that turns on/off clustering of rows. |
|
32 |
+#' Default \code{FALSE}. |
|
28 | 33 |
#' @param rowDataName character. The column name(s) in \code{rowData} that need |
29 | 34 |
#' to be added to the annotation. Not applicable for |
30 | 35 |
#' \code{plotSCEDimReduceHeatmap}. Default \code{NULL}. |
... | ... |
@@ -103,7 +108,8 @@ |
103 | 108 |
#' @importFrom stringr str_replace_all str_c |
104 | 109 |
#' @importFrom stats prcomp quantile |
105 | 110 |
#' @importFrom dplyr select arrange group_by count ungroup mutate one_of desc |
106 |
-#' @importFrom tidyr spread unite column_to_rownames remove_rownames |
|
111 |
+#' @importFrom tidyr spread unite |
|
112 |
+#' @importFrom tibble column_to_rownames remove_rownames |
|
107 | 113 |
#' @importFrom grid gpar |
108 | 114 |
#' @importFrom ComplexHeatmap anno_barplot |
109 | 115 |
#' @importFrom rlang .data |
... | ... |
@@ -113,6 +119,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL, |
113 | 119 |
scale = TRUE, trim = c(-2,2), |
114 | 120 |
featureIndexBy = 'rownames', |
115 | 121 |
cellIndexBy = 'rownames', |
122 |
+ cluster_columns = FALSE, |
|
123 |
+ cluster_rows = FALSE, |
|
116 | 124 |
rowDataName = NULL, colDataName = NULL, |
117 | 125 |
aggregateRow = NULL, aggregateCol = NULL, |
118 | 126 |
featureAnnotations = NULL, cellAnnotations = NULL, |
... | ... |
@@ -282,8 +290,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL, |
282 | 290 |
temp_df<-as.data.frame(colData(SCE)[,c(aggregateCol),drop=FALSE]) %>% |
283 | 291 |
unite("new_colnames",1:ncol(.),sep = "_",remove = FALSE) %>% |
284 | 292 |
remove_rownames() %>% |
285 |
- mutate(aggregated_column = new_colnames) %>% |
|
286 |
- dplyr::select(new_colnames, aggregated_column) %>% |
|
293 |
+ # mutate(aggregated_column = new_colnames) %>% |
|
294 |
+ # dplyr::select(new_colnames, aggregated_column) %>% |
|
287 | 295 |
column_to_rownames("new_colnames") |
288 | 296 |
|
289 | 297 |
colData(SCE)<-DataFrame(temp_df) |
... | ... |
@@ -446,7 +454,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL, |
446 | 454 |
show_row_dend = rowDend, |
447 | 455 |
show_column_dend = colDend, |
448 | 456 |
row_dend_reorder = TRUE, |
449 |
- cluster_columns = FALSE, |
|
457 |
+ cluster_columns = cluster_columns, |
|
458 |
+ cluster_rows = cluster_rows, |
|
450 | 459 |
show_column_names = colLabel, |
451 | 460 |
column_names_gp = grid::gpar(fontsize = colLabelSize), |
452 | 461 |
row_gap = rowGap, column_gap = colGap, |
... | ... |
@@ -15,6 +15,8 @@ plotSCEHeatmap( |
15 | 15 |
trim = c(-2, 2), |
16 | 16 |
featureIndexBy = "rownames", |
17 | 17 |
cellIndexBy = "rownames", |
18 |
+ cluster_columns = FALSE, |
|
19 |
+ cluster_rows = FALSE, |
|
18 | 20 |
rowDataName = NULL, |
19 | 21 |
colDataName = NULL, |
20 | 22 |
aggregateRow = NULL, |
... | ... |
@@ -65,8 +67,8 @@ another feature list indicated by \code{featureIndexBy}. Default \code{NULL}.} |
65 | 67 |
(cells). Alternatively, it can be a vector identifying cells in another |
66 | 68 |
cell list indicated by \code{cellIndexBy}. Default \code{NULL}.} |
67 | 69 |
|
68 |
-\item{scale}{Whether to perform z-score scaling on each row. Default |
|
69 |
-\code{TRUE}.} |
|
70 |
+\item{scale}{Whether to perform z-score or min-max scaling on each row. Choose from \code{"zscore"}, \code{"min-max"} or default |
|
71 |
+\code{TRUE} or \code{FALSE}} |
|
70 | 72 |
|
71 | 73 |
\item{trim}{A 2-element numeric vector. Values outside of this range will be |
72 | 74 |
trimmed to their nearest bound. Default \code{c(-2, 2)}} |
... | ... |
@@ -80,6 +82,13 @@ where we search for the non-rowname feature indices. Not applicable for |
80 | 82 |
\code{colData(inSCE)}, or a vector of the same length as \code{ncol(inSCE)}, |
81 | 83 |
where we search for the non-rowname cell indices. Default \code{"rownames"}.} |
82 | 84 |
|
85 |
+\item{cluster_columns}{A logical scalar that turns on/off |
|
86 |
+clustering of columns. Default \code{FALSE}. Clustering columns should be turned off when using a reduced dimension |
|
87 |
+for plotting, as it will be sorted by PCs.} |
|
88 |
+ |
|
89 |
+\item{cluster_rows}{A logical scalar that turns on/off clustering of rows. |
|
90 |
+Default \code{FALSE}.} |
|
91 |
+ |
|
83 | 92 |
\item{rowDataName}{character. The column name(s) in \code{rowData} that need |
84 | 93 |
to be added to the annotation. Not applicable for |
85 | 94 |
\code{plotSCEDimReduceHeatmap}. Default \code{NULL}.} |
... | ... |
@@ -207,34 +207,56 @@ Other heatmap settings will also be automatically filled for a DE specific heatm |
207 | 207 |
<div id="console" class="tabcontent"> |
208 | 208 |
```` |
209 | 209 |
|
210 |
-To present the usage of `plotSCEHeatmap()`, we would like to use a small example provided with SCTK. |
|
210 |
+To present the usage of `plotSCEHeatmap()`, we would like to use a small example provided with SCTK. |
|
211 |
+ |
|
212 |
+**"Raw" plotting** |
|
213 |
+ |
|
214 |
+The minimum setting for `plotSCEHeatmap()` is the input SCE object and the data matrix to plot (default `"logcounts"`). In this way, all cells and features will be presented while no annotation or legend (except the main color scheme) will be shown. |
|
211 | 215 |
|
212 | 216 |
```{R setup, eval=TRUE, message=FALSE, cache=TRUE} |
213 | 217 |
library(singleCellTK) |
214 | 218 |
data("scExample") # This imports SCE object "sce" |
215 | 219 |
sce |
216 |
-``` |
|
217 | 220 |
|
218 |
-**"Raw" plotting** |
|
221 |
+# QC - Remove empty droplets |
|
222 |
+sce2<-subsetSCECols(sce, colData = c("type != 'EmptyDroplet'")) |
|
219 | 223 |
|
220 |
-The minimum setting for `plotSCEHeatmap()` is the input SCE object and the data matrix to plot (default `"logcounts"`). In this way, all cells and features will be presented while no annotation or legend (except the main color scheme) will be shown. |
|
224 |
+# Normalize the counts |
|
225 |
+sce2<-runNormalization(sce2, useAssay = "counts", outAssayName = "logcounts", |
|
226 |
+ normalizationMethod = "logNormCounts",scale = TRUE) |
|
221 | 227 |
|
222 |
-```{R hmFull, eval=TRUE, cache=TRUE} |
|
223 |
-plotSCEHeatmap(sce, useAssay = "counts") |
|
228 |
+# plot the data |
|
229 |
+plotSCEHeatmap(sce2,useAssay = "logcounts",cluster_rows = TRUE, cluster_columns = TRUE) |
|
224 | 230 |
``` |
225 | 231 |
|
226 | 232 |
**Subsetting** |
227 | 233 |
|
228 | 234 |
SCTK allows relatively flexible approaches to select the cells/features to plot. |
229 | 235 |
|
230 |
-The basic way to subset the heatmap is to directly use an index vector that can subset the input SCE object to `featureIndex` and `cellIndex`, including `numeric`, and `logical` vectors, which are widely used, and `character` vector containing the row/col names. Of course, user can directly use a subsetted SCE object as input. |
|
236 |
+The basic way to subset the heatmap is to pass an index vector that can subset the input SCE object to `featureIndex` and `cellIndex`. Supported types include the widely used `numeric` and `logical` vectors, as well as `character` vectors containing the row/col names. Of course, users can also directly use a subsetted SCE object as input. First, let's run a simple clustering workflow to identify clusters and find DE genes for each cluster. We can then subset the heatmap using this list of DE genes. |
|
237 |
+ |
|
238 |
+```{R idxSubset, eval=TRUE, cache=TRUE, message=FALSE, warning=FALSE, echo=FALSE} |
|
239 |
+ |
|
240 |
+# Run Clustering workflow |
|
241 |
+set.seed(348389) |
|
242 |
+sce2 <- runFeatureSelection(sce2, useAssay = "counts") |
|
243 |
+sce2 <- setTopHVG(sce2, featureSubsetName = "hvf") |
|
244 |
+sce2 <- runDimReduce(sce2, useAssay = "logcounts", useFeatureSubset = "hvf", scale = TRUE, reducedDimName = "PCA") |
|
245 |
+sce2 <- runDimReduce(sce2, method = "scaterUMAP", useReducedDim = "PCA", reducedDimName = "UMAP", nComponents = 10) |
|
246 |
+sce2 <- runScranSNN(inSCE = sce2, useReducedDim = "PCA", nComp = 10, clusterName = "scranSNN_PCA") |
|
231 | 247 |
|
232 |
-```{R idxSubset, eval=TRUE, cache=TRUE} |
|
233 |
-# Make up random downsampling numeric vector |
|
234 |
-featureSubset <- sample(nrow(sce), 50) |
|
235 |
-cellSubset <- sample(ncol(sce), 50) |
|
248 |
+# set gene ID as rownames |
|
249 |
+sce2<-setRowNames(sce2,"feature_name") |
|
236 | 250 |
|
237 |
-plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset) |
|
251 |
+ |
|
252 |
+# Find markers for each cluster |
|
253 |
+sce2 <- runFindMarker(sce2, useAssay = "logcounts", method = "wilcox", cluster = "scranSNN_PCA") |
|
254 |
+topMarkers <- getFindMarkerTopTable(sce2, topN = 5, log2fcThreshold = 0.5, |
|
255 |
+ fdrThreshold = 0.05, minClustExprPerc = 0.5, |
|
256 |
+ maxCtrlExprPerc = 0.5, minMeanExpr = 0) |
|
257 |
+ |
|
258 |
+# Using feature index to select for genes in topMarkers list |
|
259 |
+plotSCEHeatmap(sce2,useAssay = "logcounts",rowLabel = TRUE,featureIndex = topMarkers$Gene,cluster_columns = TRUE) |
|
238 | 260 |
``` |
239 | 261 |
|
240 | 262 |
````{=html} |
... | ... |
@@ -246,9 +268,11 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, c |
246 | 268 |
In a more complex situation, where users might only have a set of identifiers which are not inside the row/col names (i.e. unable to directly subset the SCE object), we provide another approach. The subset, in this situation, can be accessed via specifying a vector that contains the identifiers users have, to `featureIndexBy` or `cellIndexBy`. This specification allows directly giving one column name of `rowData` or `colData`. |
247 | 269 |
|
248 | 270 |
```{R indexBy, eval=TRUE, cache=TRUE} |
249 |
-subsetFeatureName <- sample(rowData(sce)$feature_name, 50) |
|
250 |
-subsetCellBarcode <- sample(sce$cell_barcode, 50) |
|
251 |
-plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = subsetFeatureName, featureIndexBy = "feature_name", cellIndex = subsetCellBarcode, cellIndexBy = "cell_barcode") |
|
271 |
+ |
|
272 |
+list_of_FIDs<-c("ENSG00000251562","ENSG00000205542","ENSG00000177954","ENSG00000166710") |
|
273 |
+ |
|
274 |
+plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndexBy = "feature_ID", featureIndex = list_of_FIDs, cluster_rows = TRUE, cluster_columns = TRUE, rowLabel = TRUE) |
|
275 |
+ |
|
252 | 276 |
``` |
253 | 277 |
|
254 | 278 |
````{=html} |
... | ... |
@@ -260,12 +284,8 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = subsetFeatureNam |
260 | 284 |
As introduced before, we allow directly using column names of `rowData` or `colData` to attach color bar annotations. To make use of this functionality, pass a `character` vector to `rowDataName` or `colDataName`. |
261 | 285 |
|
262 | 286 |
```{R colRowAnn, eval=TRUE, cache=TRUE} |
263 |
-# Make up arbitrary annotation, |
|
264 |
-rowRandLabel <- c(rep('aa', 100), rep('bb', 100)) |
|
265 |
-rowData(sce)$randLabel <- rowRandLabel |
|
266 |
-colRandLabel <- c(rep('cc', 195), rep('dd', 195)) |
|
267 |
-colData(sce)$randLabel <- colRandLabel |
|
268 |
-plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowDataName = "randLabel", colDataName = c("type", "randLabel")) |
|
287 |
+# Annotate the columns with the cluster labels found above |
|
288 |
+plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, colDataName = c( "scranSNN_PCA"),rowLabel = TRUE, cluster_rows = TRUE, cluster_columns = TRUE) |
|
269 | 289 |
``` |
270 | 290 |
|
271 | 291 |
````{=html} |
... | ... |
@@ -273,12 +293,12 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, c |
273 | 293 |
<summary><b>Customized Annotation</b></summary> |
274 | 294 |
```` |
275 | 295 |
|
276 |
-Fully customized annotation is also supported, though it can be complexed for users. For the labeling, it is more recommanded to insert the information into `rowData` or `colData` and then make use. For coloring, information should be passed to `featureAnnotationColor` or `cellAnnotationColor`. The argument must be a `list` object with names matching the annotation classes (such as `"randLabel"` and `"type"`); each inner object under a name must be a named vector, with colors as the values and existing categories as the names. The working instance looks like this: |
|
296 |
+Fully customized annotation is also supported, though it can be complex for users. For the labeling, it is recommended to insert the information into `rowData` or `colData` and then refer to it via `rowDataName` or `colDataName`. For coloring, information should be passed to `featureAnnotationColor` or `cellAnnotationColor`. The argument must be a `list` object with names matching the annotation classes (such as `"sample"` and `"type"`); each inner object under a name must be a named vector, with colors as the values and existing categories as the names. A working instance looks like this: |
|
277 | 297 |
|
278 | 298 |
```{R colorEG, eval=FALSE, echo=FALSE} |
279 | 299 |
colAnnotattionColor <- list( |
280 | 300 |
sample = c(pbmc_4k = "FF4D4D"), |
281 |
- type = c(Singlet = "#4DFFFF", Doublet = "#FFC04D", EmptyDroplet = "#4D4DFF") |
|
301 |
+ type = c(Singlet = "#4DFFFF", Doublet = "#FFC04D") |
|
282 | 302 |
) |
283 | 303 |
``` |
284 | 304 |
|
... | ... |
@@ -291,7 +311,27 @@ colAnnotattionColor <- list( |
291 | 311 |
**1. Grouping/Splitting** In some cases, it might be better to do a "semi-heatmap" (i.e. split the rows/columns first and cluster them within each group) to visualize some expression pattern, such as evaluating the differential expression. For this need, use `rowSplitBy` or `colSplitBy`, and the arguments must be a `character` vector that is a subset of the specified annotation. |
292 | 312 |
|
293 | 313 |
```{R split, eval=TRUE, cache=TRUE} |
294 |
-plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowDataName = "randLabel", colDataName = c("type", "randLabel"), rowSplitBy = "randLabel", colSplitBy = "type") |
|
314 |
+ |
|
315 |
+# Create a new label in the rowData using the cluster markers |
|
316 |
+ |
|
317 |
+data.frame(rowData(sce2)) %>% |
|
318 |
+ left_join(topMarkers, by = c("feature_name" = "Gene")) %>% |
|
319 |
+ rename("cluster_markers" = "scranSNN_PCA") -> new_row_data |
|
320 |
+ |
|
321 |
+rownames(new_row_data)<-new_row_data$feature_name |
|
322 |
+ |
|
323 |
+rowData(sce2)<-new_row_data |
|
324 |
+ |
|
325 |
+plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, colDataName = c("type"), aggregateCol = "scranSNN_PCA", rowGap = grid::unit(2, 'mm'),rowLabel = TRUE, rowDataName = "cluster_markers", rowSplitBy = "cluster_markers") |
|
326 |
+ |
|
327 |
+# Adding a summary |
|
328 |
+ |
|
329 |
+data.frame(colData(sce2)) %>% |
|
330 |
+ mutate(summary_col = sample(5,n(), replace = TRUE)) -> new_col_data |
|
331 |
+ |
|
332 |
+colData(sce2)<-DataFrame(new_col_data) |
|
333 |
+ |
|
334 |
+plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, colDataName = c("type"), aggregateCol = "scranSNN_PCA", rowGap = grid::unit(2, 'mm'),rowLabel = TRUE, rowDataName = "cluster_markers", rowSplitBy = "cluster_markers", addCellSummary = "summary_col" ) |
|
295 | 335 |
``` |
296 | 336 |
|
297 | 337 |
**2. Cell/Feature Labeling** Text labels of features or cells can be added via `rowLabel` or `colLabel`. Use `TRUE` or `FALSE` to specify whether to show the `rownames` or `colnames` of the subsetted SCE object. Additionally, giving a single string of a column name of `rowData` or `colData` can enable the labeling of the annotation. Furthermore, users can directly pass a character vector to the parameter, with the same length as either the full or the subsetted SCE object. |
... | ... |
@@ -301,7 +341,7 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, c |
301 | 341 |
**4. Row/Column titles** The row title (`"Genes"`) and column title (`"Cells"`) can be changed or removed by passing a string or `NULL` to `rowTitle` or `colTitle`, respectively. |
302 | 342 |
|
303 | 343 |
```{R label, eval=TRUE, cache=TRUE} |
304 |
-plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowLabel = "feature_name", colLabel = seq(ncol(sce)), colDend = FALSE, rowTitle = "Downsampled features") |
|
344 |
+plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, rowGap = grid::unit(2, 'mm'),rowLabel = TRUE, rowTitle = "Markers",colTitle = "Clusters", cluster_columns = TRUE, cluster_rows = TRUE) |
|
305 | 345 |
``` |
306 | 346 |
|
307 | 347 |
There are still some parameters not mentioned here, but they are not frequently used. Please refer to `?plotSCEHeatmap` as well as `?ComplexHeatmap::Heatmap`. |