Bioconductor Code: singleCellTK

Browse code

Merge pull request #31 from compbiomed/devel

Devel

mingl1997 authored on 28/10/2024 16:00:42 • GitHub committed on 28/10/2024 16:00:42
Showing 4 changed files

NAMESPACE index e34b6d0f..c069ce18 100644
R/plotSCEHeatmap.R index 05358967..ee78d12f 100644
man/plotSCEHeatmap.Rd index ea46597d..f62dba28 100644
vignettes/articles/heatmap.Rmd index 8eed173c..fe018f2e 100644

History View file @ 87a7fb08

@@ -271,6 +271,7 @@ import(GSVAdata)
                      import(SingleCellExperiment)
                      import(eds)
                      importFrom(BiocParallel,SerialParam)
                     +importFrom(ComplexHeatmap,anno_barplot)
                      importFrom(S4Vectors,"metadata<-")
                      importFrom(S4Vectors,metadata)
                      importFrom(SingleCellExperiment,"counts<-")
@@ -317,8 +318,11 @@ importFrom(stats,prcomp)
                      importFrom(stats,quantile)
                      importFrom(stringr,str_c)
                      importFrom(stringr,str_replace_all)
                     +importFrom(tibble,column_to_rownames)
                     +importFrom(tibble,remove_rownames)
                      importFrom(tibble,tibble)
                      importFrom(tidyr,spread)
                     +importFrom(tidyr,unite)
                      importFrom(tools,file_ext)
                      importFrom(utils,head)
                      importFrom(utils,packageVersion)

R/plotSCEHeatmap.R

History View file @ 87a7fb08

@@ -25,6 +25,11 @@
                      #' @param cellIndexBy A single character specifying a column name of
                      #' \code{colData(inSCE)}, or a vector of the same length as \code{ncol(inSCE)},
                      #' where we search for the non-rowname cell indices. Default \code{"rownames"}.
                     +#' @param cluster_columns A logical scalar that turns on/off
                     +#' clustering of columns. Default \code{FALSE}. Clustering of columns should be turned off when using a reduced dim
                     +#' for plotting, as it will be sorted by PCs.
                     +#' @param cluster_rows A logical scalar that turns on/off clustering of rows.
                     +#' Default \code{FALSE}.
                      #' @param rowDataName character. The column name(s) in \code{rowData} that need
                      #' to be added to the annotation. Not applicable for
                      #' \code{plotSCEDimReduceHeatmap}. Default \code{NULL}.
@@ -103,7 +108,8 @@
                      #' @importFrom stringr str_replace_all str_c
                      #' @importFrom stats prcomp quantile
                      #' @importFrom dplyr select arrange group_by count ungroup mutate one_of desc
                     -#' @importFrom tidyr spread unite column_to_rownames remove_rownames
                     +#' @importFrom tidyr spread unite
                     +#' @importFrom tibble column_to_rownames remove_rownames
                      #' @importFrom grid gpar
                      #' @importFrom ComplexHeatmap anno_barplot
                      #' @importFrom rlang .data
@@ -113,6 +119,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL,
                                                 scale = TRUE, trim = c(-2,2),
                                                 featureIndexBy = 'rownames',
                                                 cellIndexBy = 'rownames',
                     +                           cluster_columns = FALSE,
                     +                           cluster_rows = FALSE,
                                                 rowDataName = NULL, colDataName = NULL,
                                                 aggregateRow = NULL, aggregateCol = NULL,
                                                 featureAnnotations = NULL, cellAnnotations = NULL,
@@ -282,8 +290,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL,
                          temp_df<-as.data.frame(colData(SCE)[,c(aggregateCol),drop=FALSE]) %>%
                            unite("new_colnames",1:ncol(.),sep = "_",remove = FALSE) %>%
                            remove_rownames() %>%
                     -      mutate(aggregated_column = new_colnames) %>%
                     -      dplyr::select(new_colnames, aggregated_column) %>%
                     +    #  mutate(aggregated_column = new_colnames) %>%
                     +    #  dplyr::select(new_colnames, aggregated_column) %>%
                            column_to_rownames("new_colnames")
                          colData(SCE)<-DataFrame(temp_df)
@@ -446,7 +454,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL,
                                                      show_row_dend = rowDend,
                                                      show_column_dend = colDend,
                                                      row_dend_reorder = TRUE,
                     -                                cluster_columns = FALSE,
                     +                                cluster_columns = cluster_columns,
                     +                                cluster_rows = cluster_rows,
                                                      show_column_names = colLabel,
                                                      column_names_gp = grid::gpar(fontsize = colLabelSize),
                                                      row_gap = rowGap, column_gap = colGap,

man/plotSCEHeatmap.Rd

History View file @ 87a7fb08

@@ -15,6 +15,8 @@ plotSCEHeatmap(
                        trim = c(-2, 2),
                        featureIndexBy = "rownames",
                        cellIndexBy = "rownames",
                     +  cluster_columns = FALSE,
                     +  cluster_rows = FALSE,
                        rowDataName = NULL,
                        colDataName = NULL,
                        aggregateRow = NULL,
@@ -65,8 +67,8 @@ another feature list indicated by \code{featureIndexBy}. Default \code{NULL}.}
                      (cells). Alternatively, it can be a vector identifying cells in another
                      cell list indicated by \code{cellIndexBy}. Default \code{NULL}.}
                     -\item{scale}{Whether to perform z-score scaling on each row. Default
                     -\code{TRUE}.}
                     +\item{scale}{Whether to perform z-score or min-max scaling on each row. Choose from \code{"zscore"}, \code{"min-max"},
                     +\code{TRUE} or \code{FALSE}. Default \code{TRUE}.}
                      \item{trim}{A 2-element numeric vector. Values outside of this range will be
                      trimmed to their nearest bound. Default \code{c(-2, 2)}}
@@ -80,6 +82,13 @@ where we search for the non-rowname feature indices. Not applicable for
                      \code{colData(inSCE)}, or a vector of the same length as \code{ncol(inSCE)},
                      where we search for the non-rowname cell indices. Default \code{"rownames"}.}
                     +\item{cluster_columns}{A logical scalar that turns on/off
                     +clustering of columns. Default \code{FALSE}. Clustering of columns should be turned off when using a reduced dim
                     +for plotting, as it will be sorted by PCs.}
+                    +
                     +\item{cluster_rows}{A logical scalar that turns on/off clustering of rows.
                     +Default \code{FALSE}.}
+                    +
                      \item{rowDataName}{character. The column name(s) in \code{rowData} that need
                      to be added to the annotation. Not applicable for
                      \code{plotSCEDimReduceHeatmap}. Default \code{NULL}.}

vignettes/articles/heatmap.Rmd

History View file @ 87a7fb08

@@ -207,34 +207,56 @@ Other heatmap settings will also be automatically filled for a DE specific heatm
                      <div id="console" class="tabcontent">
                      ````
                     -To present the usage of `plotSCEHeatmap()`, we would like to use a small example provided with SCTK.
                     +To present the usage of `plotSCEHeatmap()`, we would like to use a small example provided with SCTK.
+                    +
                     +**"Raw" plotting**
+                    +
                     +The minimum setting for `plotSCEHeatmap()` is the input SCE object and the data matrix to plot (default `"logcounts"`). In this way, all cells and features will be presented while no annotation or legend (except the main color scheme) will be shown.
                      ```{R setup, eval=TRUE, message=FALSE, cache=TRUE}
                      library(singleCellTK)
                      data("scExample") # This imports SCE object "sce"
                      sce
                     -```
                     -**"Raw" plotting**
                     +# QC - Remove empty droplets
                     +sce2<-subsetSCECols(sce, colData = c("type != 'EmptyDroplet'"))
                     -The minimum setting for `plotSCEHeatmap()` is the input SCE object and the data matrix to plot (default `"logcounts"`). In this way, all cells and features will be presented while no annotation or legend (except the main color scheme) will be shown.
                     +# Normalize the counts
                     +sce2<-runNormalization(sce2, useAssay = "counts", outAssayName = "logcounts",
                     +                        normalizationMethod = "logNormCounts",scale = TRUE)
                     -```{R hmFull, eval=TRUE, cache=TRUE}
                     -plotSCEHeatmap(sce, useAssay = "counts")
                     +# plot the data
                     +plotSCEHeatmap(sce2,useAssay = "logcounts",cluster_rows = TRUE, cluster_columns = TRUE)
                      ```
                      **Subsetting**
                      SCTK allows relatively flexible approaches to select the cells/features to plot.
                     -The basic way to subset the heatmap is to directly use an index vector that can subset the input SCE object to `featureIndex` and `cellIndex`, including `numeric`, and `logical` vectors, which are widely used, and `character` vector containing the row/col names. Of course, user can directly use a subsetted SCE object as input.
                     +The basic way to subset the heatmap is to directly pass an index vector that can subset the input SCE object to `featureIndex` and `cellIndex`, including `numeric` and `logical` vectors, which are widely used, and `character` vectors containing the row/col names. Of course, users can also directly use a subsetted SCE object as input. First, let's run a simple clustering workflow to identify clusters and find DE genes for each cluster. We can then subset the heatmap using this list of DE genes.
+                    +
                     +```{R idxSubset, eval=TRUE, cache=TRUE, message=FALSE, warning=FALSE, echo=FALSE}
+                    +
                     +# Run Clustering workflow
                     +set.seed(348389)
                     +sce2 <- runFeatureSelection(sce2, useAssay = "counts")
                     +sce2 <- setTopHVG(sce2, featureSubsetName = "hvf")
                     +sce2 <- runDimReduce(sce2, useAssay = "logcounts", useFeatureSubset = "hvf", scale = TRUE, reducedDimName = "PCA")
                     +sce2 <- runDimReduce(sce2, method = "scaterUMAP", useReducedDim = "PCA", reducedDimName = "UMAP", nComponents = 10)
                     +sce2 <- runScranSNN(inSCE = sce2, useReducedDim = "PCA", nComp = 10, clusterName = "scranSNN_PCA")
                     -```{R idxSubset, eval=TRUE, cache=TRUE}
                     -# Make up random downsampling numeric vector
                     -featureSubset <- sample(nrow(sce), 50)
                     -cellSubset <- sample(ncol(sce), 50)
                     +# set gene ID as rownames
                     +sce2<-setRowNames(sce2,"feature_name")
                     -plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset)
+                    +
                     +# Find markers for each cluster
                     +sce2 <- runFindMarker(sce2, useAssay = "logcounts", method = "wilcox", cluster = "scranSNN_PCA")
                     +topMarkers <- getFindMarkerTopTable(sce2, topN = 5, log2fcThreshold = 0.5,
                     +                                    fdrThreshold = 0.05, minClustExprPerc = 0.5,
                     +                                    maxCtrlExprPerc = 0.5, minMeanExpr = 0)
+                    +
                     +# Using feature index to select for genes in topMarkers list
                     +plotSCEHeatmap(sce2,useAssay = "logcounts",rowLabel = TRUE,featureIndex = topMarkers$Gene,cluster_columns = TRUE)
                      ```
                      ````{=html}
@@ -246,9 +268,11 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, c
                      In a more complex situation, where users might only have a set of identifiers which are not inside the row/col names (i.e. unable to directly subset the SCE object), we provide another approach. The subset, in this situation, can be accessed via specifying a vector that contains the identifiers users have, to `featureIndexBy` or `cellIndexBy`. This specification allows directly giving one column name of `rowData` or `colData`.
                      ```{R indexBy, eval=TRUE, cache=TRUE}
                     -subsetFeatureName <- sample(rowData(sce)$feature_name, 50)
                     -subsetCellBarcode <- sample(sce$cell_barcode, 50)
                     -plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = subsetFeatureName, featureIndexBy = "feature_name", cellIndex = subsetCellBarcode, cellIndexBy = "cell_barcode")
+                    +
                     +list_of_FIDs<-c("ENSG00000251562","ENSG00000205542","ENSG00000177954","ENSG00000166710")
+                    +
                     +plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndexBy = "feature_ID",  featureIndex = list_of_FIDs, cluster_rows = TRUE, cluster_columns = TRUE, rowLabel = TRUE)
+                    +
                      ```
                      ````{=html}
@@ -260,12 +284,8 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = subsetFeatureNam
                      As introduced before, we allow directly using column names of `rowData` or `colData` to attach color bar annotations. To make use of this functionality, pass a `character` vector to `rowDataName` or `colDataName`.
                      ```{R colRowAnn, eval=TRUE, cache=TRUE}
                     -# Make up arbitrary annotation,
                     -rowRandLabel <- c(rep('aa', 100), rep('bb', 100))
                     -rowData(sce)$randLabel <- rowRandLabel
                     -colRandLabel <- c(rep('cc', 195), rep('dd', 195))
                     -colData(sce)$randLabel <- colRandLabel
                     -plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowDataName = "randLabel", colDataName = c("type", "randLabel"))
                     +# Create new annotation for markers
                     +plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, colDataName = c( "scranSNN_PCA"),rowLabel = TRUE, cluster_rows = TRUE, cluster_columns = TRUE)
                      ```
                      ````{=html}
@@ -273,12 +293,12 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, c
                        <summary><b>Customized Annotation</b></summary>
                      ````
                     -Fully customized annotation is also supported, though it can be complexed for users. For the labeling, it is more recommanded to insert the information into `rowData` or `colData` and then make use. For coloring, information should be passed to `featureAnnotationColor` or `cellAnnotationColor`. The argument must be a `list` object with names matching the annotation classes (such as `"randLabel"` and `"type"`); each inner object under a name must be a named vector, with colors as the values and existing categories as the names. The working instance looks like this:
                     +Fully customized annotation is also supported, though it can be complex for users. For the labeling, it is recommended to insert the information into `rowData` or `colData` and then make use of it. For coloring, information should be passed to `featureAnnotationColor` or `cellAnnotationColor`. The argument must be a `list` object with names matching the annotation classes (such as `"randLabel"` and `"type"`); each inner object under a name must be a named vector, with colors as the values and existing categories as the names. A working example looks like this:
                      ```{R colorEG, eval=FALSE, echo=FALSE}
                      colAnnotattionColor <- list(
                        sample = c(pbmc_4k = "FF4D4D"),
                     -  type = c(Singlet = "#4DFFFF", Doublet = "#FFC04D", EmptyDroplet = "#4D4DFF")
                     +  type = c(Singlet = "#4DFFFF", Doublet = "#FFC04D")
+                     )
                      ```
@@ -291,7 +311,27 @@ colAnnotattionColor <- list(
                      **1. Grouping/Splitting** In some cases, it might be better to do a "semi-heatmap" (i.e. split the rows/columns first and cluster them within each group) to visualize some expression pattern, such as evaluating the differential expression. For this need, use `rowSplitBy` or `colSplitBy`, and the arguments must be a `character` vector that is a subset of the specified annotation.
                      ```{R split, eval=TRUE, cache=TRUE}
                     -plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowDataName = "randLabel", colDataName = c("type", "randLabel"), rowSplitBy = "randLabel", colSplitBy = "type")
+                    +
                     +# Create a new label in the rowData using the cluster markers
+                    +
                     +data.frame(rowData(sce2)) %>%
                     +  left_join(topMarkers, by = c("feature_name" = "Gene")) %>%
                     +  rename("cluster_markers" = "scranSNN_PCA") -> new_row_data
+                    +
                     +rownames(new_row_data)<-new_row_data$feature_name
+                    +
                     +rowData(sce2)<-new_row_data
+                    +
                     +plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, colDataName = c("type"), aggregateCol = "scranSNN_PCA", rowGap = grid::unit(2, 'mm'),rowLabel = TRUE, rowDataName = "cluster_markers", rowSplitBy = "cluster_markers")
+                    +
                     +# Adding a summary
+                    +
                     +data.frame(colData(sce2)) %>%
                     +  mutate(summary_col = sample(5,n(), replace = TRUE)) -> new_col_data
+                    +
                     +colData(sce2)<-DataFrame(new_col_data)
+                    +
                     +plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, colDataName = c("type"), aggregateCol = "scranSNN_PCA", rowGap = grid::unit(2, 'mm'),rowLabel = TRUE, rowDataName = "cluster_markers", rowSplitBy = "cluster_markers", addCellSummary = "summary_col" )
                      ```
                      **2. Cell/Feature Labeling** Text labels of features or cells can be added via `rowLabel` or `colLabel`. Use `TRUE` or `FALSE` to specify whether to show the `rownames` or `colnames` of the subsetted SCE object. Additionally, giving a single string of a column name of `rowData` or `colData` can enable the labeling of the annotation. Furthermore, users can directly pass a character vector to the parameter, with the same length as either the full SCE object or the subsetted one.
@@ -301,7 +341,7 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, c
                      **4. Row/Column titles** The row title (`"Genes"`) and column title (`"Cells"`) can be changed or removed by passing a string or `NULL` to `rowTitle` or `colTitle`, respectively.
                      ```{R label, eval=TRUE, cache=TRUE}
                     -plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowLabel = "feature_name", colLabel = seq(ncol(sce)), colDend = FALSE, rowTitle = "Downsampled features")
                     +plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, rowGap = grid::unit(2, 'mm'),rowLabel = TRUE,  rowTitle = "Markers",colTitle = "Clusters", cluster_columns = TRUE, cluster_rows = TRUE)
                      ```
                      There are still some parameters not mentioned here, but they are not frequently used. Please refer to `?plotSCEHeatmap` as well as `?ComplexHeatmap::Heatmap`.