R/plotFindMarkerHeatmap.R
c9178750
 #' Plot a heatmap to visualize the result of \code{\link{runFindMarker}}
a9233cb6
 #' @description This function will first reads the result saved in
 #' \code{metadata} slot, named by \code{"findMarker"} and generated by
b1877916
 #' \code{\link{runFindMarker}}. Then it do the filtering on the statistics
a9233cb6
 #' based on the input parameters and get unique genes to plot. We choose the
 #' genes that are identified as up-regulated only. As for the genes identified
 #' as up-regulated for multiple clusters, we only keep the belonging towards the
 #' one they have the highest Log2FC value.
 #' In the heatmap, there will always be a cell annotation for the cluster
 #' labeling used when finding the markers, and a feature annotation for which
 #' cluster each gene belongs to. And by default we split the heatmap by these
 #' two annotations. Additional legends can be added and the splitting can be
 #' canceled.
 #' @param inSCE \linkS4class{SingleCellExperiment} inherited object.
 #' @param log2fcThreshold Only use DEGs with the absolute values of log2FC
 #' larger than this value. Default \code{1}
 #' @param fdrThreshold Only use DEGs with FDR value smaller than this value.
 #' Default \code{0.05}
df03c634
 #' @param minClustExprPerc A numeric scalar. The minimum cutoff of the
 #' percentage of cells in the cluster of interests that expressed the marker
 #' gene. Default \code{0.7}.
 #' @param maxCtrlExprPerc A numeric scalar. The maximum cutoff of the
 #' percentage of cells out of the cluster (control group) that expressed the
 #' marker gene. Default \code{0.4}.
 #' @param minMeanExpr A numeric scalar. The minimum cutoff of the mean
 #' expression value of the marker in the cluster of interests. Default \code{1}.
a9233cb6
 #' @param topN An integer. Only to plot this number of top markers for each
 #' cluster in maximum, in terms of log2FC value. Use \code{NULL} to cancel the
 #' top N subscription. Default \code{10}.
 #' @param orderBy The ordering method of the clusters on the splitted heatmap.
 #' Can be chosen from \code{"size"} or \code{"name"}, specified with vector of
 #' ordered unique cluster labels, or set as \code{NULL} for unsplitted heatmap.
 #' Default \code{"size"}.
 #' @param decreasing Order the cluster decreasingly. Default \code{TRUE}.
ef608c1e
 #' @param rowLabel \code{TRUE} for displaying \code{rownames} of \code{inSCE}, a
 #' \code{rowData} variable to use other feature identifiers, or a vector for
 #' customized row labels. Use \code{FALSE} for not displaying. Default
 #' \code{TRUE}.
a9233cb6
 #' @param rowDataName character. The column name(s) in \code{rowData} that need
 #' to be added to the annotation. Default \code{NULL}.
 #' @param colDataName character. The column name(s) in \code{colData} that need
 #' to be added to the annotation. Default \code{NULL}.
 #' @param featureAnnotations \code{data.frame}, with \code{rownames} containing
 #' all the features going to be plotted. Character columns should be factors.
 #' Default \code{NULL}.
 #' @param cellAnnotations \code{data.frame}, with \code{rownames} containing
 #' all the cells going to be plotted. Character columns should be factors.
 #' Default \code{NULL}.
 #' @param featureAnnotationColor A named list. Customized color settings for
 #' feature labeling. Should match the entries in the \code{featureAnnotations}
 #' or \code{rowDataName}. For each entry, there should be a list/vector of
 #' colors named with categories. Default \code{NULL}.
 #' @param cellAnnotationColor A named list. Customized color settings for
 #' cell labeling. Should match the entries in the \code{cellAnnotations} or
 #' \code{colDataName}. For each entry, there should be a list/vector of colors
 #' named with categories. Default \code{NULL}.
 #' @param colSplitBy character vector. Do semi-heatmap based on the grouping of
 #' this(these) annotation(s). Should exist in either \code{colDataName} or
 #' \code{names(cellAnnotations)}. Default is the value of \code{cluster} in
c9178750
 #' \code{\link{runFindMarker}} when \code{orderBy} is not \code{NULL}, or
a9233cb6
 #' \code{NULL} otherwise.
 #' @param rowSplitBy character vector. Do semi-heatmap based on the grouping of
 #' this(these) annotation(s). Should exist in either \code{rowDataName} or
 #' \code{names(featureAnnotations)}. Default \code{"marker"}, which indicates an
 #' auto generated annotation for this plot.
f0f8cb90
 #' @param rowDend Whether to display row dendrogram. Default \code{FALSE}.
 #' @param colDend Whether to display column dendrogram. Default \code{FALSE}.
cd268d2f
 #' @param title Text of the title, at the top of the heatmap. Default
 #' \code{"Top Marker Heatmap"}.
a9233cb6
 #' @param ... Other arguments passed to \code{\link{plotSCEHeatmap}}.
 #' @return A \code{\link[ComplexHeatmap]{Heatmap}} object
c9178750
 #' @seealso \code{\link{runFindMarker}}, \code{\link{getFindMarkerTopTable}}
a9233cb6
 #' @author Yichen Wang
 #' @export
930ec858
 #' @examples
 #' data("sceBatches")
b1877916
 #' logcounts(sceBatches) <- log1p(counts(sceBatches))
930ec858
 #' sce.w <- subsetSCECols(sceBatches, colData = "batch == 'w'")
b1877916
 #' sce.w <- runFindMarker(sce.w, method = "wilcox", cluster = "cell_type")
 #' plotFindMarkerHeatmap(sce.w)
 plotFindMarkerHeatmap <- function(inSCE, orderBy = 'size',
                               log2fcThreshold = 1, fdrThreshold = 0.05,
                               minClustExprPerc = 0.7, maxCtrlExprPerc = 0.4,
2599ca7f
                               minMeanExpr = 1, topN = 10, decreasing = TRUE,
84664fd7
                               rowLabel = TRUE,
b1877916
                               rowDataName = NULL, colDataName = NULL,
                               featureAnnotations = NULL, cellAnnotations = NULL,
2599ca7f
                               featureAnnotationColor = NULL,
                               cellAnnotationColor = NULL,
                               colSplitBy = NULL,
b1877916
                               rowSplitBy = "marker", rowDend = FALSE,
2599ca7f
                               colDend = FALSE,
c9178750
                               title = "Top Marker Heatmap", ...) {
   if (!inherits(inSCE, 'SingleCellExperiment')) {
2599ca7f
     stop('"inSCE" should be a SingleCellExperiment inherited Object.')
   }
c9178750
   if (!'findMarker' %in% names(S4Vectors::metadata(inSCE))) {
2599ca7f
     stop('"findMarker" result not found in metadata. ',
c9178750
          'Run runFindMarker() before plotting.')
2599ca7f
   }
b1877916
   colSplitBy <-
2599ca7f
     ifelse(is.null(orderBy), NULL, colnames(metadata(inSCE)$findMarker)[5])
b1877916
 
c9178750
   if (!is.null(orderBy)) {
     if (length(orderBy) == 1) {
       if (!orderBy %in% c('size', 'name')) {
2599ca7f
         stop('Single charater setting for "orderBy" should be chosen',
              'from "size" or "name".')
a9233cb6
       }
2599ca7f
     }# else if(any(!SummarizedExperiment::colData(inSCE)[[cluster]] %in%
     #             orderBy)){
     #   stop('Invalid "orderBy", please input a vector of unique ordered ',
     #        'cluster identifiers that match all clusters in ',
     #        'colData(inSCE) specified by "cluster" to adjust the order ',
     #        'of clusters.')
     #}
   }
   # Extract and basic filter
   degFull <- S4Vectors::metadata(inSCE)$findMarker
c9178750
   if (!all(c("Gene", "Pvalue", "Log2_FC", "FDR") %in%
           colnames(degFull)[seq_len(4)])) {
2599ca7f
     stop('"findMarker" result cannot be interpreted properly')
   }
84664fd7
   clusterVar <- attr(degFull, "cluster")
   clusterName <- attr(degFull, "clusterName")
2599ca7f
   #if(length(which(!degFull$Gene %in% rownames(inSCE))) > 0){
   # Remove genes happen in deg table but not in sce. Weird.
   # degFull <- degFull[-which(!degFull$Gene %in% rownames(inSCE)),]
   #}
84664fd7
   if (!is.null(log2fcThreshold)) {
2599ca7f
     degFull <- degFull[degFull$Log2_FC > log2fcThreshold,]
   }
84664fd7
   if (!is.null(fdrThreshold)) {
2599ca7f
     degFull <- degFull[degFull$FDR < fdrThreshold,]
   }
   if (!is.null(minClustExprPerc)) {
     degFull <- degFull[degFull$clusterExprPerc >= minClustExprPerc,]
   }
   if (!is.null(maxCtrlExprPerc)) {
     degFull <- degFull[degFull$ControlExprPerc <= maxCtrlExprPerc,]
   }
   if (!is.null(minMeanExpr)) {
     degFull <- degFull[degFull$clusterAveExpr >= minMeanExpr,]
   }
   # Remove duplicate by assigning the duplicated genes to the cluster where
   # their log2FC is the highest.
   # Done by keeping all unique genes and the rows  with highest Log2FC entry
   # for each duplicated gene.
   dup.gene <- unique(degFull$Gene[duplicated(degFull$Gene)])
c9178750
   for (g in dup.gene) {
2599ca7f
     deg.gix <- degFull$Gene == g
     deg.gtable <- degFull[deg.gix,]
     toKeep <- which.max(deg.gtable$Log2_FC)
     toRemove <- which(deg.gix)[-toKeep]
     degFull <- degFull[-toRemove,]
   }
   selected <- character()
   if (!is.null(topN)) {
     for (c in unique(degFull[[clusterName]])) {
       deg.cluster <- degFull[degFull[[clusterName]] == c,]
       deg.cluster <- deg.cluster[order(deg.cluster$Log2_FC,
                                        decreasing = TRUE),]
       if (dim(deg.cluster)[1] > topN) {
         deg.cluster <- deg.cluster[seq_len(topN),]
       }
       selected <- c(selected, deg.cluster$Gene)
a9233cb6
     }
2599ca7f
   } else {
     selected <- degFull$Gene
   }
   degFull <- degFull[degFull$Gene %in% selected,]
b1877916
 
2599ca7f
   if (!is.null(attr(degFull, "useReducedDim"))) {
     mat <- t(expData(inSCE, attr(degFull, "useReducedDim")))
     assayList <- list(mat)
     names(assayList) <- attr(degFull, "useReducedDim")
     tmpSCE <- SingleCellExperiment::SingleCellExperiment(assays = assayList)
b1877916
     SummarizedExperiment::colData(tmpSCE) <-
2599ca7f
       SummarizedExperiment::colData(inSCE)
     assayName <- attr(degFull, "useReducedDim")
b1877916
 
2599ca7f
   } else {
     tmpSCE <- inSCE
     assayName <- attr(degFull, "useAssay")
b1877916
 
2599ca7f
   }
b1877916
 
2599ca7f
   inSCE <- tmpSCE[degFull$Gene,]
84664fd7
   z <- clusterVar
c9178750
   if (is.factor(z)) {
2599ca7f
     z.order <- levels(z)
     # When 'z' is a subset factor, there would be redundant levels.
     z.order <- z.order[z.order %in% as.vector(unique(z))]
     z <- factor(z, levels = z.order)
     SummarizedExperiment::rowData(inSCE)[[clusterName]] <-
       factor(degFull[[clusterName]], levels = z.order)
   } else {
     SummarizedExperiment::rowData(inSCE)[[clusterName]] <-
       degFull[[clusterName]]
   }
c9178750
   if (!is.null(orderBy)) {
     if (length(orderBy) == 1 && orderBy == 'size') {
2599ca7f
       z.order <- names(table(z)[order(table(z), decreasing = decreasing)])
c9178750
     } else if (length(orderBy) == 1 && orderBy == 'name') {
       if (is.factor(z)) {
2599ca7f
         z.order <- sort(levels(z), decreasing = decreasing)
       } else {
         z.order <- sort(unique(z), decreasing = decreasing)
       }
a9233cb6
     } else {
2599ca7f
       z.order <- orderBy[-which(!orderBy %in% z)]
a9233cb6
     }
2599ca7f
   }
   SummarizedExperiment::colData(inSCE)[[clusterName]] <-
     factor(z, levels = z.order)
b1877916
 
 
2599ca7f
   y <- SummarizedExperiment::rowData(inSCE)[[clusterName]]
b1877916
   SummarizedExperiment::rowData(inSCE)[["marker"]] <-
2599ca7f
     factor(y, levels = z.order)
84664fd7
 
   #markerConsistColor <-
   #  list(marker = dataAnnotationColor(inSCE, 'col')[[clusterName]])
b1877916
 
2599ca7f
   # Organize plotSCEHeatmap arguments
   colDataName <- c(colDataName, clusterName)
   rowDataName <- c(rowDataName, "marker")
b1877916
 
84664fd7
   #featureAnnotationColor <- c(featureAnnotationColor, markerConsistColor)
b1877916
 
ef608c1e
   hm <- plotSCEHeatmap(inSCE, useAssay = assayName, colDataName = colDataName,
2599ca7f
                        rowDataName = rowDataName, colSplitBy = colSplitBy,
                        rowSplitBy = rowSplitBy,
                        featureAnnotations = featureAnnotations,
                        cellAnnotations = cellAnnotations,
                        featureAnnotationColor = featureAnnotationColor,
fb78e682
                        cellAnnotationColor = cellAnnotationColor, rowLabel = rowLabel,
2599ca7f
                        rowDend = rowDend, colDend = colDend, title = title, ...)
   return(hm)
a9233cb6
 }
2599ca7f
 
b1877916
 #' @rdname plotFindMarkerHeatmap
 #' @export
84664fd7
 plotMarkerDiffExp <- function(inSCE, orderBy = 'size',
                               log2fcThreshold = 1, fdrThreshold = 0.05,
                               minClustExprPerc = 0.7, maxCtrlExprPerc = 0.4,
                               minMeanExpr = 1, topN = 10, decreasing = TRUE,
                               rowDataName = NULL, colDataName = NULL,
                               featureAnnotations = NULL, cellAnnotations = NULL,
                               featureAnnotationColor = NULL,
                               cellAnnotationColor = NULL,
                               colSplitBy = NULL,
                               rowSplitBy = "marker", rowDend = FALSE,
                               colDend = FALSE,
                               title = "Top Marker Heatmap", ...) {
b1877916
   .Deprecated("plotFindMarkerHeatmap")
84664fd7
   plotFindMarkerHeatmap(inSCE = inSCE, orderBy = orderBy,
                         log2fcThreshold = log2fcThreshold,
                         fdrThreshold = fdrThreshold,
                         minClustExprPerc = minClustExprPerc,
                         maxCtrlExprPerc = maxCtrlExprPerc,
                         minMeanExpr = minMeanExpr, topN = topN,
                         decreasing = decreasing,
                         rowDataName = rowDataName, colDataName = colDataName,
                         featureAnnotations = featureAnnotations,
                         cellAnnotations = cellAnnotations,
                         featureAnnotationColor = featureAnnotationColor,
                         cellAnnotationColor = cellAnnotationColor,
                         colSplitBy = colSplitBy,
                         rowSplitBy = rowSplitBy, rowDend = rowDend,
                         colDend = colDend, title = title, ...)
b1877916
 }