#' Plot a heatmap to visualize the result of \code{\link{runFindMarker}} #' @description This function will first reads the result saved in #' \code{metadata} slot, named by \code{"findMarker"} and generated by #' \code{\link{runFindMarker}}. Then it do the filtering on the statistics #' based on the input parameters and get unique genes to plot. We choose the #' genes that are identified as up-regulated only. As for the genes identified #' as up-regulated for multiple clusters, we only keep the belonging towards the #' one they have the highest Log2FC value. #' In the heatmap, there will always be a cell annotation for the cluster #' labeling used when finding the markers, and a feature annotation for which #' cluster each gene belongs to. And by default we split the heatmap by these #' two annotations. Additional legends can be added and the splitting can be #' canceled. #' @param inSCE \linkS4class{SingleCellExperiment} inherited object. #' @param log2fcThreshold Only use DEGs with the absolute values of log2FC #' larger than this value. Default \code{1} #' @param fdrThreshold Only use DEGs with FDR value smaller than this value. #' Default \code{0.05} #' @param minClustExprPerc A numeric scalar. The minimum cutoff of the #' percentage of cells in the cluster of interests that expressed the marker #' gene. Default \code{0.7}. #' @param maxCtrlExprPerc A numeric scalar. The maximum cutoff of the #' percentage of cells out of the cluster (control group) that expressed the #' marker gene. Default \code{0.4}. #' @param minMeanExpr A numeric scalar. The minimum cutoff of the mean #' expression value of the marker in the cluster of interests. Default \code{1}. #' @param topN An integer. Only to plot this number of top markers for each #' cluster in maximum, in terms of log2FC value. Use \code{NULL} to cancel the #' top N subscription. Default \code{10}. #' @param orderBy The ordering method of the clusters on the splitted heatmap. #' Can be chosen from \code{"size"} or \code{"name"}, specified with vector of #' ordered unique cluster labels, or set as \code{NULL} for unsplitted heatmap. #' Default \code{"size"}. #' @param decreasing Order the cluster decreasingly. Default \code{TRUE}. #' @param rowLabel \code{TRUE} for displaying \code{rownames} of \code{inSCE}, a #' \code{rowData} variable to use other feature identifiers, or a vector for #' customized row labels. Use \code{FALSE} for not displaying. Default #' \code{TRUE}. #' @param rowDataName character. The column name(s) in \code{rowData} that need #' to be added to the annotation. Default \code{NULL}. #' @param colDataName character. The column name(s) in \code{colData} that need #' to be added to the annotation. Default \code{NULL}. #' @param featureAnnotations \code{data.frame}, with \code{rownames} containing #' all the features going to be plotted. Character columns should be factors. #' Default \code{NULL}. #' @param cellAnnotations \code{data.frame}, with \code{rownames} containing #' all the cells going to be plotted. Character columns should be factors. #' Default \code{NULL}. #' @param featureAnnotationColor A named list. Customized color settings for #' feature labeling. Should match the entries in the \code{featureAnnotations} #' or \code{rowDataName}. For each entry, there should be a list/vector of #' colors named with categories. Default \code{NULL}. #' @param cellAnnotationColor A named list. Customized color settings for #' cell labeling. Should match the entries in the \code{cellAnnotations} or #' \code{colDataName}. For each entry, there should be a list/vector of colors #' named with categories. Default \code{NULL}. #' @param colSplitBy character vector. Do semi-heatmap based on the grouping of #' this(these) annotation(s). Should exist in either \code{colDataName} or #' \code{names(cellAnnotations)}. Default is the value of \code{cluster} in #' \code{\link{runFindMarker}} when \code{orderBy} is not \code{NULL}, or #' \code{NULL} otherwise. #' @param rowSplitBy character vector. Do semi-heatmap based on the grouping of #' this(these) annotation(s). Should exist in either \code{rowDataName} or #' \code{names(featureAnnotations)}. Default \code{"marker"}, which indicates an #' auto generated annotation for this plot. #' @param rowDend Whether to display row dendrogram. Default \code{FALSE}. #' @param colDend Whether to display column dendrogram. Default \code{FALSE}. #' @param title Text of the title, at the top of the heatmap. Default #' \code{"Top Marker Heatmap"}. #' @param ... Other arguments passed to \code{\link{plotSCEHeatmap}}. #' @return A \code{\link[ComplexHeatmap]{Heatmap}} object #' @seealso \code{\link{runFindMarker}}, \code{\link{getFindMarkerTopTable}} #' @author Yichen Wang #' @export #' @examples #' data("sceBatches") #' logcounts(sceBatches) <- log1p(counts(sceBatches)) #' sce.w <- subsetSCECols(sceBatches, colData = "batch == 'w'") #' sce.w <- runFindMarker(sce.w, method = "wilcox", cluster = "cell_type") #' plotFindMarkerHeatmap(sce.w) plotFindMarkerHeatmap <- function(inSCE, orderBy = 'size', log2fcThreshold = 1, fdrThreshold = 0.05, minClustExprPerc = 0.7, maxCtrlExprPerc = 0.4, minMeanExpr = 1, topN = 10, decreasing = TRUE, rowLabel = TRUE, rowDataName = NULL, colDataName = NULL, featureAnnotations = NULL, cellAnnotations = NULL, featureAnnotationColor = NULL, cellAnnotationColor = NULL, colSplitBy = NULL, rowSplitBy = "marker", rowDend = FALSE, colDend = FALSE, title = "Top Marker Heatmap", ...) { if (!inherits(inSCE, 'SingleCellExperiment')) { stop('"inSCE" should be a SingleCellExperiment inherited Object.') } if (!'findMarker' %in% names(S4Vectors::metadata(inSCE))) { stop('"findMarker" result not found in metadata. ', 'Run runFindMarker() before plotting.') } colSplitBy <- ifelse(is.null(orderBy), NULL, colnames(metadata(inSCE)$findMarker)[5]) if (!is.null(orderBy)) { if (length(orderBy) == 1) { if (!orderBy %in% c('size', 'name')) { stop('Single charater setting for "orderBy" should be chosen', 'from "size" or "name".') } }# else if(any(!SummarizedExperiment::colData(inSCE)[[cluster]] %in% # orderBy)){ # stop('Invalid "orderBy", please input a vector of unique ordered ', # 'cluster identifiers that match all clusters in ', # 'colData(inSCE) specified by "cluster" to adjust the order ', # 'of clusters.') #} } # Extract and basic filter degFull <- S4Vectors::metadata(inSCE)$findMarker if (!all(c("Gene", "Pvalue", "Log2_FC", "FDR") %in% colnames(degFull)[seq_len(4)])) { stop('"findMarker" result cannot be interpreted properly') } clusterVar <- attr(degFull, "cluster") clusterName <- attr(degFull, "clusterName") #if(length(which(!degFull$Gene %in% rownames(inSCE))) > 0){ # Remove genes happen in deg table but not in sce. Weird. # degFull <- degFull[-which(!degFull$Gene %in% rownames(inSCE)),] #} if (!is.null(log2fcThreshold)) { degFull <- degFull[degFull$Log2_FC > log2fcThreshold,] } if (!is.null(fdrThreshold)) { degFull <- degFull[degFull$FDR < fdrThreshold,] } if (!is.null(minClustExprPerc)) { degFull <- degFull[degFull$clusterExprPerc >= minClustExprPerc,] } if (!is.null(maxCtrlExprPerc)) { degFull <- degFull[degFull$ControlExprPerc <= maxCtrlExprPerc,] } if (!is.null(minMeanExpr)) { degFull <- degFull[degFull$clusterAveExpr >= minMeanExpr,] } # Remove duplicate by assigning the duplicated genes to the cluster where # their log2FC is the highest. # Done by keeping all unique genes and the rows with highest Log2FC entry # for each duplicated gene. dup.gene <- unique(degFull$Gene[duplicated(degFull$Gene)]) for (g in dup.gene) { deg.gix <- degFull$Gene == g deg.gtable <- degFull[deg.gix,] toKeep <- which.max(deg.gtable$Log2_FC) toRemove <- which(deg.gix)[-toKeep] degFull <- degFull[-toRemove,] } selected <- character() if (!is.null(topN)) { for (c in unique(degFull[[clusterName]])) { deg.cluster <- degFull[degFull[[clusterName]] == c,] deg.cluster <- deg.cluster[order(deg.cluster$Log2_FC, decreasing = TRUE),] if (dim(deg.cluster)[1] > topN) { deg.cluster <- deg.cluster[seq_len(topN),] } selected <- c(selected, deg.cluster$Gene) } } else { selected <- degFull$Gene } degFull <- degFull[degFull$Gene %in% selected,] if (!is.null(attr(degFull, "useReducedDim"))) { mat <- t(expData(inSCE, attr(degFull, "useReducedDim"))) assayList <- list(mat) names(assayList) <- attr(degFull, "useReducedDim") tmpSCE <- SingleCellExperiment::SingleCellExperiment(assays = assayList) SummarizedExperiment::colData(tmpSCE) <- SummarizedExperiment::colData(inSCE) assayName <- attr(degFull, "useReducedDim") } else { tmpSCE <- inSCE assayName <- attr(degFull, "useAssay") } inSCE <- tmpSCE[degFull$Gene,] z <- clusterVar if (is.factor(z)) { z.order <- levels(z) # When 'z' is a subset factor, there would be redundant levels. z.order <- z.order[z.order %in% as.vector(unique(z))] z <- factor(z, levels = z.order) SummarizedExperiment::rowData(inSCE)[[clusterName]] <- factor(degFull[[clusterName]], levels = z.order) } else { SummarizedExperiment::rowData(inSCE)[[clusterName]] <- degFull[[clusterName]] } if (!is.null(orderBy)) { if (length(orderBy) == 1 && orderBy == 'size') { z.order <- names(table(z)[order(table(z), decreasing = decreasing)]) } else if (length(orderBy) == 1 && orderBy == 'name') { if (is.factor(z)) { z.order <- sort(levels(z), decreasing = decreasing) } else { z.order <- sort(unique(z), decreasing = decreasing) } } else { z.order <- orderBy[-which(!orderBy %in% z)] } } SummarizedExperiment::colData(inSCE)[[clusterName]] <- factor(z, levels = z.order) y <- SummarizedExperiment::rowData(inSCE)[[clusterName]] SummarizedExperiment::rowData(inSCE)[["marker"]] <- factor(y, levels = z.order) #markerConsistColor <- # list(marker = dataAnnotationColor(inSCE, 'col')[[clusterName]]) # Organize plotSCEHeatmap arguments colDataName <- c(colDataName, clusterName) rowDataName <- c(rowDataName, "marker") #featureAnnotationColor <- c(featureAnnotationColor, markerConsistColor) hm <- plotSCEHeatmap(inSCE, useAssay = assayName, colDataName = colDataName, rowDataName = rowDataName, colSplitBy = colSplitBy, rowSplitBy = rowSplitBy, featureAnnotations = featureAnnotations, cellAnnotations = cellAnnotations, featureAnnotationColor = featureAnnotationColor, cellAnnotationColor = cellAnnotationColor, rowLabel = rowLabel, rowDend = rowDend, colDend = colDend, title = title, ...) return(hm) } #' @rdname plotFindMarkerHeatmap #' @export plotMarkerDiffExp <- function(inSCE, orderBy = 'size', log2fcThreshold = 1, fdrThreshold = 0.05, minClustExprPerc = 0.7, maxCtrlExprPerc = 0.4, minMeanExpr = 1, topN = 10, decreasing = TRUE, rowDataName = NULL, colDataName = NULL, featureAnnotations = NULL, cellAnnotations = NULL, featureAnnotationColor = NULL, cellAnnotationColor = NULL, colSplitBy = NULL, rowSplitBy = "marker", rowDend = FALSE, colDend = FALSE, title = "Top Marker Heatmap", ...) { .Deprecated("plotFindMarkerHeatmap") plotFindMarkerHeatmap(inSCE = inSCE, orderBy = orderBy, log2fcThreshold = log2fcThreshold, fdrThreshold = fdrThreshold, minClustExprPerc = minClustExprPerc, maxCtrlExprPerc = maxCtrlExprPerc, minMeanExpr = minMeanExpr, topN = topN, decreasing = decreasing, rowDataName = rowDataName, colDataName = colDataName, featureAnnotations = featureAnnotations, cellAnnotations = cellAnnotations, featureAnnotationColor = featureAnnotationColor, cellAnnotationColor = cellAnnotationColor, colSplitBy = colSplitBy, rowSplitBy = rowSplitBy, rowDend = rowDend, colDend = colDend, title = title, ...) }