Bioconductor Code: singleCellTK

R/plotFindMarkerHeatmap.R

c9178750	#' Plot a heatmap to visualize the result of \code{\link{runFindMarker}}
a9233cb6	#' @description This function will first reads the result saved in #' \code{metadata} slot, named by \code{"findMarker"} and generated by
b1877916	#' \code{\link{runFindMarker}}. Then it do the filtering on the statistics
a9233cb6	#' based on the input parameters and get unique genes to plot. We choose the #' genes that are identified as up-regulated only. As for the genes identified #' as up-regulated for multiple clusters, we only keep the belonging towards the #' one they have the highest Log2FC value. #' In the heatmap, there will always be a cell annotation for the cluster #' labeling used when finding the markers, and a feature annotation for which #' cluster each gene belongs to. And by default we split the heatmap by these #' two annotations. Additional legends can be added and the splitting can be #' canceled. #' @param inSCE \linkS4class{SingleCellExperiment} inherited object. #' @param log2fcThreshold Only use DEGs with the absolute values of log2FC #' larger than this value. Default \code{1} #' @param fdrThreshold Only use DEGs with FDR value smaller than this value. #' Default \code{0.05}
df03c634	#' @param minClustExprPerc A numeric scalar. The minimum cutoff of the #' percentage of cells in the cluster of interests that expressed the marker #' gene. Default \code{0.7}. #' @param maxCtrlExprPerc A numeric scalar. The maximum cutoff of the #' percentage of cells out of the cluster (control group) that expressed the #' marker gene. Default \code{0.4}. #' @param minMeanExpr A numeric scalar. The minimum cutoff of the mean #' expression value of the marker in the cluster of interests. Default \code{1}.
a9233cb6	#' @param topN An integer. Only to plot this number of top markers for each #' cluster in maximum, in terms of log2FC value. Use \code{NULL} to cancel the #' top N subscription. Default \code{10}. #' @param orderBy The ordering method of the clusters on the splitted heatmap. #' Can be chosen from \code{"size"} or \code{"name"}, specified with vector of #' ordered unique cluster labels, or set as \code{NULL} for unsplitted heatmap. #' Default \code{"size"}. #' @param decreasing Order the cluster decreasingly. Default \code{TRUE}.
ef608c1e	#' @param rowLabel \code{TRUE} for displaying \code{rownames} of \code{inSCE}, a #' \code{rowData} variable to use other feature identifiers, or a vector for #' customized row labels. Use \code{FALSE} for not displaying. Default #' \code{TRUE}.
a9233cb6	#' @param rowDataName character. The column name(s) in \code{rowData} that need #' to be added to the annotation. Default \code{NULL}. #' @param colDataName character. The column name(s) in \code{colData} that need #' to be added to the annotation. Default \code{NULL}. #' @param featureAnnotations \code{data.frame}, with \code{rownames} containing #' all the features going to be plotted. Character columns should be factors. #' Default \code{NULL}. #' @param cellAnnotations \code{data.frame}, with \code{rownames} containing #' all the cells going to be plotted. Character columns should be factors. #' Default \code{NULL}. #' @param featureAnnotationColor A named list. Customized color settings for #' feature labeling. Should match the entries in the \code{featureAnnotations} #' or \code{rowDataName}. For each entry, there should be a list/vector of #' colors named with categories. Default \code{NULL}. #' @param cellAnnotationColor A named list. Customized color settings for #' cell labeling. Should match the entries in the \code{cellAnnotations} or #' \code{colDataName}. For each entry, there should be a list/vector of colors #' named with categories. Default \code{NULL}. #' @param colSplitBy character vector. Do semi-heatmap based on the grouping of #' this(these) annotation(s). Should exist in either \code{colDataName} or #' \code{names(cellAnnotations)}. Default is the value of \code{cluster} in
c9178750	#' \code{\link{runFindMarker}} when \code{orderBy} is not \code{NULL}, or
a9233cb6	#' \code{NULL} otherwise. #' @param rowSplitBy character vector. Do semi-heatmap based on the grouping of #' this(these) annotation(s). Should exist in either \code{rowDataName} or #' \code{names(featureAnnotations)}. Default \code{"marker"}, which indicates an #' auto generated annotation for this plot.
f0f8cb90	#' @param rowDend Whether to display row dendrogram. Default \code{FALSE}. #' @param colDend Whether to display column dendrogram. Default \code{FALSE}.
cd268d2f	#' @param title Text of the title, at the top of the heatmap. Default #' \code{"Top Marker Heatmap"}.
a9233cb6	#' @param ... Other arguments passed to \code{\link{plotSCEHeatmap}}. #' @return A \code{\link[ComplexHeatmap]{Heatmap}} object
c9178750	#' @seealso \code{\link{runFindMarker}}, \code{\link{getFindMarkerTopTable}}
a9233cb6	#' @author Yichen Wang #' @export
930ec858	#' @examples #' data("sceBatches")
b1877916	#' logcounts(sceBatches) <- log1p(counts(sceBatches))
930ec858	#' sce.w <- subsetSCECols(sceBatches, colData = "batch == 'w'")
b1877916	#' sce.w <- runFindMarker(sce.w, method = "wilcox", cluster = "cell_type") #' plotFindMarkerHeatmap(sce.w) plotFindMarkerHeatmap <- function(inSCE, orderBy = 'size', log2fcThreshold = 1, fdrThreshold = 0.05, minClustExprPerc = 0.7, maxCtrlExprPerc = 0.4,
2599ca7f	minMeanExpr = 1, topN = 10, decreasing = TRUE,
84664fd7	rowLabel = TRUE,
b1877916	rowDataName = NULL, colDataName = NULL, featureAnnotations = NULL, cellAnnotations = NULL,
2599ca7f	featureAnnotationColor = NULL, cellAnnotationColor = NULL, colSplitBy = NULL,
b1877916	rowSplitBy = "marker", rowDend = FALSE,
2599ca7f	colDend = FALSE,
c9178750	title = "Top Marker Heatmap", ...) { if (!inherits(inSCE, 'SingleCellExperiment')) {
2599ca7f	stop('"inSCE" should be a SingleCellExperiment inherited Object.') }
c9178750	if (!'findMarker' %in% names(S4Vectors::metadata(inSCE))) {
2599ca7f	stop('"findMarker" result not found in metadata. ',
c9178750	'Run runFindMarker() before plotting.')
2599ca7f	}
b1877916	colSplitBy <-
2599ca7f	ifelse(is.null(orderBy), NULL, colnames(metadata(inSCE)$findMarker)[5])
b1877916
c9178750	if (!is.null(orderBy)) { if (length(orderBy) == 1) { if (!orderBy %in% c('size', 'name')) {
2599ca7f	stop('Single charater setting for "orderBy" should be chosen', 'from "size" or "name".')
a9233cb6	}
2599ca7f	}# else if(any(!SummarizedExperiment::colData(inSCE)[[cluster]] %in% # orderBy)){ # stop('Invalid "orderBy", please input a vector of unique ordered ', # 'cluster identifiers that match all clusters in ', # 'colData(inSCE) specified by "cluster" to adjust the order ', # 'of clusters.') #} } # Extract and basic filter degFull <- S4Vectors::metadata(inSCE)$findMarker
c9178750	if (!all(c("Gene", "Pvalue", "Log2_FC", "FDR") %in% colnames(degFull)[seq_len(4)])) {
2599ca7f	stop('"findMarker" result cannot be interpreted properly') }
84664fd7	clusterVar <- attr(degFull, "cluster") clusterName <- attr(degFull, "clusterName")
2599ca7f	#if(length(which(!degFull$Gene %in% rownames(inSCE))) > 0){ # Remove genes happen in deg table but not in sce. Weird. # degFull <- degFull[-which(!degFull$Gene %in% rownames(inSCE)),] #}
84664fd7	if (!is.null(log2fcThreshold)) {
2599ca7f	degFull <- degFull[degFull$Log2_FC > log2fcThreshold,] }
84664fd7	if (!is.null(fdrThreshold)) {
2599ca7f	degFull <- degFull[degFull$FDR < fdrThreshold,] } if (!is.null(minClustExprPerc)) { degFull <- degFull[degFull$clusterExprPerc >= minClustExprPerc,] } if (!is.null(maxCtrlExprPerc)) { degFull <- degFull[degFull$ControlExprPerc <= maxCtrlExprPerc,] } if (!is.null(minMeanExpr)) { degFull <- degFull[degFull$clusterAveExpr >= minMeanExpr,] } # Remove duplicate by assigning the duplicated genes to the cluster where # their log2FC is the highest. # Done by keeping all unique genes and the rows with highest Log2FC entry # for each duplicated gene. dup.gene <- unique(degFull$Gene[duplicated(degFull$Gene)])
c9178750	for (g in dup.gene) {
2599ca7f	deg.gix <- degFull$Gene == g deg.gtable <- degFull[deg.gix,] toKeep <- which.max(deg.gtable$Log2_FC) toRemove <- which(deg.gix)[-toKeep] degFull <- degFull[-toRemove,] } selected <- character() if (!is.null(topN)) { for (c in unique(degFull[[clusterName]])) { deg.cluster <- degFull[degFull[[clusterName]] == c,] deg.cluster <- deg.cluster[order(deg.cluster$Log2_FC, decreasing = TRUE),] if (dim(deg.cluster)[1] > topN) { deg.cluster <- deg.cluster[seq_len(topN),] } selected <- c(selected, deg.cluster$Gene)
a9233cb6	}
2599ca7f	} else { selected <- degFull$Gene } degFull <- degFull[degFull$Gene %in% selected,]
b1877916
2599ca7f	if (!is.null(attr(degFull, "useReducedDim"))) { mat <- t(expData(inSCE, attr(degFull, "useReducedDim"))) assayList <- list(mat) names(assayList) <- attr(degFull, "useReducedDim") tmpSCE <- SingleCellExperiment::SingleCellExperiment(assays = assayList)
b1877916	SummarizedExperiment::colData(tmpSCE) <-
2599ca7f	SummarizedExperiment::colData(inSCE) assayName <- attr(degFull, "useReducedDim")
b1877916
2599ca7f	} else { tmpSCE <- inSCE assayName <- attr(degFull, "useAssay")
b1877916
2599ca7f	}
b1877916
2599ca7f	inSCE <- tmpSCE[degFull$Gene,]
84664fd7	z <- clusterVar
c9178750	if (is.factor(z)) {
2599ca7f	z.order <- levels(z) # When 'z' is a subset factor, there would be redundant levels. z.order <- z.order[z.order %in% as.vector(unique(z))] z <- factor(z, levels = z.order) SummarizedExperiment::rowData(inSCE)[[clusterName]] <- factor(degFull[[clusterName]], levels = z.order) } else { SummarizedExperiment::rowData(inSCE)[[clusterName]] <- degFull[[clusterName]] }
c9178750	if (!is.null(orderBy)) { if (length(orderBy) == 1 && orderBy == 'size') {
2599ca7f	z.order <- names(table(z)[order(table(z), decreasing = decreasing)])
c9178750	} else if (length(orderBy) == 1 && orderBy == 'name') { if (is.factor(z)) {
2599ca7f	z.order <- sort(levels(z), decreasing = decreasing) } else { z.order <- sort(unique(z), decreasing = decreasing) }
a9233cb6	} else {
2599ca7f	z.order <- orderBy[-which(!orderBy %in% z)]
a9233cb6	}
2599ca7f	} SummarizedExperiment::colData(inSCE)[[clusterName]] <- factor(z, levels = z.order)
b1877916
2599ca7f	y <- SummarizedExperiment::rowData(inSCE)[[clusterName]]
b1877916	SummarizedExperiment::rowData(inSCE)[["marker"]] <-
2599ca7f	factor(y, levels = z.order)
84664fd7	#markerConsistColor <- # list(marker = dataAnnotationColor(inSCE, 'col')[[clusterName]])
b1877916
2599ca7f	# Organize plotSCEHeatmap arguments colDataName <- c(colDataName, clusterName) rowDataName <- c(rowDataName, "marker")
b1877916
84664fd7	#featureAnnotationColor <- c(featureAnnotationColor, markerConsistColor)
b1877916
ef608c1e	hm <- plotSCEHeatmap(inSCE, useAssay = assayName, colDataName = colDataName,
2599ca7f	rowDataName = rowDataName, colSplitBy = colSplitBy, rowSplitBy = rowSplitBy, featureAnnotations = featureAnnotations, cellAnnotations = cellAnnotations, featureAnnotationColor = featureAnnotationColor,
fb78e682	cellAnnotationColor = cellAnnotationColor, rowLabel = rowLabel,
2599ca7f	rowDend = rowDend, colDend = colDend, title = title, ...) return(hm)
a9233cb6	}
2599ca7f
b1877916	#' @rdname plotFindMarkerHeatmap #' @export
84664fd7	plotMarkerDiffExp <- function(inSCE, orderBy = 'size', log2fcThreshold = 1, fdrThreshold = 0.05, minClustExprPerc = 0.7, maxCtrlExprPerc = 0.4, minMeanExpr = 1, topN = 10, decreasing = TRUE, rowDataName = NULL, colDataName = NULL, featureAnnotations = NULL, cellAnnotations = NULL, featureAnnotationColor = NULL, cellAnnotationColor = NULL, colSplitBy = NULL, rowSplitBy = "marker", rowDend = FALSE, colDend = FALSE, title = "Top Marker Heatmap", ...) {
b1877916	.Deprecated("plotFindMarkerHeatmap")
84664fd7	plotFindMarkerHeatmap(inSCE = inSCE, orderBy = orderBy, log2fcThreshold = log2fcThreshold, fdrThreshold = fdrThreshold, minClustExprPerc = minClustExprPerc, maxCtrlExprPerc = maxCtrlExprPerc, minMeanExpr = minMeanExpr, topN = topN, decreasing = decreasing, rowDataName = rowDataName, colDataName = colDataName, featureAnnotations = featureAnnotations, cellAnnotations = cellAnnotations, featureAnnotationColor = featureAnnotationColor, cellAnnotationColor = cellAnnotationColor, colSplitBy = colSplitBy, rowSplitBy = rowSplitBy, rowDend = rowDend, colDend = colDend, title = title, ...)
b1877916	}