#' scrna_evalPlot_silh #' #' Plot a min/max/mean/median silhouette width heatmap from aggregated #' evaluation results of the `scrna_pipeline`. #' #' @param res Aggregated pipeline results (i.e. the output of `runPipeline` or #' `aggregateResults`) #' @param what What metric to plot, possible values are “minSilWidth”, #' “meanSilWidth” (default), “medianSilWidth”, or “maxSilWidth”. #' @param step Name of the step for which to plot the evaluation results. #' Defaults to "dimreduction". #' @param dims If multiple sets of dimensions are available, which one to use #' (defaults to the first). #' @param agg.by Aggregate results by these columns (default no aggregation) #' @param agg.fn Function for aggregation (default mean) #' @param filterExpr An optional filtering expression based on the columns of #' the target dataframe, (e.g. `filterExpr=param1=="value1"`). #' @param value_format Format for displaying cells' values (no label by default) #' @param reorder_rows Whether to sort rows (default FALSE). The row #' names themselves can also be passed to specify an order, or a #' `ComplexHeatmap`. #' @param reorder_columns Whether to sort columns (default TRUE). #' @param row_split Optional column (included in `agg.by`) by which to split #' the rows. Alternatively, an expression using the columns (retained after #' aggregation) can be passed. #' @param show_heatmap_legend Passed to `Heatmap` (default FALSE) #' @param show_column_names Passed to `Heatmap` (default FALSE) #' @param col Colors for the heatmap #' @param font_factor A scaling factor applied to fontsizes (default 1) #' @param value_cols A vector of length 2 indicating the colors of the values #' (above and below the mean), if printed #' @param title Plot title #' @param shortNames Logical; whether to use short row names (with only #' the parameter values instead of the parameter name and value pairs), default #' TRUE. #' @param anno_legend Logical; whether to plot the legend for the datasets #' @param ... Passed to `Heatmap` #' #' @return A Heatmap #' @export #' @import ComplexHeatmap #' @examples #' data("exampleResults", package="pipeComp") #' scrna_evalPlot_silh( exampleResults, agg.by=c("filt","norm"), #' row_split="norm" ) scrna_evalPlot_silh <- function( res, what=c("minSilWidth","meanSilWidth"), step="dimreduction", dims=1, agg.by=NULL, agg.fn=mean, filterExpr=NULL, value_format="", reorder_rows=FALSE, reorder_columns=TRUE, show_heatmap_legend=TRUE, show_column_names=FALSE, col=rev(RColorBrewer::brewer.pal(n=11,"RdBu")), font_factor=0.9, row_split=NULL, shortNames=TRUE, value_cols=c("white","black"), title=NULL, anno_legend=TRUE, ...){ pd <- NULL if(is(res,"SimpleList")) pd <- metadata(res)$PipelineDefinition if(is.null(pd)) stop("Could not find the PipelineDefinition.") if(length(what)>1){ fcall <- match.call() H <- NULL for(i in what){ fcall$what <- i H <- H + eval(fcall) } return(H) } if(is(res,"SimpleList") && "evaluation" %in% names(res)) res <- res$evaluation if(is(res,"list") && step %in% names(res)) res <- res[[step]] if(is(res,"list") && "silhouette" %in% names(res)) res <- res[["silhouette"]] res <- res[[dims]] what_options <- setdiff( colnames(res), c("dataset","subpopulation",unlist(arguments(pd))) ) what <- match.arg(what, choices=what_options) res <- .prepRes(res, what=what, agg.by=agg.by, pipDef=pd, returnParams=TRUE, filt=substitute(filterExpr), shortNames=shortNames) pp <- res$pp res <- as.matrix(res$res) ro <- .dosort(res, reorder_rows) if(reorder_columns) res <- res[,order(colMeans(res), decreasing=TRUE),drop=FALSE] cellfn <- .getCellFn(res, res, value_format, value_cols, font_factor) if(is.null(title)) title <- gsub("\\.","\n",what) suppressWarnings({ if(!tryCatch(is.null(row_split), error=function(e) FALSE)){ if(tryCatch(is.character(row_split), error=function(e) FALSE)){ if(row_split %in% colnames(pp)){ row_split <- pp[,row_split] }else{ warning("`row_split` wasn't found and will be ignored.") row_split <- NULL } }else{ row_split <- eval(substitute(row_split), envir=pp) } } }) col <- .silScale(res, col) Heatmap( res, name=what, cluster_rows=FALSE, cluster_columns=FALSE, show_heatmap_legend=show_heatmap_legend, row_order=ro, bottom_annotation=.ds_anno(colnames(res),anno_legend,font_factor), show_column_names=show_column_names, cell_fun=cellfn, col=col, column_title=title, row_split=row_split, row_title_gp = gpar(fontsize = (14*font_factor)), column_title_gp = gpar(fontsize = (14*font_factor)), row_names_gp = gpar(fontsize = (12*font_factor)), column_names_gp = gpar(fontsize = (12*font_factor)), ...) } .getClustAggFields <- function(res, pipDef=NULL){ if(is.null(pipDef)) pipDef <- metadata(res)$PipelineDefinition if("evaluation" %in% names(res)) res <- res$evaluation if("clustering" %in% names(res)) res <- res$clustering fields <- unlist(arguments(pipDef)) fields <- setdiff(fields, c("k", "dims", "steps","resolution","min.size")) fields <- intersect(colnames(res), fields) fields[vapply( res[,fields,drop=FALSE], FUN=function(x) length(unique(x))>1, logical(1) )] } .mergeFilterOut <- function(ll){ ll <- lapply(ll, FUN=function(x){ if(any(is.na(x$N.lost))){ w <- which(is.na(x$N.lost)) x$N.lost[w] <- x$N.before[w] x$pc.lost[w] <- 100 } x }) if(length(ll)==1){ ll[[1]]$N <- ll[[1]]$N.before return(ll[[1]]) } # get back the initial number of cells mm <- ll[[1]] for(i in seq_len(length(ll)-1)){ f <- setdiff(colnames(ll[[i]]), c("N.before","N.lost","pc.lost")) suf <- paste0(".",names(ll)[i+0:1]) if(i>1) suf[1] <- "" mm <- merge(mm, ll[[i+1]], by=f, suffixes=suf) } mm$N <- mm[,grep("N\\.before",colnames(mm))[1]] mm$N.lost <- rowSums(mm[,grep("N\\.lost",colnames(mm))]) mm$pc.lost <- 100*mm$N.lost/mm$N mm } #' scrna_evalPlot_filtering #' #' @param res Aggregated pipeline results (i.e. the output of `runPipeline` or #' `aggregateResults`) #' @param steps Steps to include (default 'doublet' and 'filtering'); other #' steps will be averaged. #' @param clustMetric Clustering accuracy metric to use (default `mean_F1``) #' @param filterExpr An optional filtering expression based on the columns of #' the clustering evaluation (e.g. `filterExpr=param1=="value1"` or #' `filterExpr=n_clus==true.nbClusts`). #' @param atNearestK Logical; whether to restrict analyses to those giving the #' smallest deviation from the real number of clusters (default FALSE). #' @param returnTable Logical; whether to return the data rather than plot. #' @param point.size Size of the points #' @param ... passed to `geom_point` #' #' @return A ggplot, or a data.frame if `returnTable=TRUE` #' @importFrom stats median #' @import ggplot2 #' @export #' @examples #' data("exampleResults", package="pipeComp") #' scrna_evalPlot_filtering(exampleResults) scrna_evalPlot_filtering <- function(res, steps=c("doublet","filtering"), clustMetric="mean_F1", filterExpr=TRUE, atNearestK=FALSE, returnTable=FALSE, point.size=2.2, ...){ param_fields <- tryCatch( unlist(arguments(metadata(res)$PipelineDefinition)[steps]), error=function(e) unlist(arguments(scrna_pipeline())[steps]) ) res <- res$evaluation co <- .mergeFilterOut(res[steps]) coI <- co[,c("dataset",param_fields)] ci <- split(seq_len(nrow(co)), coI, drop=TRUE) agfns <- list(min.lost=min, mean.lost=mean, median.lost=median, max.lost=max) x <- as.data.frame(do.call(cbind, lapply( agfns, FUN=function(agf){ vapply( ci, function(x) agf(co[x,"pc.lost"]), numeric(1) ) }))) x$total.lost <- vapply(ci, FUN=function(x) sum(co[x,"N.lost"]), numeric(1)) x$pc.lost <- vapply( ci, FUN.VALUE=numeric(1), FUN=function(x) 100*sum(co[x,"N.lost"])/sum(co[x,"N"]) ) x <- cbind(coI[vapply(ci, FUN=function(x) x[1], integer(1)),], x) # get clustering data cl <- res$clustering[eval(substitute(filterExpr), res$clustering),] if(atNearestK){ cl$absdiff <- abs(cl$n_clus-cl$true.nbClusts) cl <- do.call(rbind, lapply(split(cl, cl$dataset), FUN=function(x){ cl[cl$absdiff==min(cl$absdiff),] })) } cl <- aggregate( cl[,clustMetric,drop=FALSE], by=cl[,c("dataset",param_fields)], FUN=mean ) m <- merge(cl, x, by=c("dataset",param_fields)) m$method <- apply( m[,param_fields,drop=FALSE], 1, collapse="+", FUN=paste ) if(returnTable) return(m) if( length(param_fields)==2 && all(sort(param_fields)==c("doubletmethod","filt")) ){ return(ggplot(m, aes(max.lost, mean_F1, colour=filt, shape=doubletmethod))+ geom_point(size=point.size, ...) + facet_wrap(~dataset, scales="free") + xlab("Max proportion of subpopulation excluded") + labs(colour="filter set", shape="doublet removal")) } ggplot(m, aes(max.lost, mean_F1, colour=method, shape=method)) + geom_point(size=point.size, ...) + facet_wrap(~dataset, scales="free") + xlab("Max proportion of subpopulation excluded") } #' scrna_describeDatasets #' #' Plots descriptive information about the datasets #' #' @param sces A character vector of paths to SCE rds files, or a list of SCEs #' @param pt.size Point size (for reduced dims) #' @param ... Passed to geom_point() #' #' @return A plot_grid output #' #' @export #' @import SingleCellExperiment scran scater ggplot2 scales #' @importFrom SummarizedExperiment colData colData<- #' @importFrom cowplot plot_grid #' @importFrom RColorBrewer brewer.pal #' @importFrom Rtsne Rtsne #' @importFrom uwot umap scrna_describeDatasets <- function(sces, pt.size=0.3, ...){ if(is.null(names(sces))) names(sces) <- paste0("dataset",seq_along(sces)) if(is.character(sces)){ sces <- lapply(sces, FUN=function(x){ sce <- readRDS(x) sce <- logNormCounts(sce) var.stats <- modelGeneVar(sce) sce <- denoisePCA(sce, technical=var.stats) reducedDim(sce, "tSNE") <- Rtsne(reducedDim(sce))$Y reducedDim(sce, "umap") <- umap(reducedDim(sce)) sce }) } tt <- lapply(sces, FUN=function(x) table(x$phenoid)) cols <- lapply(tt, FUN=function(x){ y <- getQualitativePalette(length(x)) names(y) <- names(x) y }) for(i in seq_along(sces)) sces[[i]]$cluster <- cols[[i]][as.character(colData(sces[[i]])$phenoid)] cd <- lapply(sces, FUN=function(x) as.data.frame(colData(x))) names(cols2) <- cols2 <- unique(unlist(cols)) noy <- theme( axis.line=element_blank(),axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(), strip.background = element_blank(), legend.position = "none", aspect.ratio = 1, axis.text=element_text(size=10), plot.margin=margin(l=0,r=0)) cs <- scale_fill_manual(values=cols2) d <- data.frame( dataset=rep(names(sces), vapply(tt, length, integer(1))), cluster=unlist(cols), nb=unlist(tt) ) p1 <- ggplot(d, aes(x=cluster, y=nb, fill=cluster)) + geom_bar(stat = "identity") + scale_y_log10() + facet_wrap(~dataset, scales="free_y", ncol=1, strip.position = "left") + coord_flip() + ylab("Number of cells") + xlab("") + noy + cs for(x in names(sces)) colData(sces[[x]])$cluster <- unlist(cols)[paste(x, colData(sces[[x]])$phenoid,sep=".")] d <- suppressWarnings(dplyr::bind_rows(lapply(cd, FUN=function(x) x[,c("total_counts","total_features","cluster")])) ) d$dataset <- rep(names(cd), vapply(cd, nrow, integer(1))) pf <- function(d, x) ggplot(d, aes_string(x="cluster", y=x, fill="cluster"))+ geom_violin() + xlab("") + coord_flip() + noy + cs + facet_wrap(~dataset, scales="free_y", ncol=1) + theme( strip.text.x = element_blank(), panel.grid.major.x=element_line(color="grey", linetype="dashed")) + scale_y_continuous( trans = 'log10', breaks=10^pretty(log10(d[[x]]),n=3), labels=trans_format('log10', math_format(10^.x)) ) rd <- function(dimred,...){ d2 <- cbind(d, do.call(rbind, lapply(sces, FUN=function(x){ reducedDim(x,dimred) }))) colnames(d2)[5:6] <- c("x","y") ggplot(d2, aes(x,y,col=cluster)) + geom_point(...) + facet_wrap(~dataset, scales="free", ncol=1) + scale_color_manual(values=cols2) + xlab(paste0("\n",dimred)) + theme(axis.line=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), strip.background = element_blank(), strip.text.x=element_blank(), legend.position = "none", aspect.ratio = 1 ) } p5 <- rd("umap", size=pt.size, ...) plot_grid( p1, pf(d,"total_counts"), pf(d,"total_features"), rd("tSNE", size=pt.size, ...), rd("umap", size=pt.size, ...), nrow=1 ) } #' @importFrom circlize colorRamp2 .silScale <- function(x, cols=NULL){ if(is.null(cols)) cols <- rev(brewer.pal(n=11,"RdBu")) if(is.function(cols)) cols <- cols(11) if(length(cols)!=11) stop("`cols` should contain 11 colors.") bb <- c( -seq(from=sqrt(abs(min(x, na.rm=TRUE))), to=0, length.out=6)^2, seq(from=0, to=sqrt(max(x, na.rm=TRUE)), length.out=6)[-1]^2 ) colorRamp2(bb, cols) } #' scrna_evalPlot_overall #' #' Plots a multi-level summary heatmap of many analyses of the `scrna_pipeline`. #' #' @param res Aggregated pipeline results (i.e. the output of `runPipeline` or #' `aggregateResults`) #' @param agg.by The paramters by which to aggregate. #' @param width The width of individual heatmap bodies. #' @param datasets_as_columnNames Logical; whether dataset names should be #' printed below the columns (except for silhouette) rather than using a #' legend. #' @param rowAnnoColors Optional list of colors for the row annotation variables #' (passed to `HeatmapAnnotation(col=...)`) #' @param column_names_gp Passed to each calls to `Heatmap` #' @param column_title_gp Passed to each calls to `Heatmap` #' @param heatmap_legend_param Passed to each calls to `Heatmap` #' @param ... Passed to each calls to `Heatmap` #' #' @return A HeatmapList #' @importFrom stats hclust #' @export #' #' @examples #' library(ComplexHeatmap) #' data("exampleResults") #' h <- scrna_evalPlot_overall(exampleResults) #' draw(h, heatmap_legend_side="bottom") scrna_evalPlot_overall <- function(res, agg.by=NULL, width=NULL, datasets_as_columnNames=TRUE, rowAnnoColors=NULL, column_names_gp=gpar(fontsize=10), column_title_gp=gpar(fontsize=12), heatmap_legend_param=list( by_row=TRUE, direction="horizontal", nrow=1), ... ){ a <- arguments(metadata(res)$PipelineDefinition) if(is.null(agg.by)){ agg.by <- c(unlist(a[-length(a)]),c("clustmethod", "dims")) agg.by <- agg.by[sapply(agg.by, FUN=function(x) length(unique(res$evaluation$clustering[[x]]))>1)] } agg.by <- as.character(agg.by) if(!all(agg.by %in% unlist(a))) stop("`agg.by` should be a vector of pipeline parameters.") # dimred sil <- res$evaluation$dimreduction$silhouette if(is(sil,"list")) sil <- sil[[1]] ll <- lapply(c("minSilWidth", "meanSilWidth"), FUN=function(x){ .prepRes( sil, what=x, agg.by=intersect(agg.by, colnames(sil)), returnParams=TRUE, shortNames=TRUE, pipDef=metadata(res)$PipelineDefinition ) }) pp1 <- ll[[1]]$pp pp1$method <- NULL ll1 <- lapply(ll, FUN=function(x) x$res) # clustering ll <- lapply(c("ARI", "MI"), FUN=function(x){ .prepRes( res$evaluation$clustering, what=x, returnParams=TRUE, agg.by=agg.by, shortNames=TRUE, pipDef=metadata(res)$PipelineDefinition ) }) ll[[3]] <- .prepRes(res$evaluation$clustering, what="ARI", returnParams=TRUE, agg.by=agg.by, filt=expr(true.nbClusts==n_clus), shortNames=TRUE, pipDef=metadata(res)$PipelineDefinition) pp <- ll[[1]]$pp pp$method <- NULL ll2 <- lapply(ll, FUN=function(x){ x <- as.data.frame(x$res) x <- as.matrix(colCenterScale(x[row.names(pp),])) row.names(x) <- row.names(pp) x }) # merge dimred and clust results tmp <- as.character(apply( pp[,colnames(pp1),drop=FALSE], 1, collapse=" > ",FUN=paste )) ll1 <- lapply(ll1, FUN=function(x){ x <- x[tmp,] row.names(x) <- row.names(ll2[[1]]) x }) ll <- c(ll1,ll2) # get max % lost pclost <- scrna_evalPlot_filtering(res, returnTable=TRUE) filt.agg.by <- intersect(agg.by,unlist(a[1:2])) pclost <- aggregate( pclost[,"max.lost",drop=FALSE], pclost[,c(filt.agg.by,"dataset"),drop=FALSE], FUN=mean) if(length(filt.agg.by)>0){ f <- as.formula(paste(paste(filt.agg.by,collapse="+"),"~dataset")) pclost <- reshape2::dcast(pclost, f, value.var="max.lost") row.names(pclost) <- apply( pclost[,seq_along(filt.agg.by),drop=FALSE], 1, collapse=" > ",FUN=paste ) tmp <- as.character(apply( pp[,filt.agg.by,drop=FALSE], 1, collapse=" > ", FUN=paste )) pclost <- pclost[tmp,setdiff(colnames(pclost), filt.agg.by),] row.names(pclost) <- row.names(ll2[[1]]) }else{ pclost <- matrix(pclost$max.lost, nrow=nrow(ll2[[1]]), ncol=nrow(pclost), byrow=TRUE, dimnames=list(row.names(ll2[[1]]), pclost$dataset)) } pclost <- apply(pclost,1,FUN=max) ll2 <- list( list(mat=ll[[1]], title="min silh.\nwidth", cluster_columns=TRUE, name="silhouette width"), list(mat=ll[[2]], title="mean silh.\nwidth", cluster_columns=TRUE, show_heatmap_legend=FALSE), list(mat=ll[[3]], title="mean ARI", name="ARI (MADs)", cluster_columns=FALSE), list(mat=ll[[4]], title="mean MI", name="MI (MADs)", cluster_columns=FALSE), list(mat=ll[[5]], title="mean ARI\nat true k", name="ARI at true k (MADs)", cluster_columns=FALSE) ) if("doubletmethod" %in% colnames(pp)) pp$doubletmethod <- gsub("^doublet\\.","",pp$doubletmethod) if("clustmethod" %in% colnames(pp)) pp$clustmethod <- gsub("^clust\\.","",pp$doubletmethod) for(f in c("filt","sel","norm")){ if(f %in% colnames(pp)) pp[[f]] <- gsub(paste0("^",f,"\\."),"",pp[[f]]) } if(is.null(rowAnnoColors)){ ha <- HeatmapAnnotation( which="row", "max\n% lost"=anno_barplot( pclost, bar_width=0.85, border=FALSE, width=unit(1.5,"cm"), gp=gpar(fill="#282828", col="#282828") ), df=pp, annotation_legend_param=list("side"="right") ) } else { ha <- HeatmapAnnotation( which="row", col=rowAnnoColors, "max\n% lost"=anno_barplot( pclost, bar_width=0.85, border=FALSE, width=unit(1.5,"cm"), gp=gpar(fill="#282828", col="#282828") ), df=pp, annotation_legend_param=list("side"="right") ) } h <- hclust(dist(do.call(cbind, ll))) silhscale <- .silScale(cbind(ll2[[1]]$mat, ll2[[2]]$mat)) H <- NULL for(i in seq_along(ll2)){ if(i==1){ hi <- h }else{ hi <- FALSE } if(grepl("silh", ll2[[i]]$title)){ col <- silhscale scn <- FALSE }else{ col <- .defaultColorMapping(ll2[[i]]$mat, center=TRUE) scn <- datasets_as_columnNames } la <- ra <- NULL if(i==length(ll2)) ra <- ha ba <- .ds_anno(colnames(ll2[[i]]$mat), legend=(!datasets_as_columnNames && i==1)) name <- ifelse(is.null(ll2[[i]]$name),ll2[[i]]$title,ll2[[i]]$name) wi <- ifelse(is.null(width), unit(ifelse(i<=2,3.2,2.5), "cm"), width) H <- H + Heatmap( ll2[[i]]$mat, name=name, cluster_rows=hi, show_row_names=FALSE, show_column_names=scn, heatmap_legend_param=heatmap_legend_param, column_title=ll2[[i]]$title, col=col, width=wi, cluster_columns=ll2[[i]]$cluster_columns, show_column_dend=FALSE, bottom_annotation=ba, left_annotation=la, right_annotation=ra, column_names_gp=column_names_gp, column_title_gp=column_title_gp, show_heatmap_legend=ifelse( is.null(ll2[[i]]$show_heatmap_legend),TRUE, ll2[[i]]$show_heatmap_legend), ... ) } H }