Bioconductor Code: pipeComp

Browse code

Merge pull request #39 from plger/devel

added overall plot function for scrna_pipeline

Pierre-Luc authored on 18/06/2020 13:47:55 • GitHub committed on 18/06/2020 13:47:55
Showing 4 changed files

DESCRIPTION index df739ab..0c4e25c 100644
NAMESPACE index 16c1996..eddd0b4 100644
R/scrna_plot.R index 9014bb7..4ba942b 100644
man/scrna_evalPlot_overall.Rd index 0000000..0fda41d

History View file @ ff087a8

@@ -1,7 +1,7 @@
                      Package: pipeComp
                      Type: Package
                      Title: pipeComp pipeline benchmarking framework
                     -Version: 0.99.40
                     +Version: 0.99.42
                      Depends: R (>= 4.0)
                      Authors@R: c(
                      	person("Pierre-Luc", "Germain", email="[email protected]", role=c("cre","aut"), comment = c(ORCID = "0000-0003-3418-4218")),

NAMESPACE

History View file @ ff087a8

@@ -25,6 +25,7 @@ export(readPipelineResults)
                      export(runPipeline)
                      export(scrna_describeDatasets)
                      export(scrna_evalPlot_filtering)
                     +export(scrna_evalPlot_overall)
                      export(scrna_evalPlot_silh)
                      export(scrna_pipeline)
                      exportClasses(PipelineDefinition)

R/scrna_plot.R

History View file @ ff087a8

@@ -314,3 +314,161 @@ scrna_describeDatasets <- function(sces, pt.size=0.3, ...){
+                     }
                     +#' scrna_evalPlot_overall
                     +#'
                     +#' Plots a multi-level summary heatmap of many analyses of the `scrna_pipeline`.
                     +#'
                     +#' @param res Aggregated pipeline results (i.e. the output of `runPipeline` or
                     +#' `aggregateResults`)
                     +#' @param agg.by The parameters by which to aggregate. If NULL (default), the
                     +#' pipeline parameters (excluding those of the last step) together with
                     +#' `clustmethod` and `dims` are used, keeping only those that take more than
                     +#' one value in the clustering evaluation.
                     +#' @param width The width of individual heatmap bodies. If NULL (default),
                     +#' 3.2cm is used for the two silhouette heatmaps and 2.5cm for the others.
                     +#' @param datasets_as_columnNames Logical; whether dataset names should be
                     +#' printed below the columns (except for silhouette) rather than using a
                     +#' legend.
                     +#' @param heatmap_legend_param Passed to each call to `Heatmap`
                     +#' @param ... Passed to each call to `Heatmap`
                     +#'
                     +#' @return A HeatmapList
                     +#' @export
                     +#'
                     +#' @examples
                     +#' data("exampleResults")
                     +#' h <- scrna_evalPlot_overall(exampleResults)
                     +#' draw(h, heatmap_legend_side="bottom")
                     +scrna_evalPlot_overall <- function(res, agg.by=NULL, width=NULL,
                     +                              datasets_as_columnNames=TRUE,
                     +                              heatmap_legend_param=list(direction="horizontal",nrow=1,
                     +                                                        by_row=TRUE),
                     +                              ... ){
                     +  pipDef <- metadata(res)$PipelineDefinition
                     +  a <- arguments(pipDef)
                     +  if(is.null(agg.by)){
                     +    # default: all parameters but those of the last step, plus the clustering
                     +    # method and dims, restricted to those that actually vary in the results
                     +    agg.by <- c(unlist(a[-length(a)]),c("clustmethod", "dims"))
                     +    agg.by <- agg.by[vapply(agg.by, FUN.VALUE=logical(1), FUN=function(x)
                     +                              length(unique(res$evaluation$clustering[[x]]))>1)]
                     +  }
                     +  agg.by <- as.character(agg.by)
                     +  if(!all(agg.by %in% unlist(a)))
                     +    stop("`agg.by` should be a vector of pipeline parameters.")
                     +
                     +  # dimred: min and mean silhouette widths, aggregated by the subset of
                     +  # `agg.by` available in the silhouette table
                     +  sil <- res$evaluation$dimreduction$silhouette
                     +  if(is(sil,"list")) sil <- sil[[1]]
                     +  ll <- lapply(c("minSilWidth", "meanSilWidth"), FUN=function(x){
                     +    .prepRes( sil, what=x, agg.by=intersect(agg.by, colnames(sil)),
                     +              returnParams=TRUE, shortNames=TRUE, pipDef=pipDef )
                     +  })
                     +  pp1 <- ll[[1]]$pp
                     +  pp1$method <- NULL
                     +  ll1 <- lapply(ll, FUN=function(x) x$res)
                     +
                     +  # clustering: ARI, MI, and ARI restricted to runs where the number of
                     +  # clusters found equals the true number
                     +  ll <- lapply(c("ARI", "MI"), FUN=function(x){
                     +    .prepRes( res$evaluation$clustering, what=x, returnParams=TRUE,
                     +              agg.by=agg.by, shortNames=TRUE, pipDef=pipDef )
                     +  })
                     +  ll[[3]] <- .prepRes(res$evaluation$clustering, what="ARI", returnParams=TRUE,
                     +                      agg.by=agg.by, filt=expr(true.nbClusts==n_clus),
                     +                      shortNames=TRUE, pipDef=pipDef)
                     +  pp <- ll[[1]]$pp
                     +  pp$method <- NULL
                     +  # put every clustering matrix in the row order of `pp`; the legend names
                     +  # below suggest colCenterScale expresses values in MADs (defined elsewhere)
                     +  ll2 <- lapply(ll, FUN=function(x){
                     +    x <- as.data.frame(x$res)
                     +    x <- as.matrix(colCenterScale(x[row.names(pp),]))
                     +    row.names(x) <- row.names(pp)
                     +    x
                     +  })
                     +
                     +  # merge dimred and clust results: each clustering row is matched to the
                     +  # silhouette row sharing its values for the dimred-level parameters
                     +  tmp <- as.character(apply( pp[,colnames(pp1),drop=FALSE], 1,
                     +                             collapse=" > ",FUN=paste ))
                     +  ll1 <- lapply(ll1, FUN=function(x){
                     +    x <- x[tmp,]
                     +    row.names(x) <- row.names(ll2[[1]])
                     +    x
                     +  })
                     +  ll <- c(ll1,ll2)
                     +
                     +  # get max % lost (mean per filtering parameters and dataset, then the
                     +  # maximum across datasets for each row)
                     +  pclost <- scrna_evalPlot_filtering(res, returnTable=TRUE)
                     +  filt.agg.by <- intersect(agg.by,unlist(a[1:2]))
                     +  pclost <- aggregate( pclost[,"max.lost",drop=FALSE],
                     +                        pclost[,c(filt.agg.by,"dataset"),drop=FALSE], FUN=mean)
                     +  if(length(filt.agg.by)>0){
                     +    f <- as.formula(paste(paste(filt.agg.by,collapse="+"),"~dataset"))
                     +    pclost <- reshape2::dcast(pclost, f, value.var="max.lost")
                     +    row.names(pclost) <- apply( pclost[,seq_along(filt.agg.by),drop=FALSE], 1,
                     +                                collapse=" > ",FUN=paste )
                     +    tmp <- as.character(apply( pp[,filt.agg.by,drop=FALSE], 1, collapse=" > ",
                     +                               FUN=paste ))
                     +    # drop=FALSE keeps a data.frame even when there is a single dataset
                     +    pclost <- pclost[tmp,setdiff(colnames(pclost), filt.agg.by),drop=FALSE]
                     +    row.names(pclost) <- row.names(ll2[[1]])
                     +  }else{
                     +    # no filtering parameter to aggregate by: every row gets the same
                     +    # per-dataset values
                     +    pclost <- matrix(pclost$max.lost, nrow=nrow(ll2[[1]]), ncol=nrow(pclost),
                     +                     byrow=TRUE,
                     +                     dimnames=list(row.names(ll2[[1]]),
                     +                                   as.character(pclost$dataset)))
                     +  }
                     +  pclost <- apply(pclost,1,FUN=max)
                     +
                     +  ll2 <- list( list(mat=ll[[1]], title="min silhouette\nwidth",
                     +                    cluster_columns=TRUE, name="silhouette width"),
                     +               list(mat=ll[[2]], title="mean silhouette\nwidth",
                     +                    cluster_columns=TRUE, show_heatmap_legend=FALSE),
                     +               list(mat=ll[[3]], title="mean ARI", name="ARI (MADs)",
                     +                    cluster_columns=FALSE),
                     +               list(mat=ll[[4]], title="mean MI", name="MI (MADs)",
                     +                    cluster_columns=FALSE),
                     +               list(mat=ll[[5]], title="mean ARI at\ntrue k",
                     +                    name="ARI at true k (MADs)", cluster_columns=FALSE)
                     +               )
                     +
                     +  # strip the step prefixes from the method names shown in the annotation
                     +  if("doubletmethod" %in% colnames(pp))
                     +    pp$doubletmethod <- gsub("^doublet\\.","",pp$doubletmethod)
                     +  if("clustmethod" %in% colnames(pp))
                     +    pp$clustmethod <- gsub("^clust\\.","",pp$clustmethod)
                     +  for(f in c("filt","sel","norm")){
                     +    if(f %in% colnames(pp)) pp[[f]] <- gsub(paste0("^",f,"\\."),"",pp[[f]])
                     +  }
                     +
                     +  ha <- HeatmapAnnotation(which="row", "max\n% lost"=anno_barplot(
                     +    pclost, bar_width=1, border=FALSE, gp=gpar(fill="#282828", col="#282828"),
                     +    width=unit(1.5,"cm")), df=pp, annotation_legend_param=list("side"="right"))
                     +
                     +  # a single row clustering, computed on all metrics, is given to the first
                     +  # heatmap only
                     +  h <- hclust(dist(do.call(cbind, ll)))
                     +  silhscale <- .silScale(cbind(ll2[[1]]$mat, ll2[[2]]$mat))
                     +  H <- NULL
                     +  for(i in seq_along(ll2)){
                     +    hi <- if(i==1) h else FALSE
                     +    if(grepl("silhouette", ll2[[i]]$title)){
                     +      col <- silhscale
                     +      scn <- FALSE
                     +    }else{
                     +      col <- .defaultColorMapping(ll2[[i]]$mat, center=TRUE)
                     +      scn <- datasets_as_columnNames
                     +    }
                     +    la <- ra <- NULL
                     +    # the row annotation is attached to the right of the last heatmap
                     +    if(i==length(ll2)) ra <- ha
                     +    ba <- .ds_anno(colnames(ll2[[i]]$mat),
                     +                   legend=(!datasets_as_columnNames && i==1))
                     +    name <- if(is.null(ll2[[i]]$name)) ll2[[i]]$title else ll2[[i]]$name
                     +    # plain if/else rather than ifelse(), which would strip the unit
                     +    # attributes and hand a bare number to Heatmap
                     +    wi <- if(is.null(width)) unit(if(i<=2) 3.2 else 2.5, "cm") else width
                     +    shl <- ll2[[i]]$show_heatmap_legend
                     +    if(is.null(shl)) shl <- TRUE
                     +    H <- H + Heatmap( ll2[[i]]$mat, name=name, cluster_rows=hi,
                     +                      show_row_names=FALSE, show_column_names=scn,
                     +                      heatmap_legend_param=heatmap_legend_param,
                     +                      column_title=ll2[[i]]$title, col=col, width=wi,
                     +                      cluster_columns=ll2[[i]]$cluster_columns,
                     +                      show_column_dend=FALSE, bottom_annotation=ba,
                     +                      left_annotation=la, right_annotation=ra,
                     +                      column_names_gp = gpar(fontsize=10),
                     +                      show_heatmap_legend=shl, ... )
                     +  }
                     +  H
                     +}
                     \ No newline at end of file

man/scrna_evalPlot_overall.Rd

History View file @ ff087a8

                     new file mode 100644
@@ -0,0 +1,42 @@
                     +% Generated by roxygen2: do not edit by hand
                     +% Please edit documentation in R/scrna_plot.R
                     +\name{scrna_evalPlot_overall}
                     +\alias{scrna_evalPlot_overall}
                     +\title{scrna_evalPlot_overall}
                     +\usage{
                     +scrna_evalPlot_overall(
                     +  res,
                     +  agg.by = NULL,
                     +  width = NULL,
                     +  datasets_as_columnNames = TRUE,
                     +  heatmap_legend_param = list(direction = "horizontal", nrow = 1, by_row = TRUE),
                     +  ...
                     +)
                     +}
                     +\arguments{
                     +\item{res}{Aggregated pipeline results (i.e. the output of `runPipeline` or
                     +`aggregateResults`)}
+                    +
                     +\item{agg.by}{The parameters by which to aggregate.}
+                    +
                     +\item{width}{The width of individual heatmap bodies.}
+                    +
                     +\item{datasets_as_columnNames}{Logical; whether dataset names should be
                     +printed below the columns (except for silhouette) rather than using a
                     +legend.}
+                    +
                     +\item{heatmap_legend_param}{Passed to each call to `Heatmap`}
+                    +
                     +\item{...}{Passed to each call to `Heatmap`}
                     +}
                     +\value{
                     +A HeatmapList
                     +}
                     +\description{
                     +Plots a multi-level summary heatmap of many analyses of the `scrna_pipeline`.
                     +}
                     +\examples{
                     +data("exampleResults")
                     +h <- scrna_evalPlot_overall(exampleResults)
                     +draw(h, heatmap_legend_side="bottom")
                     +}