Browse code

Merge pull request #39 from plger/devel

added overall plot function for scrna_pipeline

Pierre-Luc authored on 18/06/2020 13:47:55 • GitHub committed on 18/06/2020 13:47:55
Showing 4 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: pipeComp
2 2
 Type: Package
3 3
 Title: pipeComp pipeline benchmarking framework
4
-Version: 0.99.40
4
+Version: 0.99.42
5 5
 Depends: R (>= 4.0)
6 6
 Authors@R: c(
7 7
 	person("Pierre-Luc", "Germain", email="[email protected]", role=c("cre","aut"), comment = c(ORCID = "0000-0003-3418-4218")), 
... ...
@@ -25,6 +25,7 @@ export(readPipelineResults)
25 25
 export(runPipeline)
26 26
 export(scrna_describeDatasets)
27 27
 export(scrna_evalPlot_filtering)
28
+export(scrna_evalPlot_overall)
28 29
 export(scrna_evalPlot_silh)
29 30
 export(scrna_pipeline)
30 31
 exportClasses(PipelineDefinition)
... ...
@@ -314,3 +314,161 @@ scrna_describeDatasets <- function(sces, pt.size=0.3, ...){
314 314
 }
315 315
 
316 316
 
317
+#' scrna_evalPlot_overall
318
+#' 
319
+#' Plots a multi-level summary heatmap of many analyses of the `scrna_pipeline`.
320
+#'
321
+#' @param res Aggregated pipeline results (i.e. the output of `runPipeline` or
322
+#' `aggregateResults`)
323
+#' @param agg.by The paramters by which to aggregate.
324
+#' @param width The width of individual heatmap bodies.
325
+#' @param datasets_as_columnNames Logical; whether dataset names should be 
326
+#' printed below the columns (except for silhouette) rather than using a
327
+#' legend.
328
+#' @param heatmap_legend_param Passed to each calls to `Heatmap`
329
+#' @param ... Passed to each calls to `Heatmap`
330
+#'
331
+#' @return A HeatmapList
332
+#' @export
333
+#'
334
+#' @examples
335
+#' data("exampleResults")
336
+#' h <- scrna_evalPlot_overall(exampleResults)
337
+#' draw(h, heatmap_legend_side="bottom")
338
+scrna_evalPlot_overall <- function(res, agg.by=NULL, width=NULL, 
339
+                              datasets_as_columnNames=TRUE,
340
+                              heatmap_legend_param=list(direction="horizontal",nrow=1,
341
+                                                        by_row=TRUE), 
342
+                              ... ){
343
+  a <- arguments(metadata(res)$PipelineDefinition)
344
+  if(is.null(agg.by)){
345
+    agg.by <- c(unlist(a[-length(a)]),c("clustmethod", "dims"))
346
+    agg.by <- agg.by[sapply(agg.by, FUN=function(x) 
347
+                              length(unique(res$evaluation$clustering[[x]]))>1)]
348
+  }
349
+  print(agg.by)
350
+  agg.by <- as.character(agg.by)
351
+  if(!all(agg.by %in% unlist(a)))
352
+    stop("`agg.by` should be a vector of pipeline parameters.")
353
+  
354
+  # dimred
355
+  sil <- res$evaluation$dimreduction$silhouette
356
+  if(is(sil,"list")) sil <- sil[[1]]
357
+  ll <- lapply(c("minSilWidth", "meanSilWidth"), FUN=function(x){
358
+    .prepRes( sil, what=x, agg.by=intersect(agg.by, colnames(sil)), 
359
+              returnParams=TRUE, shortNames=TRUE, 
360
+              pipDef=metadata(res)$PipelineDefinition )
361
+  })
362
+  pp1 <- ll[[1]]$pp
363
+  pp1$method <- NULL
364
+  ll1 <- lapply(ll, FUN=function(x) x$res)
365
+
366
+  # clustering
367
+  ll <- lapply(c("ARI", "MI"), FUN=function(x){
368
+    .prepRes( res$evaluation$clustering, what=x, returnParams=TRUE, 
369
+              agg.by=agg.by, shortNames=TRUE, 
370
+              pipDef=metadata(res)$PipelineDefinition )
371
+  })
372
+  ll[[3]] <- .prepRes(res$evaluation$clustering, what="ARI", returnParams=TRUE,
373
+                      agg.by=agg.by, filt=expr(true.nbClusts==n_clus), 
374
+                      shortNames=TRUE, pipDef=metadata(res)$PipelineDefinition)
375
+  pp <- ll[[1]]$pp
376
+  pp$method <- NULL
377
+  ll2 <- lapply(ll, FUN=function(x){
378
+    x <- as.data.frame(x$res)
379
+    x <- as.matrix(colCenterScale(x[row.names(pp),]))
380
+    row.names(x) <- row.names(pp)
381
+    x
382
+  })
383
+  
384
+  # merge dimred and clust results
385
+  tmp <- as.character(apply( pp[,colnames(pp1),drop=FALSE], 1, 
386
+                             collapse=" > ",FUN=paste ))
387
+  ll1 <- lapply(ll1, FUN=function(x){
388
+    x <- x[tmp,]
389
+    row.names(x) <- row.names(ll2[[1]])
390
+    x
391
+  })
392
+  ll <- c(ll1,ll2)
393
+
394
+  # get max % lost
395
+  pclost <- scrna_evalPlot_filtering(res, returnTable=TRUE)
396
+  filt.agg.by <- intersect(agg.by,unlist(a[1:2]))
397
+  pclost <- aggregate( pclost[,"max.lost",drop=FALSE], 
398
+                        pclost[,c(filt.agg.by,"dataset"),drop=FALSE], FUN=mean)
399
+  if(length(filt.agg.by)>0){
400
+    f <- as.formula(paste(paste(filt.agg.by,collapse="+"),"~dataset"))
401
+    pclost <- reshape2::dcast(pclost, f, value.var="max.lost")
402
+    row.names(pclost) <- apply( pclost[,seq_along(filt.agg.by),drop=FALSE], 1, 
403
+                                collapse=" > ",FUN=paste )
404
+    tmp <- as.character(apply( pp[,filt.agg.by,drop=FALSE], 1, collapse=" > ",
405
+                               FUN=paste ))
406
+    pclost <- pclost[tmp,setdiff(colnames(pclost), filt.agg.by),]
407
+    row.names(pclost) <- row.names(ll2[[1]])
408
+  }else{
409
+    pclost <- matrix(pclost$max.lost, nrow=nrow(ll2[[1]]), ncol=nrow(pclost), 
410
+                     byrow=TRUE, 
411
+                     dimnames=list(row.names(ll2[[1]])), pclost$dataset)
412
+  }
413
+  pclost <- apply(pclost,1,FUN=max)
414
+  
415
+  ll2 <- list( list(mat=ll[[1]], title="min silhouette\nwidth", 
416
+                    cluster_columns=TRUE, name="silhouette width"),
417
+               list(mat=ll[[2]], title="mean silhouette\nwidth", 
418
+                    cluster_columns=TRUE, show_heatmap_legend=FALSE),
419
+               list(mat=ll[[3]], title="mean ARI", name="ARI (MADs)", 
420
+                    cluster_columns=FALSE),
421
+               list(mat=ll[[4]], title="mean MI", name="MI (MADs)", 
422
+                    cluster_columns=FALSE),
423
+               list(mat=ll[[5]], title="mean ARI at\ntrue k", 
424
+                    name="ARI at true k (MADs)", cluster_columns=FALSE)
425
+               )
426
+
427
+  if("doubletmethod" %in% colnames(pp))
428
+    pp$doubletmethod <- gsub("^doublet\\.","",pp$doubletmethod)
429
+  if("clustmethod" %in% colnames(pp))
430
+    pp$clustmethod <- gsub("^clust\\.","",pp$doubletmethod)
431
+  for(f in c("filt","sel","norm")){
432
+    if(f %in% colnames(pp)) pp[[f]] <- gsub(paste0("^",f,"\\."),"",pp[[f]])
433
+  }
434
+
435
+  ha <- HeatmapAnnotation(which="row", "max\n% lost"=anno_barplot(
436
+    pclost, bar_width=1, border=FALSE, gp=gpar(fill="#282828", col="#282828"),
437
+    width=unit(1.5,"cm")), df=pp, annotation_legend_param=list("side"="right"))
438
+  
439
+  h <- hclust(dist(do.call(cbind, ll)))
440
+  silhscale <- .silScale(cbind(ll2[[1]]$mat, ll2[[2]]$mat))
441
+  H <- NULL
442
+  for(i in seq_along(ll2)){
443
+    if(i==1){
444
+      hi <- h
445
+    }else{
446
+      hi <- FALSE
447
+    }
448
+    if(grepl("silhouette", ll2[[i]]$title)){
449
+      col <- silhscale
450
+      scn <- FALSE
451
+    }else{
452
+      col <- .defaultColorMapping(ll2[[i]]$mat, center=TRUE)
453
+      scn <- datasets_as_columnNames
454
+    }
455
+    la <- ra <- NULL
456
+    if(i==length(ll2)) ra <- ha
457
+    ba <- .ds_anno(colnames(ll2[[i]]$mat), 
458
+                   legend=(!datasets_as_columnNames && i==1))
459
+    name <- ifelse(is.null(ll2[[i]]$name),ll2[[i]]$title,ll2[[i]]$name)
460
+    wi <- ifelse(is.null(width), unit(ifelse(i<=2,3.2,2.5), "cm"), width)
461
+    H <- H + Heatmap( ll2[[i]]$mat, name=name, cluster_rows=hi, 
462
+                      show_row_names=FALSE, show_column_names=scn, 
463
+                      heatmap_legend_param=heatmap_legend_param, 
464
+                      column_title=ll2[[i]]$title, col=col, width=wi, 
465
+                      cluster_columns=ll2[[i]]$cluster_columns, 
466
+                      show_column_dend=FALSE, bottom_annotation=ba, 
467
+                      left_annotation=la, right_annotation=ra, 
468
+                      column_names_gp = gpar(fontsize=10),
469
+                      show_heatmap_legend=ifelse(
470
+                        is.null(ll2[[i]]$show_heatmap_legend),TRUE,
471
+                        ll2[[i]]$show_heatmap_legend), ... )
472
+  }
473
+  H
474
+}
317 475
\ No newline at end of file
318 476
new file mode 100644
... ...
@@ -0,0 +1,42 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/scrna_plot.R
3
+\name{scrna_evalPlot_overall}
4
+\alias{scrna_evalPlot_overall}
5
+\title{scrna_evalPlot_overall}
6
+\usage{
7
+scrna_evalPlot_overall(
8
+  res,
9
+  agg.by = NULL,
10
+  width = NULL,
11
+  datasets_as_columnNames = TRUE,
12
+  heatmap_legend_param = list(direction = "horizontal", nrow = 1, by_row = TRUE),
13
+  ...
14
+)
15
+}
16
+\arguments{
17
+\item{res}{Aggregated pipeline results (i.e. the output of `runPipeline` or
18
+`aggregateResults`)}
19
+
20
+\item{agg.by}{The paramters by which to aggregate.}
21
+
22
+\item{width}{The width of individual heatmap bodies.}
23
+
24
+\item{datasets_as_columnNames}{Logical; whether dataset names should be 
25
+printed below the columns (except for silhouette) rather than using a
26
+legend.}
27
+
28
+\item{heatmap_legend_param}{Passed to each calls to `Heatmap`}
29
+
30
+\item{...}{Passed to each calls to `Heatmap`}
31
+}
32
+\value{
33
+A HeatmapList
34
+}
35
+\description{
36
+Plots a multi-level summary heatmap of many analyses of the `scrna_pipeline`.
37
+}
38
+\examples{
39
+data("exampleResults")
40
+h <- scrna_evalPlot_overall(exampleResults)
41
+draw(h, heatmap_legend_side="bottom")
42
+}