Browse code

Added gsvaEnrichment() method, gsvaRanks() returns a new class gsvaRanksParam object, which is now input for gsvaScores(). Added unit tests and documentation

Robert Castelo authored on 20/10/2024 19:09:26
Showing 11 changed files

... ...
@@ -1,5 +1,6 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3
+export("geneSets<-")
3 4
 export("gsvaAnnotation<-")
4 5
 export(computeGeneSetsOverlap)
5 6
 export(deduplicateGeneSets)
... ...
@@ -9,6 +10,7 @@ export(geneSetSizes)
9 10
 export(geneSets)
10 11
 export(gsva)
11 12
 export(gsvaAnnotation)
13
+export(gsvaEnrichment)
12 14
 export(gsvaParam)
13 15
 export(gsvaRanks)
14 16
 export(gsvaScores)
... ...
@@ -23,6 +25,7 @@ exportClasses(GsvaExprData)
23 25
 exportClasses(GsvaGeneSets)
24 26
 exportClasses(GsvaMethodParam)
25 27
 exportClasses(gsvaParam)
28
+exportClasses(gsvaRanksParam)
26 29
 exportClasses(plageParam)
27 30
 exportClasses(ssgseaParam)
28 31
 exportClasses(zscoreParam)
... ...
@@ -84,13 +87,18 @@ importFrom(SpatialExperiment,SpatialExperiment)
84 87
 importFrom(SummarizedExperiment,SummarizedExperiment)
85 88
 importFrom(SummarizedExperiment,assay)
86 89
 importFrom(cli,cli_abort)
90
+importFrom(cli,cli_alert_danger)
87 91
 importFrom(cli,cli_alert_info)
88 92
 importFrom(cli,cli_alert_success)
89 93
 importFrom(cli,cli_alert_warning)
90 94
 importFrom(cli,cli_progress_bar)
91 95
 importFrom(cli,cli_progress_done)
92 96
 importFrom(cli,cli_progress_update)
97
+importFrom(graphics,abline)
98
+importFrom(graphics,grid)
99
+importFrom(graphics,lines)
93 100
 importFrom(graphics,plot)
101
+importFrom(graphics,segments)
94 102
 importFrom(methods,new)
95 103
 importFrom(parallel,splitIndices)
96 104
 importFrom(sparseMatrixStats,colRanks)
... ...
@@ -103,6 +111,7 @@ importFrom(stats,rnorm)
103 111
 importFrom(stats,rpois)
104 112
 importFrom(stats,sd)
105 113
 importFrom(utils,capture.output)
114
+importFrom(utils,globalVariables)
106 115
 importFrom(utils,head)
107 116
 importFrom(utils,installed.packages)
108 117
 importFrom(utils,packageDescription)
... ...
@@ -336,3 +336,21 @@ setClass("gsvaParam",
336 336
                         maxDiff=NA,
337 337
                         absRanking=NA,
338 338
                         sparse=FALSE))
339
+
340
+#' @name gsvaRanksParam-class
341
+#' @rdname gsvaParam-class
342
+#' @exportClass gsvaRanksParam
343
+setClass("gsvaRanksParam",
344
+         contains="gsvaParam",
345
+         prototype=list(exprData=NULL,
346
+                        geneSets=NULL,
347
+                        assay=NA_character_,
348
+                        annotation=NULL,
349
+                        minSize=NA_integer_,
350
+                        maxSize=NA_integer_,
351
+                        kcdf=NA_character_,
352
+                        kcdfNoneMinSampleSize=NA_integer_,
353
+                        tau=NA_real_,
354
+                        maxDiff=NA,
355
+                        absRanking=NA,
356
+                        sparse=FALSE))
... ...
@@ -9,7 +9,11 @@ setGeneric("gsvaRanks",
9 9
 
10 10
 #' @export 
11 11
 setGeneric("gsvaScores",
12
-           function(param, ranks, ...) standardGeneric("gsvaScores"))
12
+           function(param, ...) standardGeneric("gsvaScores"))
13
+
14
+#' @export 
15
+setGeneric("gsvaEnrichment",
16
+           function(param, ranks, ...) standardGeneric("gsvaEnrichment"))
13 17
 
14 18
 #' @export 
15 19
 setGeneric("filterGeneSets",
... ...
@@ -23,6 +27,10 @@ setGeneric("computeGeneSetsOverlap",
23 27
 setGeneric("geneSets",
24 28
            function(obj, ...) standardGeneric("geneSets"))
25 29
 
30
+#' @export
31
+setGeneric("geneSets<-",
32
+           function(object, value) standardGeneric("geneSets<-"))
33
+
26 34
 #' @export
27 35
 setGeneric("geneSetSizes",
28 36
            function(obj, ...) standardGeneric("geneSetSizes"))
... ...
@@ -301,8 +301,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, kcdf,
301 301
 #' ranks; and (2) calculate GSVA scores using the previously calculated
302 302
 #' ranks.
303 303
 #'
304
-#' @param param A [`gsvaParam`] object built using the constructor function
305
-#' [`gsvaParam`].
304
+#' @param param A [`gsvaParam-class`] object built using the constructor
305
+#' function [`gsvaParam`].
306 306
 #'
307 307
 #' @param verbose Gives information about each calculation step. Default: `TRUE`.
308 308
 #'
... ...
@@ -310,10 +310,10 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, kcdf,
310 310
 #'   related to the parallel execution of some of the tasks and calculations
311 311
 #'   within this function.
312 312
 #'
313
-#' @return In the case of the `gsvaRanks()` method, a matrix of GSVA rank
314
-#' values per column.
313
+#' @return In the case of the `gsvaRanks()` method, an object of class
314
+#' [`gsvaRanksParam-class`].
315 315
 #'
316
-#' @seealso [`gsvaParam`], [`gsva`]
316
+#' @seealso [`gsvaParam-class`], [`gsvaRanksParam-class`], [`gsva`]
317 317
 #'
318 318
 #' @aliases gsvaRanks,gsvaParam-method
319 319
 #' @name gsvaRanks
... ...
@@ -341,14 +341,17 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, kcdf,
341 341
 #' y <- matrix(rnorm(n*p), nrow=p, ncol=n,
342 342
 #'             dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep="")))
343 343
 #'
344
+#' ## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples
345
+#' y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2
346
+#'
344 347
 #' ## build GSVA parameter object
345 348
 #' gsvapar <- gsvaParam(y, geneSets)
346 349
 #'
347 350
 #' ## calculate GSVA ranks
348
-#' gsva_ranks <- gsvaRanks(gsvapar)
349
-#' gsva_ranks
351
+#' gsvarankspar <- gsvaRanks(gsvapar)
352
+#' gsvarankspar
350 353
 #' ## calculate GSVA scores
351
-#' gsva_es <- gsvaScores(gsvapar, gsva_ranks)
354
+#' gsva_es <- gsvaScores(gsvarankspar)
352 355
 #' gsva_es
353 356
 #'
354 357
 #' ## calculate now GSVA scores in a single step
... ...
@@ -362,7 +365,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, kcdf,
362 365
 #'                   gset2=paste0("g", c(1, 2, 7, 8)))
363 366
 #'
364 367
 #' ## note that there is no need to calculate the GSVA ranks again
365
-#' gsvaScores(gsvapar, gsva_ranks, geneSets2)
368
+#' geneSets(gsvarankspar) <- geneSets2
369
+#' gsvaScores(gsvarankspar)
366 370
 #'
367 371
 #' @importFrom cli cli_alert_info cli_alert_success
368 372
 #' @importFrom BiocParallel bpnworkers
... ...
@@ -393,20 +397,30 @@ setMethod("gsvaRanks", signature(param="gsvaParam"),
393 397
 
394 398
               psz <- if(inherits(BPPARAM, "SerialParam")) 1L else bpnworkers(BPPARAM)
395 399
 
396
-              gsva_rnk <- .compute_gsva_ranks(expr=filteredDataMatrix,
400
+              gsvarnks <- .compute_gsva_ranks(expr=filteredDataMatrix,
397 401
                                               kcdf=get_kcdf(param),
398 402
                                               kcdf.min.ssize=get_kcdfNoneMinSampleSize(param),
399 403
                                               sparse=get_sparse(param),
400 404
                                               verbose=verbose,
401 405
                                               BPPARAM=BPPARAM)
402 406
 
403
-              rownames(gsva_rnk) <- rownames(filteredDataMatrix)
404
-              colnames(gsva_rnk) <- colnames(filteredDataMatrix)
407
+              rownames(gsvarnks) <- rownames(filteredDataMatrix)
408
+              colnames(gsvarnks) <- colnames(filteredDataMatrix)
409
+
410
+              rnkcontainer <- wrapData(get_exprData(param), gsvarnks)
411
+              rval <- new("gsvaRanksParam",
412
+                          exprData=rnkcontainer, geneSets=get_geneSets(param),
413
+                          assay="gsvaranks", annotation=get_annotation(param),
414
+                          minSize=get_minSize(param), maxSize=get_maxSize(param),
415
+                          kcdf=get_kcdf(param),
416
+                          kcdfNoneMinSampleSize=get_kcdfNoneMinSampleSize(param),
417
+                          tau=get_tau(param), maxDiff=get_maxDiff(param),
418
+                          absRanking=get_absRanking(param), sparse=get_sparse(param))
405 419
 
406 420
               if (verbose)
407 421
                   cli_alert_success("Calculations finished")
408 422
 
409
-              return(gsva_rnk)
423
+              return(rval)
410 424
           })
411 425
 
412 426
 .check_geneSets_minSize_maxSize_tau <- function(geneSets, minSize, maxSize, tau) {
... ...
@@ -467,56 +481,24 @@ setMethod("gsvaRanks", signature(param="gsvaParam"),
467 481
 }
468 482
 
469 483
 
470
-#' @param ranks A matrix-like object storing GSVA ranks calculated with the
471
-#' method [`gsvaRanks`].
472
-#'
473
-#' @param geneSets A collection of gene sets. Must be one of the classes
474
-#' supported by [`GsvaGeneSets-class`]. For a list of these classes, see its
475
-#' help page using `help(GsvaGeneSets)`. By default, this parameter is set to
476
-#' the `NA` missing value, which means that GSVA scores will be calculated
477
-#' using the gene sets specified in the `param` argument. If this parameter is
478
-#' set to a non-missing value corresponding to an object of the classes
479
-#' supported by [`GsvaGeneSets-class`], then GSVA scores will be calculated
480
-#' using the gene sets in this argument, instead of the ones specified in the
481
-#' `param` argument.
482
-#'
483
-#' @param minSize Numeric vector of length 1.  Minimum size of the resulting gene
484
-#' sets after gene identifier mapping. Its default value is `NA`, indicating that
485
-#' this minimum value will be taken from the input `param` argument, otherwise,
486
-#' non-`NA` values override those from the input `param` argument.
487
-#'
488
-#' @param maxSize Numeric vector of length 1.  Minimum size of the resulting gene
489
-#' sets after gene identifier mapping. Its default value is `NA`, indicating that
490
-#' this minimum value will be taken from the input `param` argument, otherwise,
491
-#' non-`NA` values override those from the input `param` argument.
492
-#'
493
-#' @param tau Numeric vector of length 1.  The exponent defining the weight of
494
-#' the tail in the random walk performed by the `GSVA` (Hänzelmann et al.,
495
-#' 2013) method.  The default value is 1 as described in the paper.
496
-#'
497
-#' @param maxDiff Logical vector of length 1 which offers two approaches to
498
-#' calculate the enrichment statistic (ES) from the KS random walk statistic.
499
-#' * `FALSE`: ES is calculated as the maximum distance of the random walk
500
-#' from 0. This approach produces a distribution of enrichment scores that is
501
-#' bimodal, but it can give large enrichment scores to gene sets whose genes
502
-#' are not concordantly activated in one direction only.
503
-#' * `TRUE` (the default): ES is calculated as the magnitude difference between
504
-#' the largest positive and negative random walk deviations. This default value
505
-#' gives larger enrichment scores to gene sets whose genes are concordantly
506
-#' activated in one direction only.
507
-#'
508
-#' @param absRanking Logical vector of length 1 used only when `maxDiff=TRUE`.
509
-#' When `absRanking=FALSE` (default) a modified Kuiper statistic is used to
510
-#' calculate enrichment scores, taking the magnitude difference between the
511
-#' largest positive and negative random walk deviations. When
512
-#' `absRanking=TRUE` the original Kuiper statistic that sums the largest
513
-#' positive and negative random walk deviations is used.
484
+#' @param param A parameter object of the [`gsvaRanksParam-class`] class.
514 485
 #'
515 486
 #' @return In the case of the `gsvaScores()` method, a gene-set by sample matrix
516
-#' of GSVA enrichment scores stored in a ocntainer object of the same type as
517
-#' the input expression data container in the `param` argument.
487
+#' of GSVA enrichment scores stored in a container object of the same type as
488
+#' the input ranks data container. If
489
+#' the input was a base matrix or a [`dgCMatrix-class`] object, then the output will
490
+#' be a base matrix object with the gene sets employed in the calculations
491
+#' stored in an attribute called `geneSets`. If the input was an
492
+#' [`ExpressionSet`] object, then the output will be also an [`ExpressionSet`]
493
+#' object with the gene sets employed in the calculations stored in an
494
+#' attributed called `geneSets`. If the input was an object of one of the
495
+#' classes described in [`GsvaExprData`], such as a [`SingleCellExperiment`],
496
+#' then the output will be of the same class, where enrichment scores will be
497
+#' stored in an assay called `es` and the gene sets employed in the
498
+#' calculations will be stored in the `rowData` slot of the object under the
499
+#' column name `gs`.
518 500
 #'
519
-#' @aliases gsvaScores,gsvaParam,GsvaExprData-method
501
+#' @aliases gsvaScores,gsvaRanksParam-method
520 502
 #' @name gsvaScores
521 503
 #' @rdname gsvaRanks
522 504
 #'
... ...
@@ -524,42 +506,20 @@ setMethod("gsvaRanks", signature(param="gsvaParam"),
524 506
 #' @importFrom BiocParallel bpnworkers
525 507
 #' @importFrom utils packageDescription
526 508
 #' @exportMethod gsvaScores
527
-setMethod("gsvaScores", signature(param="gsvaParam", ranks="GsvaExprData"),
528
-          function(param, ranks, geneSets=NA, minSize=NA, maxSize=NA,
529
-                   tau=NA, maxDiff=NA, absRanking=NA,
530
-                   verbose=TRUE, BPPARAM=SerialParam(progressbar=verbose))
509
+setMethod("gsvaScores", signature(param="gsvaRanksParam"),
510
+          function(param, verbose=TRUE,
511
+                   BPPARAM=SerialParam(progressbar=verbose))
531 512
           {
532 513
               if (verbose)
533 514
                   cli_alert_info(sprintf("GSVA version %s",
534 515
                                          packageDescription("GSVA")[["Version"]]))
535 516
 
536
-              .check_geneSets_minSize_maxSize_tau(geneSets, minSize, maxSize, tau)
537
-
538
-              .check_maxDiff_absRanking(maxDiff, absRanking)
539
-
540
-              tau <- ifelse(is.na(tau), get_tau(param), tau)
541
-              maxDiff <- ifelse(is.na(maxDiff), get_maxDiff(param), maxDiff)
542
-              absRanking <- ifelse(is.na(absRanking), get_absRanking(param),
543
-                                   absRanking)
544
-              sparse <- get_sparse(param) ## sparse regime from parameter obj
545
-
517
+              ## assuming rows in the rank data have been already filtered
546 518
               exprData <- get_exprData(param)
547
-              dataMatrix <- unwrapData(exprData, get_assay(param))
548
-              filteredDataMatrix <- .filterGenes(dataMatrix,
549
-                                                 removeConstant=TRUE,
550
-                                                 removeNzConstant=TRUE)
551
-
552
-              if (!identical(rownames(filteredDataMatrix),
553
-                             rownames(unwrapData(ranks)))) {
554
-                  msg <- paste("Rownames in ranks don't match those from the",
555
-                               "input expression data in 'param'")
556
-                  cli_abort(c("x"=msg))
557
-              }
558
-
559
-              filteredMappedGeneSets <- .filterAndMapGeneSets(param, geneSets,
560
-                                                              minSize, maxSize,
561
-                                                              filteredDataMatrix,
562
-                                                              verbose)
519
+              filteredDataMatrix <- unwrapData(exprData, get_assay(param))
520
+              filteredMappedGeneSets <- .filterAndMapGeneSets(param=param,
521
+                                                              filteredDataMatrix=filteredDataMatrix,
522
+                                                              verbose=verbose)
563 523
 
564 524
               if (!inherits(BPPARAM, "SerialParam") && verbose) {
565 525
                   msg <- sprintf("Using a %s parallel back-end with %d workers",
... ...
@@ -570,12 +530,13 @@ setMethod("gsvaScores", signature(param="gsvaParam", ranks="GsvaExprData"),
570 530
               if (verbose)
571 531
                   cli_alert_info(sprintf("Calculating GSVA scores"))
572 532
 
573
-              gsva_es <- .compute_gsva_scores(R=unwrapData(ranks),
533
+              gsva_es <- .compute_gsva_scores(R=filteredDataMatrix,
574 534
                                               geneSetsIdx=filteredMappedGeneSets,
575
-                                              tau=tau, maxDiff=maxDiff,
576
-                                              absRanking=absRanking,
577
-                                              sparse=sparse, verbose=verbose,
578
-                                              BPPARAM=BPPARAM)
535
+                                              tau=get_tau(param),
536
+                                              maxDiff=get_maxDiff(param),
537
+                                              absRanking=get_absRanking(param),
538
+                                              sparse=get_sparse(param),
539
+                                              verbose=verbose, BPPARAM=BPPARAM)
579 540
 
580 541
               rownames(gsva_es) <- names(filteredMappedGeneSets)
581 542
               colnames(gsva_es) <- colnames(filteredDataMatrix)
... ...
@@ -590,9 +551,144 @@ setMethod("gsvaScores", signature(param="gsvaParam", ranks="GsvaExprData"),
590 551
               return(rval)
591 552
           })
592 553
 
554
+#' @title GSVA enrichment data and visualization
555
+#'
556
+#' @description Extract and plot enrichment data from GSVA scores.
557
+#'
558
+#' @param param A [`gsvaRanksParam-class`] object obtained with the method
559
+#' [`gsvaRanks`].
560
+#'
561
+#' @param column The column for which we want to retrieve the enrichment data.
562
+#' This parameter is only available in the `gsvaEnrichment()` method.
563
+#'
564
+#' @param geneSet Either a positive integer number between 1 and the number of
565
+#' available gene sets in `param`, or a character string with the name of
566
+#' one of the gene sets available in `param`.
567
+#'
568
+#' @param plot A character string indicating whether an enrichment plot should
569
+#' be produced using either base R graphics (`plot="base"`) or the ggplot2 package
570
+#' (`plot="ggplot"`), or not (`plot="no"`). In the latter case, the enrichment
571
+#' data will be returned. By default `plot="auto"`, which implies that if this
572
+#' method is called from an interactive session, a plot using base R graphics
573
+#' will be produced and, otherwise, the enrichment data is returned.
574
+#'
575
+#' @param ... Further arguments passed to the `plot()` function when the
576
+#' previous parameter `plot="base"`.
577
+#'
578
+#' @return When `plot="no"`, this method returns the enrichment data. When
579
+#' `plot="ggplot"`, this method returns a `ggplot` object. When `plot="base"`
580
+#' no value is returned.
581
+#'
582
+#' @aliases gsvaEnrichment,gsvaRanksParam-method
583
+#' @name gsvaEnrichment
584
+#' @rdname gsvaEnrichment
585
+#'
586
+#' @references Hänzelmann, S., Castelo, R. and Guinney, J. GSVA: Gene set
587
+#' variation analysis for microarray and RNA-Seq data.
588
+#' *BMC Bioinformatics*, 14:7, 2013.
589
+#' [DOI](https://doi.org/10.1186/1471-2105-14-7)
590
+#'
591
+#' @examples
592
+#' library(GSVA)
593
+#'
594
+#' p <- 10 ## number of genes
595
+#' n <- 30 ## number of samples
596
+#' nGrp1 <- 15 ## number of samples in group 1
597
+#' nGrp2 <- n - nGrp1 ## number of samples in group 2
598
+#'
599
+#' ## consider three disjoint gene sets
600
+#' geneSets <- list(gset1=paste0("g", 1:3),
601
+#'                  gset2=paste0("g", 4:6),
602
+#'                  gset3=paste0("g", 7:10))
603
+#'
604
+#' ## sample data from a normal distribution with mean 0 and st.dev. 1
605
+#' y <- matrix(rnorm(n*p), nrow=p, ncol=n,
606
+#'             dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep="")))
607
+#'
608
+#' ## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples
609
+#' y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2
610
+#'
611
+#' ## build GSVA parameter object
612
+#' gsvapar <- gsvaParam(y, geneSets)
613
+#'
614
+#' ## calculate GSVA ranks
615
+#' gsvarankspar <- gsvaRanks(gsvapar)
616
+#' gsvarankspar
617
+#'
618
+#' ## by default the enrichment data for the first column and the first
619
+#' ## gene set are retrieved
620
+#' gsvaEnrichment(gsvarankspar)
621
+#'
622
+#' @importFrom cli cli_alert_info cli_abort cli_alert_danger
623
+#' @exportMethod gsvaScores
624
+setMethod("gsvaEnrichment", signature(param="gsvaRanksParam"),
625
+          function(param, column=1, geneSet=1,
626
+                   plot=c("auto", "base", "ggplot", "no"), ...)
627
+          {
628
+              plot <- match.arg(plot)
629
+
630
+              geneSets <- get_geneSets(param)
631
+              if (length(geneSet) > 1) {
632
+                  msg <- paste("Please provide only the name or position of a",
633
+                               "single gene set.")
634
+                  cli_abort("x"=msg)
635
+              }
636
+              if (is.character(geneSet)) {
637
+                  if (!geneSet %in% names(geneSets)) {
638
+                      msg <- paste("Gene set %s is missing from the input",
639
+                                   "parameter object")
640
+                      cli_abort("x"=sprintf(msg, geneSet))
641
+                  }
642
+              } else if (is.integer(geneSet) || is.numeric(geneSet)) {
643
+                  if (geneSet < 1 || geneSet > length(geneSets)) {
644
+                       msg <- paste("When 'geneSet' is numeric, it should be a",
645
+                                    "number between 1 and the number of gene",
646
+                                    "sets (%d).")
647
+                       cli_abort("x"=sprintf(msg, length(geneSets)))
648
+                  }
649
+              } else {
650
+                  msg <- paste("'geneSet' should be either numeric or",
651
+                               "character.")
652
+                  cli_abort("x"=msg)
653
+              }
654
+
655
+              tau <- get_tau(param)
656
+              maxDiff <- get_maxDiff(param)
657
+              absRanking <- get_absRanking(param)
658
+              sparse <- get_sparse(param)
659
+
660
+              exprData <- get_exprData(param)
661
+              filteredDataMatrix <- unwrapData(exprData, get_assay(param))
662
+
663
+              ## no need for verbosity when mapping a single gene set
664
+              filteredMappedGeneSets <- .filterAndMapGeneSets(param, wgset=geneSet,
665
+                                                              filteredDataMatrix,
666
+                                                              verbose=FALSE)
667
+
668
+              geneSetIdx <- filteredMappedGeneSets[[1]]
669
+              edata <- .gsva_enrichment_data(R=filteredDataMatrix,
670
+                                             column=column,
671
+                                             geneSetIdx=geneSetIdx,
672
+                                             tau=tau, maxDiff=maxDiff,
673
+                                             absRanking=absRanking,
674
+                                             sparse=sparse)
675
+
676
+              if (plot == "no" || (plot == "auto" && !interactive()))
677
+                  return(edata)
678
+
679
+              if (plot == "auto" || plot == "base")
680
+                  .plot_enrichment_base(edata, ...) 
681
+              else { ## plot == "ggplot"
682
+                  instpkgs <- installed.packages(noCache=TRUE)[, "Package"]
683
+                  if (!"ggplot2" %in% instpkgs)
684
+                      cli_alert_danger("Please install the ggplot2 package")
685
+                  else
686
+                      .plot_enrichment_ggplot(edata)
687
+              }
688
+          })
593 689
 
594
-## END exported methods (to be moved to 'gsvaNewAPI.R')
595 690
 
691
+## END exported methods (to be moved to 'gsvaNewAPI.R')
596 692
 
597 693
 #' @importFrom cli cli_progress_update
598 694
 #' @importFrom parallel splitIndices
... ...
@@ -799,6 +895,125 @@ setMethod("gsvaScores", signature(param="gsvaParam", ranks="GsvaExprData"),
799 895
     return(es)
800 896
 }
801 897
 
898
+.gsva_enrichment_data <- function(R, column, geneSetIdx, tau=1, maxDiff=TRUE,
899
+                                  absRanking=FALSE, sparse=TRUE) {
900
+    n <- ncol(R)
901
+    es <- NULL
902
+    if (!is(R, "dgCMatrix"))
903
+        sparse <- FALSE
904
+
905
+    rnkstats <- .ranks2stats(R[, column], sparse)
906
+    walkStat <- .gsvaRndWalk2(geneSetIdx, rnkstats$dos, rnkstats$srs, tau)
907
+    maxDev <- c(max(c(0, max(walkStat))), min(c(0, min(walkStat))))
908
+    whMaxDev <- c(which.max(walkStat), which.min(walkStat))
909
+    whMaxDev[maxDev == 0] <- NA
910
+    
911
+    if (maxDiff && absRanking)
912
+        maxDev[2] <- -1 * maxDev[2]
913
+    sco <- sum(maxDev)
914
+    if (!maxDiff) {
915
+        sco <- maxDev[1]
916
+        if (abs(maxDev[2]) > maxDev[1])
917
+            sco <- maxDev[2]
918
+    }
919
+
920
+    edat <- data.frame(rank=seq.int(nrow(R)),
921
+                       stat=walkStat)
922
+    rownames(edat)[rnkstats$dos] <- rownames(R)
923
+
924
+    gsetrnk <- rnkstats$dos[geneSetIdx]
925
+    lepos <- leneg <- NA
926
+    if (!is.na(whMaxDev[1]))
927
+        lepos <- geneSetIdx[gsetrnk <= whMaxDev[1]]
928
+    if (!is.na(whMaxDev[2])) {
929
+        if (!is.na(whMaxDev[1]) && whMaxDev[2] < whMaxDev[1]) {
930
+            mask <- gsetrnk >= whMaxDev[2] & gsetrnk <= whMaxDev[1]
931
+            lepos <- leneg <- geneSetIdx[mask]
932
+        } else
933
+            leneg <- geneSetIdx[gsetrnk >= whMaxDev[2]]
934
+    }
935
+    res <- list(stats=edat,
936
+                gsetrnk=gsetrnk,
937
+                maxPos=maxDev[1],
938
+                whichMaxPos=whMaxDev[1],
939
+                maxNeg=maxDev[2],
940
+                whichMaxNeg=whMaxDev[2],
941
+                leadingEdgePos=lepos,
942
+                leadingEdgeNeg=leneg,
943
+                score=sco,
944
+                tau=tau,
945
+                maxDiff=maxDiff,
946
+                absRanking=absRanking,
947
+                sparse=sparse)
948
+
949
+    return(res)
950
+}
951
+
952
+#' @importFrom graphics abline grid lines segments
953
+.plot_enrichment_base <- function(edata, ...) {
954
+    ylim <- range(edata$stats$stat)
955
+    hgsetticks <- (ylim[2] - ylim[1]) * 0.1
956
+    plot(edata$stats, type="l", lwd=2, las=1, panel.first=grid(),
957
+         xlab="Gene Ranking", ylab="Random Walk Statistic", col="green", ...)
958
+    abline(h=0, lwd=2, lty=2, col="grey")
959
+    lines(edata$stats, lwd=2, col="green")
960
+    segments(edata$gsetrnk, -hgsetticks/2, edata$gsetrnk, hgsetticks/2, lwd=2)
961
+    if (!is.na(edata$whichMaxPos) &&
962
+        (edata$maxDiff || edata$maxPos >= abs(edata$maxNeg)))
963
+        segments(edata$whichMaxPos, 0, edata$whichMaxPos, edata$maxPos,
964
+                 lwd=2, lty=2, col="darkred")
965
+    if (!is.na(edata$whichMaxNeg) &&
966
+        (edata$maxDiff || edata$maxPos < abs(edata$maxNeg)))
967
+        segments(edata$whichMaxNeg, 0, edata$whichMaxNeg, edata$maxNeg,
968
+                 lwd=2, lty=2, col="darkred")
969
+}
970
+
971
+#' @importFrom cli cli_abort
972
+#' @importFrom utils globalVariables
973
+.plot_enrichment_ggplot <- function(edata, ...) {
974
+    if (!.isPackageLoaded("ggplot2")) {
975
+        loaded <- suppressPackageStartupMessages(requireNamespace("ggplot2"))
976
+        if (!loaded)
977
+            cli_abort("x"="ggplot2 could not be loaded")
978
+    }
979
+
980
+    ylim <- range(edata$stats$stat)
981
+    hgsetticks <- (ylim[2] - ylim[1]) * 0.1
982
+    gsetticks <- data.frame(gsetrnk=edata$gsetrnk)
983
+    ## from https://stackoverflow.com/a/39877048
984
+    fintticks <- function(x) unique(floor(pretty(seq(min(x),
985
+                                    (max(x) + 1) * 1.1))))
986
+    ggplot2::ggplot(data=edata$stats) +
987
+        ggplot2::scale_x_continuous(breaks=fintticks) +
988
+        ggplot2::geom_line(ggplot2::aes_string(x="rank", y="stat"), color="green") +
989
+        ggplot2::geom_segment(data=gsetticks,
990
+                     mapping=ggplot2::aes_string(x="gsetrnk", y=-hgsetticks/2,
991
+                                 xend="gsetrnk", yend=hgsetticks/2),
992
+                     linewidth=1) +
993
+        ggplot2::geom_hline(yintercept=0, colour="grey", linetype="dashed") +
994
+        { if (!is.na(edata$whichMaxPos) &&
995
+              (edata$maxDiff || edata$maxPos >= abs(edata$maxNeg)))
996
+              ggplot2::geom_segment(data=data.frame(whichMaxPos=edata$whichMaxPos,
997
+                                           maxPos=edata$maxPos),
998
+                           mapping=ggplot2::aes_string(x="whichMaxPos", y=0,
999
+                                       xend="whichMaxPos", yend="maxPos"),
1000
+                           colour="darkred", linetype="dashed") } +
1001
+        { if (!is.na(edata$whichMaxPos) &&
1002
+              (edata$maxDiff || edata$maxPos < abs(edata$maxNeg)))
1003
+              ggplot2::geom_segment(data=data.frame(whichMaxNeg=edata$whichMaxNeg,
1004
+                                           maxNeg=edata$maxNeg),
1005
+                           mapping=ggplot2::aes_string(x="whichMaxNeg", y=0,
1006
+                                       xend="whichMaxNeg", yend="maxNeg"),
1007
+                           colour="darkred", linetype="dashed") } +
1008
+        ggplot2::theme(panel.background=ggplot2::element_blank(),
1009
+              panel.grid.major=ggplot2::element_line(colour="grey", linetype="dotted"),
1010
+              panel.grid.minor=ggplot2::element_line(colour=NA),
1011
+              axis.text=ggplot2::element_text(size=12),
1012
+              axis.title=ggplot2::element_text(size=14),
1013
+              panel.border=ggplot2::element_rect(colour="black", fill=NA)) +
1014
+        ggplot2::labs(x="Gene Ranking", y="Random Walk Statistic")
1015
+}
1016
+
802 1017
 ##
803 1018
 ## functions interfacing C code
804 1019
 ##
... ...
@@ -1094,13 +1094,15 @@ setMethod("unwrapData", signature("SpatialExperiment"),
1094 1094
 ## wrapData: put the resulting data and gene sets into the original data container type
1095 1095
 setMethod("wrapData", signature(container="matrix"),
1096 1096
           function(container, dataMatrix, geneSets) {
1097
-              attr(dataMatrix, "geneSets") <- geneSets
1097
+              if (!missing(geneSets))
1098
+                  attr(dataMatrix, "geneSets") <- geneSets
1098 1099
               return(dataMatrix)
1099 1100
           })
1100 1101
 
1101 1102
 setMethod("wrapData", signature(container="dgCMatrix"),
1102 1103
           function(container, dataMatrix, geneSets) {
1103
-              attr(dataMatrix, "geneSets") <- geneSets
1104
+              if (!missing(geneSets))
1105
+                  attr(dataMatrix, "geneSets") <- geneSets
1104 1106
               return(dataMatrix)
1105 1107
           })
1106 1108
 
... ...
@@ -1110,45 +1112,79 @@ setMethod("wrapData", signature(container="ExpressionSet"),
1110 1112
                           phenoData=phenoData(container),
1111 1113
                           experimentData=experimentData(container),
1112 1114
                           annotation="")
1113
-              attr(rval, "geneSets") <- geneSets
1115
+              if (!missing(geneSets))
1116
+                  attr(rval, "geneSets") <- geneSets
1114 1117
               
1115 1118
               return(rval)
1116 1119
           })
1117 1120
 
1118 1121
 setMethod("wrapData", signature(container="SummarizedExperiment"),
1119 1122
           function(container, dataMatrix, geneSets) {
1123
+              rdata <- adata <- NULL
1124
+              if (!missing(geneSets)) {
1125
+                  adata <- SimpleList(es=dataMatrix)
1126
+                  rdata <- DataFrame(gs=CharacterList(geneSets))
1127
+              } else { ## assume missing geneSets imples dataMatrix are ranks
1128
+                  mask <- rownames(container) %in% rownames(dataMatrix)
1129
+                  adata <- c(assays(container[mask, ]),
1130
+                             SimpleList(gsvaranks=dataMatrix))
1131
+                  rdata <- rowData(container)[mask, ]
1132
+              }
1120 1133
               rval <- SummarizedExperiment(
1121
-                  assays=SimpleList(es=dataMatrix),
1134
+                  assays=adata,
1122 1135
                   colData=colData(container),
1123
-                  rowData=DataFrame(gs=CharacterList(geneSets)),
1136
+                  rowData=rdata,
1124 1137
                   metadata=metadata(container))
1125
-              metadata(rval)$annotation <- NULL
1138
+              if (!missing(geneSets))
1139
+                  metadata(rval)$annotation <- NULL
1126 1140
 
1127 1141
               return(rval)
1128 1142
           })
1129 1143
 
1130 1144
 setMethod("wrapData", signature(container="SingleCellExperiment"),
1131 1145
           function(container, dataMatrix, geneSets) {
1146
+              rdata <- adata <- NULL
1147
+              if (!missing(geneSets)) {
1148
+                  adata <- SimpleList(es=dataMatrix)
1149
+                  rdata <- DataFrame(gs=CharacterList(geneSets))
1150
+              } else { ## assume missing geneSets imples dataMatrix are ranks
1151
+                  mask <- rownames(container) %in% rownames(dataMatrix)
1152
+                  adata <- c(assays(container[mask, ]),
1153
+                             SimpleList(gsvaranks=dataMatrix))
1154
+                  rdata <- rowData(container)[mask, ]
1155
+              }
1132 1156
               rval <- SingleCellExperiment(
1133
-                  assays=SimpleList(es=dataMatrix),
1157
+                  assays=adata,
1134 1158
                   colData=colData(container),
1135
-                  rowData=DataFrame(gs=CharacterList(geneSets)),
1159
+                  rowData=rdata,
1136 1160
                   metadata=metadata(container))
1137
-              metadata(rval)$annotation <- NULL
1161
+              if (!missing(geneSets))
1162
+                  metadata(rval)$annotation <- NULL
1138 1163
               
1139 1164
               return(rval)
1140 1165
           })
1141 1166
 
1142 1167
 setMethod("wrapData", signature(container="SpatialExperiment"),
1143 1168
           function(container, dataMatrix, geneSets) {
1169
+              rdata <- adata <- NULL
1170
+              if (!missing(geneSets)) {
1171
+                  adata <- SimpleList(es=dataMatrix)
1172
+                  rdata <- DataFrame(gs=CharacterList(geneSets))
1173
+              } else { ## assume missing geneSets imples dataMatrix are ranks
1174
+                  mask <- rownames(container) %in% rownames(dataMatrix)
1175
+                  adata <- c(assays(container[mask, ]),
1176
+                             SimpleList(gsvaranks=dataMatrix))
1177
+                  rdata <- rowData(container)[mask, ]
1178
+              }
1144 1179
               rval <- SpatialExperiment(
1145
-                  assays=SimpleList(es=dataMatrix),
1180
+                  assays=adata,
1146 1181
                   colData=colData(container),
1147
-                  rowData=DataFrame(gs=CharacterList(geneSets)),
1182
+                  rowData=rdata,
1148 1183
                   metadata=metadata(container),
1149
-                  imgData = imgData(container),
1150
-                  spatialCoords = spatialCoords(container))
1151
-              metadata(rval)$annotation <- NULL
1184
+                  imgData=imgData(container),
1185
+                  spatialCoords=spatialCoords(container))
1186
+              if (!missing(geneSets))
1187
+                  metadata(rval)$annotation <- NULL
1152 1188
               
1153 1189
               return(rval)
1154 1190
           })
... ...
@@ -287,3 +287,22 @@ setMethod("show",
287 287
               if ("dgCMatrix" %in% class(unwrapData(get_exprData(object), get_assay(object))))
288 288
                   cat("sparse: ", get_sparse(object), "\n")
289 289
           })
290
+
291
+## ----- setters for gsvaRanksParam -----
292
+
293
+#' @param object For the replacement method, an object of class
294
+#' [`gsvaRanksParam-class`].
295
+#'
296
+#' @param value For the replacement method, an object of the classes supported by
297
+#' [`GsvaGeneSets-class`].
298
+#'
299
+#' @aliases geneSets<-
300
+#' @aliases geneSets<-,gsvaRanksParam,GsvaGeneSets-method
301
+#' @rdname gsvaParam-class
302
+#' @exportMethod geneSets
303
+setReplaceMethod("geneSets", signature=signature(object="gsvaRanksParam",
304
+                                                 value="GsvaGeneSets"),
305
+                 function(object, value) {
306
+                   object@geneSets <- value
307
+                   object
308
+                 })
... ...
@@ -68,32 +68,35 @@
68 68
 ## is a 'list' object with character string vectors as elements,
69 69
 ## and 'features' is a character string vector object. it assumes
70 70
 ## features in both input objects follow the same nomenclature,
71
+
72
+#' @importFrom cli cli_abort
71 73
 .mapGeneSetsToFeatures <- function(gsets, features) {
72 74
 
73
-  ## Aaron Lun's suggestion at
74
-  ## https://github.com/rcastelo/GSVA/issues/39#issuecomment-765549620
75
-  gsets2 <- CharacterList(gsets)
76
-  mt <- match(gsets2, features)
77
-  mapdgenesets <- as.list(mt[!is.na(mt)])
75
+    ## Aaron Lun's suggestion at
76
+    ## https://github.com/rcastelo/GSVA/issues/39#issuecomment-765549620
77
+    gsets2 <- CharacterList(gsets)
78
+    mt <- match(gsets2, features)
79
+    mapdgenesets <- as.list(mt[!is.na(mt)])
78 80
 
79
-  if (length(unlist(mapdgenesets, use.names=FALSE)) == 0)
80
-    stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.")
81
+    if (length(unlist(mapdgenesets, use.names=FALSE)) == 0) {
82
+      msg <- paste("No identifiers in the gene sets could be matched to the",
83
+                   "identifiers in the expression data.")
84
+      cli_abort("x"=msg)
85
+    }
81 86
 
82
-  mapdgenesets
87
+    mapdgenesets
83 88
 }
84 89
 
85 90
 ## it assumes that all arguments have been already checked for correctness
86
-.filterAndMapGeneSets <- function(param, geneSets, minSize, maxSize,
87
-                                  filteredDataMatrix, verbose) {
88
-
89
-    ## if geneSets, minSize and maxSize are non-NA values, then they
90
-    ## override those coming from 'param' (b/c are provided by gsvaScores())
91
-    if (any(is.na(geneSets)))
92
-        geneSets <- get_geneSets(param)
93
-    if (is.na(minSize))
94
-        minSize <- get_minSize(param)
95
-    if (is.na(maxSize))
96
-        maxSize <- get_maxSize(param)
91
+#' @importFrom cli cli_abort
92
+.filterAndMapGeneSets <- function(param, wgset=NA, filteredDataMatrix, verbose) {
93
+
94
+    geneSets <- get_geneSets(param)
95
+    if (!is.na(wgset))
96
+        geneSets <- geneSets[wgset]
97
+
98
+    minSize <- get_minSize(param)
99
+    maxSize <- get_maxSize(param)
97 100
 
98 101
     ## note that the method for 'GeneSetCollection' calls geneIds(), i.e., 
99 102
     ## whatever the input, from here on we have a list of character vectors
... ...
@@ -113,11 +116,11 @@
113 116
                                              maxSize=maxSize)
114 117
     
115 118
     if (length(filteredMappedGeneSets) == 0)
116
-        stop("The gene set list is empty! Filter may be too stringent.")
119
+        cli_abort("x"="The gene set list is empty! Filter may be too stringent.")
117 120
 
118 121
     ## this should NEVER happen -- just to make sure it doesn't...
119 122
     if (anyDuplicated(names(filteredMappedGeneSets)) > 0)
120
-        stop("The gene set list contains duplicated gene set names.")
123
+        cli_abort("x"="The gene set list contains duplicated gene set names.")
121 124
 
122 125
     if (any(lengths(filteredMappedGeneSets) == 1)) {
123 126
         msg <- "Some gene sets have size one. Consider setting minSize > 1"
... ...
@@ -141,9 +144,9 @@
141 144
                                        removeConstant=removeConstant,
142 145
                                        removeNzConstant=removeNzConstant)
143 146
 
144
-    filteredMappedGeneSets <- .filterAndMapGeneSets(param, NA, NA, NA,
145
-                                                    filteredDataMatrix,
146
-                                                    verbose)
147
+    filteredMappedGeneSets <- .filterAndMapGeneSets(param=param,
148
+                                                    filteredDataMatrix=filteredDataMatrix,
149
+                                                    verbose=verbose)
147 150
 
148 151
     return(list(filteredDataMatrix=filteredDataMatrix,
149 152
                 filteredMappedGeneSets=filteredMappedGeneSets))
... ...
@@ -25,10 +25,10 @@ test_gsvaRanks <- function() {
25 25
 
26 26
     ## calculate GSVA scores in two steps
27 27
     ## first calculate GSVA ranks
28
-    gsva_ranks <- gsvaRanks(gsvapar, verbose=FALSE)
28
+    gsvarankspar <- gsvaRanks(gsvapar, verbose=FALSE)
29 29
 
30 30
     ## second calculate GSVA scores using GSVA ranks
31
-    gsva_es2 <- gsvaScores(gsvapar, gsva_ranks, verbose=FALSE)
31
+    gsva_es2 <- gsvaScores(gsvarankspar, verbose=FALSE)
32 32
 
33 33
     ## both approaches to calculate GSVA scores must give
34 34
     ## the same result with the same input gene sets
35 35
new file mode 100644
... ...
@@ -0,0 +1,82 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/gsva.R
3
+\name{gsvaEnrichment}
4
+\alias{gsvaEnrichment}
5
+\alias{gsvaEnrichment,gsvaRanksParam-method}
6
+\title{GSVA enrichment data and visualization}
7
+\usage{
8
+\S4method{gsvaEnrichment}{gsvaRanksParam}(
9
+  param,
10
+  column = 1,
11
+  geneSet = 1,
12
+  plot = c("auto", "base", "ggplot", "no"),
13
+  ...
14
+)
15
+}
16
+\arguments{
17
+\item{param}{A \code{\linkS4class{gsvaRanksParam}} object obtained with the method
18
+\code{\link{gsvaRanks}}.}
19
+
20
+\item{column}{The column for which we want to retrieve the enrichment data.
21
+This parameter is only available in the \code{gsvaEnrichment()} method.}
22
+
23
+\item{geneSet}{Either a positive integer number between 1 and the number of
24
+available gene sets in \code{param}, or a character string with the name of
25
+one of the gene sets available in \code{param}.}
26
+
27
+\item{plot}{A character string indicating whether an enrichment plot should
28
+be produced using either base R graphics (\code{plot="base"}) or the ggplot2 package
29
+(\code{plot="ggplot"}), or not (\code{plot="no"}). In the latter case, the enrichment
30
+data will be returned. By default \code{plot="auto"}, which implies that if this
31
+method is called from an interactive session, a plot using base R graphics
32
+will be produced and, otherwise, the enrichment data is returned.}
33
+
34
+\item{...}{Further arguments passed to the \code{plot()} function when the
35
+previous parameter \code{plot="base"}.}
36
+}
37
+\value{
38
+When \code{plot="no"}, this method returns the enrichment data. When
39
+\code{plot="ggplot"}, this method returns a \code{ggplot} object. When \code{plot="base"}
40
+no value is returned.
41
+}
42
+\description{
43
+Extract and plot enrichment data from GSVA scores.
44
+}
45
+\examples{
46
+library(GSVA)
47
+
48
+p <- 10 ## number of genes
49
+n <- 30 ## number of samples
50
+nGrp1 <- 15 ## number of samples in group 1
51
+nGrp2 <- n - nGrp1 ## number of samples in group 2
52
+
53
+## consider three disjoint gene sets
54
+geneSets <- list(gset1=paste0("g", 1:3),
55
+                 gset2=paste0("g", 4:6),
56
+                 gset3=paste0("g", 7:10))
57
+
58
+## sample data from a normal distribution with mean 0 and st.dev. 1
59
+y <- matrix(rnorm(n*p), nrow=p, ncol=n,
60
+            dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep="")))
61
+
62
+## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples
63
+y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2
64
+
65
+## build GSVA parameter object
66
+gsvapar <- gsvaParam(y, geneSets)
67
+
68
+## calculate GSVA ranks
69
+gsvarankspar <- gsvaRanks(gsvapar)
70
+gsvarankspar
71
+
72
+## by default the enrichment data for the first column and the first
73
+## gene set are retrieved
74
+gsvaEnrichment(gsvarankspar)
75
+
76
+}
77
+\references{
78
+Hänzelmann, S., Castelo, R. and Guinney, J. GSVA: Gene set
79
+variation analysis for microarray and RNA-Seq data.
80
+\emph{BMC Bioinformatics}, 14:7, 2013.
81
+\href{https://doi.org/10.1186/1471-2105-14-7}{DOI}
82
+}
... ...
@@ -3,7 +3,10 @@
3 3
 \docType{class}
4 4
 \name{gsvaParam-class}
5 5
 \alias{gsvaParam-class}
6
+\alias{gsvaRanksParam-class}
6 7
 \alias{gsvaParam}
8
+\alias{geneSets<-,gsvaRanksParam,GsvaGeneSets-method}
9
+\alias{geneSets<-}
7 10
 \title{\code{gsvaParam} class}
8 11
 \usage{
9 12
 gsvaParam(
... ...
@@ -20,6 +23,8 @@ gsvaParam(
20 23
   absRanking = FALSE,
21 24
   sparse = TRUE
22 25
 )
26
+
27
+\S4method{geneSets}{gsvaRanksParam,GsvaGeneSets}(object) <- value
23 28
 }
24 29
 \arguments{
25 30
 \item{exprData}{The expression data set.  Must be one of the classes
... ...
@@ -101,6 +106,12 @@ data in \code{exprData} is stored in a sparse matrix (e.g., a \code{dgCMatrix} o
101 106
 In such a case, when \code{sparse=TRUE} (default), a sparse version of the GSVA
102 107
 algorithm will be applied. Otherwise, when \code{sparse=FALSE}, the classical
103 108
 version of the GSVA algorithm will be used.}
109
+
110
+\item{object}{For the replacement method, an object of class
111
+\code{\linkS4class{gsvaRanksParam}}.}
112
+
113
+\item{value}{For the replacement method, an object of the classes supported by
114
+\code{\linkS4class{GsvaGeneSets}}.}
104 115
 }
105 116
 \value{
106 117
 A new \code{\linkS4class{gsvaParam}} object.
... ...
@@ -4,88 +4,40 @@
4 4
 \alias{gsvaRanks}
5 5
 \alias{gsvaRanks,gsvaParam-method}
6 6
 \alias{gsvaScores}
7
-\alias{gsvaScores,gsvaParam,GsvaExprData-method}
7
+\alias{gsvaScores,gsvaRanksParam-method}
8 8
 \title{GSVA ranks and scores}
9 9
 \usage{
10 10
 \S4method{gsvaRanks}{gsvaParam}(param, verbose = TRUE, BPPARAM = SerialParam(progressbar = verbose))
11 11
 
12
-\S4method{gsvaScores}{gsvaParam,GsvaExprData}(
13
-  param,
14
-  ranks,
15
-  geneSets = NA,
16
-  minSize = NA,
17
-  maxSize = NA,
18
-  tau = NA,
19
-  maxDiff = NA,
20
-  absRanking = NA,
21
-  verbose = TRUE,
22
-  BPPARAM = SerialParam(progressbar = verbose)
23
-)
12
+\S4method{gsvaScores}{gsvaRanksParam}(param, verbose = TRUE, BPPARAM = SerialParam(progressbar = verbose))
24 13
 }
25 14
 \arguments{
26
-\item{param}{A \code{\link{gsvaParam}} object built using the constructor function
27
-\code{\link{gsvaParam}}.}
15
+\item{param}{A parameter object of the \code{\linkS4class{gsvaRanksParam}} class.}
28 16
 
29 17
 \item{verbose}{Gives information about each calculation step. Default: \code{TRUE}.}
30 18
 
31 19
 \item{BPPARAM}{An object of class \code{\link{BiocParallelParam}} specifying parameters
32 20
 related to the parallel execution of some of the tasks and calculations
33 21
 within this function.}
34
-
35
-\item{ranks}{A matrix-like object storing GSVA ranks calculated with the
36
-method \code{\link{gsvaRanks}}.}
37
-
38
-\item{geneSets}{A collection of gene sets. Must be one of the classes
39
-supported by \code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its
40
-help page using \code{help(GsvaGeneSets)}. By default, this parameter is set to
41
-the \code{NA} missing value, which means that GSVA scores will be calculated
42
-using the gene sets specified in the \code{param} argument. If this parameter is
43
-set to a non-missing value corresponding to an object of the classes
44
-supported by \code{\linkS4class{GsvaGeneSets}}, then GSVA scores will be calculated
45
-using the gene sets in this argument, instead of the ones specified in the
46
-\code{param} argument.}
47
-
48
-\item{minSize}{Numeric vector of length 1.  Minimum size of the resulting gene
49
-sets after gene identifier mapping. Its default value is \code{NA}, indicating that
50
-this minimum value will be taken from the input \code{param} argument, otherwise,
51
-non-\code{NA} values override those from the input \code{param} argument.}
52
-
53
-\item{maxSize}{Numeric vector of length 1.  Minimum size of the resulting gene
54
-sets after gene identifier mapping. Its default value is \code{NA}, indicating that
55
-this minimum value will be taken from the input \code{param} argument, otherwise,
56
-non-\code{NA} values override those from the input \code{param} argument.}
57
-
58
-\item{tau}{Numeric vector of length 1.  The exponent defining the weight of
59
-the tail in the random walk performed by the \code{GSVA} (Hänzelmann et al.,
60
-2013) method.  The default value is 1 as described in the paper.}
61
-
62
-\item{maxDiff}{Logical vector of length 1 which offers two approaches to
63
-calculate the enrichment statistic (ES) from the KS random walk statistic.
64
-\itemize{
65
-\item \code{FALSE}: ES is calculated as the maximum distance of the random walk
66
-from 0. This approach produces a distribution of enrichment scores that is
67
-bimodal, but it can give large enrichment scores to gene sets whose genes
68
-are not concordantly activated in one direction only.
69
-\item \code{TRUE} (the default): ES is calculated as the magnitude difference between
70
-the largest positive and negative random walk deviations. This default value
71
-gives larger enrichment scores to gene sets whose genes are concordantly
72
-activated in one direction only.
73
-}}
74
-
75
-\item{absRanking}{Logical vector of length 1 used only when \code{maxDiff=TRUE}.
76
-When \code{absRanking=FALSE} (default) a modified Kuiper statistic is used to
77
-calculate enrichment scores, taking the magnitude difference between the
78
-largest positive and negative random walk deviations. When
79
-\code{absRanking=TRUE} the original Kuiper statistic that sums the largest
80
-positive and negative random walk deviations is used.}
81 22
 }
82 23
 \value{
83
-In the case of the \code{gsvaRanks()} method, a matrix of GSVA rank
84
-values per column.
24
+In the case of the \code{gsvaRanks()} method, an object of class
25
+\code{\linkS4class{gsvaRanksParam}}.
85 26
 
86 27
 In the case of the \code{gsvaScores()} method, a gene-set by sample matrix
87
-of GSVA enrichment scores stored in a ocntainer object of the same type as
88
-the input expression data container in the \code{param} argument.
28
+of GSVA enrichment scores stored in a container object of the same type as
29
+the input ranks data container. If
30
+the input was a base matrix or a \code{\linkS4class{dgCMatrix}} object, then the output will
31
+be a base matrix object with the gene sets employed in the calculations
32
+stored in an attribute called \code{geneSets}. If the input was an
33
+\code{\link{ExpressionSet}} object, then the output will be also an \code{\link{ExpressionSet}}
34
+object with the gene sets employed in the calculations stored in an
35
+attributed called \code{geneSets}. If the input was an object of one of the
36
+classes described in \code{\link{GsvaExprData}}, such as a \code{\link{SingleCellExperiment}},
37
+then the output will be of the same class, where enrichment scores will be
38
+stored in an assay called \code{es} and the gene sets employed in the
39
+calculations will be stored in the \code{rowData} slot of the object under the
40
+column name \code{gs}.
89 41
 }
90 42
 \description{
91 43
 Calculate GSVA scores in two steps: (1) calculate GSVA
... ...
@@ -109,14 +61,17 @@ geneSets <- list(gset1=paste0("g", 1:3),
109 61
 y <- matrix(rnorm(n*p), nrow=p, ncol=n,
110 62
             dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep="")))
111 63
 
64
+## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples
65
+y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2
66
+
112 67
 ## build GSVA parameter object
113 68
 gsvapar <- gsvaParam(y, geneSets)
114 69
 
115 70
 ## calculate GSVA ranks
116
-gsva_ranks <- gsvaRanks(gsvapar)
117
-gsva_ranks
71
+gsvarankspar <- gsvaRanks(gsvapar)
72
+gsvarankspar
118 73
 ## calculate GSVA scores
119
-gsva_es <- gsvaScores(gsvapar, gsva_ranks)
74
+gsva_es <- gsvaScores(gsvarankspar)
120 75
 gsva_es
121 76
 
122 77
 ## calculate now GSVA scores in a single step
... ...
@@ -130,7 +85,8 @@ geneSets2 <- list(gset1=paste0("g", 3:6),
130 85
                   gset2=paste0("g", c(1, 2, 7, 8)))
131 86
 
132 87
 ## note that there is no need to calculate the GSVA ranks again
133
-gsvaScores(gsvapar, gsva_ranks, geneSets2)
88
+geneSets(gsvarankspar) <- geneSets2
89
+gsvaScores(gsvarankspar)
134 90
 
135 91
 }
136 92
 \references{
... ...
@@ -140,5 +96,5 @@ variation analysis for microarray and RNA-Seq data.
140 96
 \href{https://doi.org/10.1186/1471-2105-14-7}{DOI}
141 97
 }
142 98
 \seealso{
143
-\code{\link{gsvaParam}}, \code{\link{gsva}}
99
+\code{\linkS4class{gsvaParam}}, \code{\linkS4class{gsvaRanksParam}}, \code{\link{gsva}}
144 100
 }