... | ... |
@@ -1,5 +1,6 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 |
+export("geneSets<-") |
|
3 | 4 |
export("gsvaAnnotation<-") |
4 | 5 |
export(computeGeneSetsOverlap) |
5 | 6 |
export(deduplicateGeneSets) |
... | ... |
@@ -9,6 +10,7 @@ export(geneSetSizes) |
9 | 10 |
export(geneSets) |
10 | 11 |
export(gsva) |
11 | 12 |
export(gsvaAnnotation) |
13 |
+export(gsvaEnrichment) |
|
12 | 14 |
export(gsvaParam) |
13 | 15 |
export(gsvaRanks) |
14 | 16 |
export(gsvaScores) |
... | ... |
@@ -23,6 +25,7 @@ exportClasses(GsvaExprData) |
23 | 25 |
exportClasses(GsvaGeneSets) |
24 | 26 |
exportClasses(GsvaMethodParam) |
25 | 27 |
exportClasses(gsvaParam) |
28 |
+exportClasses(gsvaRanksParam) |
|
26 | 29 |
exportClasses(plageParam) |
27 | 30 |
exportClasses(ssgseaParam) |
28 | 31 |
exportClasses(zscoreParam) |
... | ... |
@@ -84,13 +87,18 @@ importFrom(SpatialExperiment,SpatialExperiment) |
84 | 87 |
importFrom(SummarizedExperiment,SummarizedExperiment) |
85 | 88 |
importFrom(SummarizedExperiment,assay) |
86 | 89 |
importFrom(cli,cli_abort) |
90 |
+importFrom(cli,cli_alert_danger) |
|
87 | 91 |
importFrom(cli,cli_alert_info) |
88 | 92 |
importFrom(cli,cli_alert_success) |
89 | 93 |
importFrom(cli,cli_alert_warning) |
90 | 94 |
importFrom(cli,cli_progress_bar) |
91 | 95 |
importFrom(cli,cli_progress_done) |
92 | 96 |
importFrom(cli,cli_progress_update) |
97 |
+importFrom(graphics,abline) |
|
98 |
+importFrom(graphics,grid) |
|
99 |
+importFrom(graphics,lines) |
|
93 | 100 |
importFrom(graphics,plot) |
101 |
+importFrom(graphics,segments) |
|
94 | 102 |
importFrom(methods,new) |
95 | 103 |
importFrom(parallel,splitIndices) |
96 | 104 |
importFrom(sparseMatrixStats,colRanks) |
... | ... |
@@ -103,6 +111,7 @@ importFrom(stats,rnorm) |
103 | 111 |
importFrom(stats,rpois) |
104 | 112 |
importFrom(stats,sd) |
105 | 113 |
importFrom(utils,capture.output) |
114 |
+importFrom(utils,globalVariables) |
|
106 | 115 |
importFrom(utils,head) |
107 | 116 |
importFrom(utils,installed.packages) |
108 | 117 |
importFrom(utils,packageDescription) |
... | ... |
@@ -336,3 +336,21 @@ setClass("gsvaParam", |
336 | 336 |
maxDiff=NA, |
337 | 337 |
absRanking=NA, |
338 | 338 |
sparse=FALSE)) |
339 |
+ |
|
340 |
+#' @name gsvaRanksParam-class |
|
341 |
+#' @rdname gsvaParam-class |
|
342 |
+#' @exportClass gsvaRanksParam |
|
343 |
+setClass("gsvaRanksParam", |
|
344 |
+ contains="gsvaParam", |
|
345 |
+ prototype=list(exprData=NULL, |
|
346 |
+ geneSets=NULL, |
|
347 |
+ assay=NA_character_, |
|
348 |
+ annotation=NULL, |
|
349 |
+ minSize=NA_integer_, |
|
350 |
+ maxSize=NA_integer_, |
|
351 |
+ kcdf=NA_character_, |
|
352 |
+ kcdfNoneMinSampleSize=NA_integer_, |
|
353 |
+ tau=NA_real_, |
|
354 |
+ maxDiff=NA, |
|
355 |
+ absRanking=NA, |
|
356 |
+ sparse=FALSE)) |
... | ... |
@@ -9,7 +9,11 @@ setGeneric("gsvaRanks", |
9 | 9 |
|
10 | 10 |
#' @export |
11 | 11 |
setGeneric("gsvaScores", |
12 |
- function(param, ranks, ...) standardGeneric("gsvaScores")) |
|
12 |
+ function(param, ...) standardGeneric("gsvaScores")) |
|
13 |
+ |
|
14 |
+#' @export |
|
15 |
+setGeneric("gsvaEnrichment", |
|
16 |
+ function(param, ranks, ...) standardGeneric("gsvaEnrichment")) |
|
13 | 17 |
|
14 | 18 |
#' @export |
15 | 19 |
setGeneric("filterGeneSets", |
... | ... |
@@ -23,6 +27,10 @@ setGeneric("computeGeneSetsOverlap", |
23 | 27 |
setGeneric("geneSets", |
24 | 28 |
function(obj, ...) standardGeneric("geneSets")) |
25 | 29 |
|
30 |
+#' @export |
|
31 |
+setGeneric("geneSets<-", |
|
32 |
+ function(object, value) standardGeneric("geneSets<-")) |
|
33 |
+ |
|
26 | 34 |
#' @export |
27 | 35 |
setGeneric("geneSetSizes", |
28 | 36 |
function(obj, ...) standardGeneric("geneSetSizes")) |
... | ... |
@@ -301,8 +301,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, kcdf, |
301 | 301 |
#' ranks; and (2) calculate GSVA scores using the previously calculated |
302 | 302 |
#' ranks. |
303 | 303 |
#' |
304 |
-#' @param param A [`gsvaParam`] object built using the constructor function |
|
305 |
-#' [`gsvaParam`]. |
|
304 |
+#' @param param A [`gsvaParam-class`] object built using the constructor |
|
305 |
+#' function [`gsvaParam`]. |
|
306 | 306 |
#' |
307 | 307 |
#' @param verbose Gives information about each calculation step. Default: `TRUE`. |
308 | 308 |
#' |
... | ... |
@@ -310,10 +310,10 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, kcdf, |
310 | 310 |
#' related to the parallel execution of some of the tasks and calculations |
311 | 311 |
#' within this function. |
312 | 312 |
#' |
313 |
-#' @return In the case of the `gsvaRanks()` method, a matrix of GSVA rank |
|
314 |
-#' values per column. |
|
313 |
+#' @return In the case of the `gsvaRanks()` method, an object of class |
|
314 |
+#' [`gsvaRanksParam-class`]. |
|
315 | 315 |
#' |
316 |
-#' @seealso [`gsvaParam`], [`gsva`] |
|
316 |
+#' @seealso [`gsvaParam-class`], [`gsvaRanksParam-class`], [`gsva`] |
|
317 | 317 |
#' |
318 | 318 |
#' @aliases gsvaRanks,gsvaParam-method |
319 | 319 |
#' @name gsvaRanks |
... | ... |
@@ -341,14 +341,17 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, kcdf, |
341 | 341 |
#' y <- matrix(rnorm(n*p), nrow=p, ncol=n, |
342 | 342 |
#' dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) |
343 | 343 |
#' |
344 |
+#' ## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples |
|
345 |
+#' y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2 |
|
346 |
+#' |
|
344 | 347 |
#' ## build GSVA parameter object |
345 | 348 |
#' gsvapar <- gsvaParam(y, geneSets) |
346 | 349 |
#' |
347 | 350 |
#' ## calculate GSVA ranks |
348 |
-#' gsva_ranks <- gsvaRanks(gsvapar) |
|
349 |
-#' gsva_ranks |
|
351 |
+#' gsvarankspar <- gsvaRanks(gsvapar) |
|
352 |
+#' gsvarankspar |
|
350 | 353 |
#' ## calculate GSVA scores |
351 |
-#' gsva_es <- gsvaScores(gsvapar, gsva_ranks) |
|
354 |
+#' gsva_es <- gsvaScores(gsvarankspar) |
|
352 | 355 |
#' gsva_es |
353 | 356 |
#' |
354 | 357 |
#' ## calculate now GSVA scores in a single step |
... | ... |
@@ -362,7 +365,8 @@ compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, kcdf, |
362 | 365 |
#' gset2=paste0("g", c(1, 2, 7, 8))) |
363 | 366 |
#' |
364 | 367 |
#' ## note that there is no need to calculate the GSVA ranks again |
365 |
-#' gsvaScores(gsvapar, gsva_ranks, geneSets2) |
|
368 |
+#' geneSets(gsvarankspar) <- geneSets2 |
|
369 |
+#' gsvaScores(gsvarankspar) |
|
366 | 370 |
#' |
367 | 371 |
#' @importFrom cli cli_alert_info cli_alert_success |
368 | 372 |
#' @importFrom BiocParallel bpnworkers |
... | ... |
@@ -393,20 +397,30 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), |
393 | 397 |
|
394 | 398 |
psz <- if(inherits(BPPARAM, "SerialParam")) 1L else bpnworkers(BPPARAM) |
395 | 399 |
|
396 |
- gsva_rnk <- .compute_gsva_ranks(expr=filteredDataMatrix, |
|
400 |
+ gsvarnks <- .compute_gsva_ranks(expr=filteredDataMatrix, |
|
397 | 401 |
kcdf=get_kcdf(param), |
398 | 402 |
kcdf.min.ssize=get_kcdfNoneMinSampleSize(param), |
399 | 403 |
sparse=get_sparse(param), |
400 | 404 |
verbose=verbose, |
401 | 405 |
BPPARAM=BPPARAM) |
402 | 406 |
|
403 |
- rownames(gsva_rnk) <- rownames(filteredDataMatrix) |
|
404 |
- colnames(gsva_rnk) <- colnames(filteredDataMatrix) |
|
407 |
+ rownames(gsvarnks) <- rownames(filteredDataMatrix) |
|
408 |
+ colnames(gsvarnks) <- colnames(filteredDataMatrix) |
|
409 |
+ |
|
410 |
+ rnkcontainer <- wrapData(get_exprData(param), gsvarnks) |
|
411 |
+ rval <- new("gsvaRanksParam", |
|
412 |
+ exprData=rnkcontainer, geneSets=get_geneSets(param), |
|
413 |
+ assay="gsvaranks", annotation=get_annotation(param), |
|
414 |
+ minSize=get_minSize(param), maxSize=get_maxSize(param), |
|
415 |
+ kcdf=get_kcdf(param), |
|
416 |
+ kcdfNoneMinSampleSize=get_kcdfNoneMinSampleSize(param), |
|
417 |
+ tau=get_tau(param), maxDiff=get_maxDiff(param), |
|
418 |
+ absRanking=get_absRanking(param), sparse=get_sparse(param)) |
|
405 | 419 |
|
406 | 420 |
if (verbose) |
407 | 421 |
cli_alert_success("Calculations finished") |
408 | 422 |
|
409 |
- return(gsva_rnk) |
|
423 |
+ return(rval) |
|
410 | 424 |
}) |
411 | 425 |
|
412 | 426 |
.check_geneSets_minSize_maxSize_tau <- function(geneSets, minSize, maxSize, tau) { |
... | ... |
@@ -467,56 +481,24 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), |
467 | 481 |
} |
468 | 482 |
|
469 | 483 |
|
470 |
-#' @param ranks A matrix-like object storing GSVA ranks calculated with the |
|
471 |
-#' method [`gsvaRanks`]. |
|
472 |
-#' |
|
473 |
-#' @param geneSets A collection of gene sets. Must be one of the classes |
|
474 |
-#' supported by [`GsvaGeneSets-class`]. For a list of these classes, see its |
|
475 |
-#' help page using `help(GsvaGeneSets)`. By default, this parameter is set to |
|
476 |
-#' the `NA` missing value, which means that GSVA scores will be calculated |
|
477 |
-#' using the gene sets specified in the `param` argument. If this parameter is |
|
478 |
-#' set to a non-missing value corresponding to an object of the classes |
|
479 |
-#' supported by [`GsvaGeneSets-class`], then GSVA scores will be calculated |
|
480 |
-#' using the gene sets in this argument, instead of the ones specified in the |
|
481 |
-#' `param` argument. |
|
482 |
-#' |
|
483 |
-#' @param minSize Numeric vector of length 1. Minimum size of the resulting gene |
|
484 |
-#' sets after gene identifier mapping. Its default value is `NA`, indicating that |
|
485 |
-#' this minimum value will be taken from the input `param` argument, otherwise, |
|
486 |
-#' non-`NA` values override those from the input `param` argument. |
|
487 |
-#' |
|
488 |
-#' @param maxSize Numeric vector of length 1. Minimum size of the resulting gene |
|
489 |
-#' sets after gene identifier mapping. Its default value is `NA`, indicating that |
|
490 |
-#' this minimum value will be taken from the input `param` argument, otherwise, |
|
491 |
-#' non-`NA` values override those from the input `param` argument. |
|
492 |
-#' |
|
493 |
-#' @param tau Numeric vector of length 1. The exponent defining the weight of |
|
494 |
-#' the tail in the random walk performed by the `GSVA` (Hänzelmann et al., |
|
495 |
-#' 2013) method. The default value is 1 as described in the paper. |
|
496 |
-#' |
|
497 |
-#' @param maxDiff Logical vector of length 1 which offers two approaches to |
|
498 |
-#' calculate the enrichment statistic (ES) from the KS random walk statistic. |
|
499 |
-#' * `FALSE`: ES is calculated as the maximum distance of the random walk |
|
500 |
-#' from 0. This approach produces a distribution of enrichment scores that is |
|
501 |
-#' bimodal, but it can give large enrichment scores to gene sets whose genes |
|
502 |
-#' are not concordantly activated in one direction only. |
|
503 |
-#' * `TRUE` (the default): ES is calculated as the magnitude difference between |
|
504 |
-#' the largest positive and negative random walk deviations. This default value |
|
505 |
-#' gives larger enrichment scores to gene sets whose genes are concordantly |
|
506 |
-#' activated in one direction only. |
|
507 |
-#' |
|
508 |
-#' @param absRanking Logical vector of length 1 used only when `maxDiff=TRUE`. |
|
509 |
-#' When `absRanking=FALSE` (default) a modified Kuiper statistic is used to |
|
510 |
-#' calculate enrichment scores, taking the magnitude difference between the |
|
511 |
-#' largest positive and negative random walk deviations. When |
|
512 |
-#' `absRanking=TRUE` the original Kuiper statistic that sums the largest |
|
513 |
-#' positive and negative random walk deviations is used. |
|
484 |
+#' @param param A parameter object of the [`gsvaRanksParam-class`] class. |
|
514 | 485 |
#' |
515 | 486 |
#' @return In the case of the `gsvaScores()` method, a gene-set by sample matrix |
516 |
-#' of GSVA enrichment scores stored in a ocntainer object of the same type as |
|
517 |
-#' the input expression data container in the `param` argument. |
|
487 |
+#' of GSVA enrichment scores stored in a container object of the same type as |
|
488 |
+#' the input ranks data container. If |
|
489 |
+#' the input was a base matrix or a [`dgCMatrix-class`] object, then the output will |
|
490 |
+#' be a base matrix object with the gene sets employed in the calculations |
|
491 |
+#' stored in an attribute called `geneSets`. If the input was an |
|
492 |
+#' [`ExpressionSet`] object, then the output will be also an [`ExpressionSet`] |
|
493 |
+#' object with the gene sets employed in the calculations stored in an |
|
494 |
+#' attributed called `geneSets`. If the input was an object of one of the |
|
495 |
+#' classes described in [`GsvaExprData`], such as a [`SingleCellExperiment`], |
|
496 |
+#' then the output will be of the same class, where enrichment scores will be |
|
497 |
+#' stored in an assay called `es` and the gene sets employed in the |
|
498 |
+#' calculations will be stored in the `rowData` slot of the object under the |
|
499 |
+#' column name `gs`. |
|
518 | 500 |
#' |
519 |
-#' @aliases gsvaScores,gsvaParam,GsvaExprData-method |
|
501 |
+#' @aliases gsvaScores,gsvaRanksParam-method |
|
520 | 502 |
#' @name gsvaScores |
521 | 503 |
#' @rdname gsvaRanks |
522 | 504 |
#' |
... | ... |
@@ -524,42 +506,20 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), |
524 | 506 |
#' @importFrom BiocParallel bpnworkers |
525 | 507 |
#' @importFrom utils packageDescription |
526 | 508 |
#' @exportMethod gsvaScores |
527 |
-setMethod("gsvaScores", signature(param="gsvaParam", ranks="GsvaExprData"), |
|
528 |
- function(param, ranks, geneSets=NA, minSize=NA, maxSize=NA, |
|
529 |
- tau=NA, maxDiff=NA, absRanking=NA, |
|
530 |
- verbose=TRUE, BPPARAM=SerialParam(progressbar=verbose)) |
|
509 |
+setMethod("gsvaScores", signature(param="gsvaRanksParam"), |
|
510 |
+ function(param, verbose=TRUE, |
|
511 |
+ BPPARAM=SerialParam(progressbar=verbose)) |
|
531 | 512 |
{ |
532 | 513 |
if (verbose) |
533 | 514 |
cli_alert_info(sprintf("GSVA version %s", |
534 | 515 |
packageDescription("GSVA")[["Version"]])) |
535 | 516 |
|
536 |
- .check_geneSets_minSize_maxSize_tau(geneSets, minSize, maxSize, tau) |
|
537 |
- |
|
538 |
- .check_maxDiff_absRanking(maxDiff, absRanking) |
|
539 |
- |
|
540 |
- tau <- ifelse(is.na(tau), get_tau(param), tau) |
|
541 |
- maxDiff <- ifelse(is.na(maxDiff), get_maxDiff(param), maxDiff) |
|
542 |
- absRanking <- ifelse(is.na(absRanking), get_absRanking(param), |
|
543 |
- absRanking) |
|
544 |
- sparse <- get_sparse(param) ## sparse regime from parameter obj |
|
545 |
- |
|
517 |
+ ## assuming rows in the rank data have been already filtered |
|
546 | 518 |
exprData <- get_exprData(param) |
547 |
- dataMatrix <- unwrapData(exprData, get_assay(param)) |
|
548 |
- filteredDataMatrix <- .filterGenes(dataMatrix, |
|
549 |
- removeConstant=TRUE, |
|
550 |
- removeNzConstant=TRUE) |
|
551 |
- |
|
552 |
- if (!identical(rownames(filteredDataMatrix), |
|
553 |
- rownames(unwrapData(ranks)))) { |
|
554 |
- msg <- paste("Rownames in ranks don't match those from the", |
|
555 |
- "input expression data in 'param'") |
|
556 |
- cli_abort(c("x"=msg)) |
|
557 |
- } |
|
558 |
- |
|
559 |
- filteredMappedGeneSets <- .filterAndMapGeneSets(param, geneSets, |
|
560 |
- minSize, maxSize, |
|
561 |
- filteredDataMatrix, |
|
562 |
- verbose) |
|
519 |
+ filteredDataMatrix <- unwrapData(exprData, get_assay(param)) |
|
520 |
+ filteredMappedGeneSets <- .filterAndMapGeneSets(param=param, |
|
521 |
+ filteredDataMatrix=filteredDataMatrix, |
|
522 |
+ verbose=verbose) |
|
563 | 523 |
|
564 | 524 |
if (!inherits(BPPARAM, "SerialParam") && verbose) { |
565 | 525 |
msg <- sprintf("Using a %s parallel back-end with %d workers", |
... | ... |
@@ -570,12 +530,13 @@ setMethod("gsvaScores", signature(param="gsvaParam", ranks="GsvaExprData"), |
570 | 530 |
if (verbose) |
571 | 531 |
cli_alert_info(sprintf("Calculating GSVA scores")) |
572 | 532 |
|
573 |
- gsva_es <- .compute_gsva_scores(R=unwrapData(ranks), |
|
533 |
+ gsva_es <- .compute_gsva_scores(R=filteredDataMatrix, |
|
574 | 534 |
geneSetsIdx=filteredMappedGeneSets, |
575 |
- tau=tau, maxDiff=maxDiff, |
|
576 |
- absRanking=absRanking, |
|
577 |
- sparse=sparse, verbose=verbose, |
|
578 |
- BPPARAM=BPPARAM) |
|
535 |
+ tau=get_tau(param), |
|
536 |
+ maxDiff=get_maxDiff(param), |
|
537 |
+ absRanking=get_absRanking(param), |
|
538 |
+ sparse=get_sparse(param), |
|
539 |
+ verbose=verbose, BPPARAM=BPPARAM) |
|
579 | 540 |
|
580 | 541 |
rownames(gsva_es) <- names(filteredMappedGeneSets) |
581 | 542 |
colnames(gsva_es) <- colnames(filteredDataMatrix) |
... | ... |
@@ -590,9 +551,144 @@ setMethod("gsvaScores", signature(param="gsvaParam", ranks="GsvaExprData"), |
590 | 551 |
return(rval) |
591 | 552 |
}) |
592 | 553 |
|
554 |
+#' @title GSVA enrichment data and visualization |
|
555 |
+#' |
|
556 |
+#' @description Extract and plot enrichment data from GSVA scores. |
|
557 |
+#' |
|
558 |
+#' @param param A [`gsvaRanksParam-class`] object obtained with the method |
|
559 |
+#' [`gsvaRanks`]. |
|
560 |
+#' |
|
561 |
+#' @param column The column for which we want to retrieve the enrichment data. |
|
562 |
+#' This parameter is only available in the `gsvaEnrichment()` method. |
|
563 |
+#' |
|
564 |
+#' @param geneSet Either a positive integer number between 1 and the number of |
|
565 |
+#' available gene sets in `param`, or a character string with the name of |
|
566 |
+#' one of the gene sets available in `param`. |
|
567 |
+#' |
|
568 |
+#' @param plot A character string indicating whether an enrichment plot should |
|
569 |
+#' be produced using either base R graphics (`plot="base"`) or the ggplot2 package |
|
570 |
+#' (`plot="ggplot"`), or not (`plot="no"`). In the latter case, the enrichment |
|
571 |
+#' data will be returned. By default `plot="auto"`, which implies that if this |
|
572 |
+#' method is called from an interactive session, a plot using base R graphics |
|
573 |
+#' will be produced and, otherwise, the enrichment data is returned. |
|
574 |
+#' |
|
575 |
+#' @param ... Further arguments passed to the `plot()` function when the |
|
576 |
+#' previous parameter `plot="base"`. |
|
577 |
+#' |
|
578 |
+#' @return When `plot="no"`, this method returns the enrichment data. When |
|
579 |
+#' `plot="ggplot"`, this method returns a `ggplot` object. When `plot="base"` |
|
580 |
+#' no value is returned. |
|
581 |
+#' |
|
582 |
+#' @aliases gsvaEnrichment,gsvaRanksParam-method |
|
583 |
+#' @name gsvaEnrichment |
|
584 |
+#' @rdname gsvaEnrichment |
|
585 |
+#' |
|
586 |
+#' @references Hänzelmann, S., Castelo, R. and Guinney, J. GSVA: Gene set |
|
587 |
+#' variation analysis for microarray and RNA-Seq data. |
|
588 |
+#' *BMC Bioinformatics*, 14:7, 2013. |
|
589 |
+#' [DOI](https://doi.org/10.1186/1471-2105-14-7) |
|
590 |
+#' |
|
591 |
+#' @examples |
|
592 |
+#' library(GSVA) |
|
593 |
+#' |
|
594 |
+#' p <- 10 ## number of genes |
|
595 |
+#' n <- 30 ## number of samples |
|
596 |
+#' nGrp1 <- 15 ## number of samples in group 1 |
|
597 |
+#' nGrp2 <- n - nGrp1 ## number of samples in group 2 |
|
598 |
+#' |
|
599 |
+#' ## consider three disjoint gene sets |
|
600 |
+#' geneSets <- list(gset1=paste0("g", 1:3), |
|
601 |
+#' gset2=paste0("g", 4:6), |
|
602 |
+#' gset3=paste0("g", 7:10)) |
|
603 |
+#' |
|
604 |
+#' ## sample data from a normal distribution with mean 0 and st.dev. 1 |
|
605 |
+#' y <- matrix(rnorm(n*p), nrow=p, ncol=n, |
|
606 |
+#' dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) |
|
607 |
+#' |
|
608 |
+#' ## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples |
|
609 |
+#' y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2 |
|
610 |
+#' |
|
611 |
+#' ## build GSVA parameter object |
|
612 |
+#' gsvapar <- gsvaParam(y, geneSets) |
|
613 |
+#' |
|
614 |
+#' ## calculate GSVA ranks |
|
615 |
+#' gsvarankspar <- gsvaRanks(gsvapar) |
|
616 |
+#' gsvarankspar |
|
617 |
+#' |
|
618 |
+#' ## by default the enrichment data for the first column and the first |
|
619 |
+#' ## gene set are retrieved |
|
620 |
+#' gsvaEnrichment(gsvarankspar) |
|
621 |
+#' |
|
622 |
+#' @importFrom cli cli_alert_info cli_abort cli_alert_danger |
|
623 |
+#' @exportMethod gsvaScores |
|
624 |
+setMethod("gsvaEnrichment", signature(param="gsvaRanksParam"), |
|
625 |
+ function(param, column=1, geneSet=1, |
|
626 |
+ plot=c("auto", "base", "ggplot", "no"), ...) |
|
627 |
+ { |
|
628 |
+ plot <- match.arg(plot) |
|
629 |
+ |
|
630 |
+ geneSets <- get_geneSets(param) |
|
631 |
+ if (length(geneSet) > 1) { |
|
632 |
+ msg <- paste("Please provide only the name or position of a", |
|
633 |
+ "single gene set.") |
|
634 |
+ cli_abort("x"=msg) |
|
635 |
+ } |
|
636 |
+ if (is.character(geneSet)) { |
|
637 |
+ if (!geneSet %in% names(geneSets)) { |
|
638 |
+ msg <- paste("Gene set %s is missing from the input", |
|
639 |
+ "parameter object") |
|
640 |
+ cli_abort("x"=sprintf(msg, geneSet)) |
|
641 |
+ } |
|
642 |
+ } else if (is.integer(geneSet) || is.numeric(geneSet)) { |
|
643 |
+ if (geneSet < 1 || geneSet > length(geneSets)) { |
|
644 |
+ msg <- paste("When 'geneSet' is numeric, it should be a", |
|
645 |
+ "number between 1 and the number of gene", |
|
646 |
+ "sets (%d).") |
|
647 |
+ cli_abort("x"=sprintf(msg, length(geneSets))) |
|
648 |
+ } |
|
649 |
+ } else { |
|
650 |
+ msg <- paste("'geneSet' should be either numeric or", |
|
651 |
+ "character.") |
|
652 |
+ cli_abort("x"=msg) |
|
653 |
+ } |
|
654 |
+ |
|
655 |
+ tau <- get_tau(param) |
|
656 |
+ maxDiff <- get_maxDiff(param) |
|
657 |
+ absRanking <- get_absRanking(param) |
|
658 |
+ sparse <- get_sparse(param) |
|
659 |
+ |
|
660 |
+ exprData <- get_exprData(param) |
|
661 |
+ filteredDataMatrix <- unwrapData(exprData, get_assay(param)) |
|
662 |
+ |
|
663 |
+ ## no need for verbosity when mapping a single gene set |
|
664 |
+ filteredMappedGeneSets <- .filterAndMapGeneSets(param, wgset=geneSet, |
|
665 |
+ filteredDataMatrix, |
|
666 |
+ verbose=FALSE) |
|
667 |
+ |
|
668 |
+ geneSetIdx <- filteredMappedGeneSets[[1]] |
|
669 |
+ edata <- .gsva_enrichment_data(R=filteredDataMatrix, |
|
670 |
+ column=column, |
|
671 |
+ geneSetIdx=geneSetIdx, |
|
672 |
+ tau=tau, maxDiff=maxDiff, |
|
673 |
+ absRanking=absRanking, |
|
674 |
+ sparse=sparse) |
|
675 |
+ |
|
676 |
+ if (plot == "no" || (plot == "auto" && !interactive())) |
|
677 |
+ return(edata) |
|
678 |
+ |
|
679 |
+ if (plot == "auto" || plot == "base") |
|
680 |
+ .plot_enrichment_base(edata, ...) |
|
681 |
+ else { ## plot == "ggplot" |
|
682 |
+ instpkgs <- installed.packages(noCache=TRUE)[, "Package"] |
|
683 |
+ if (!"ggplot2" %in% instpkgs) |
|
684 |
+ cli_alert_danger("Please install the ggplot2 package") |
|
685 |
+ else |
|
686 |
+ .plot_enrichment_ggplot(edata) |
|
687 |
+ } |
|
688 |
+ }) |
|
593 | 689 |
|
594 |
-## END exported methods (to be moved to 'gsvaNewAPI.R') |
|
595 | 690 |
|
691 |
+## END exported methods (to be moved to 'gsvaNewAPI.R') |
|
596 | 692 |
|
597 | 693 |
#' @importFrom cli cli_progress_update |
598 | 694 |
#' @importFrom parallel splitIndices |
... | ... |
@@ -799,6 +895,125 @@ setMethod("gsvaScores", signature(param="gsvaParam", ranks="GsvaExprData"), |
799 | 895 |
return(es) |
800 | 896 |
} |
801 | 897 |
|
898 |
+.gsva_enrichment_data <- function(R, column, geneSetIdx, tau=1, maxDiff=TRUE, |
|
899 |
+ absRanking=FALSE, sparse=TRUE) { |
|
900 |
+ n <- ncol(R) |
|
901 |
+ es <- NULL |
|
902 |
+ if (!is(R, "dgCMatrix")) |
|
903 |
+ sparse <- FALSE |
|
904 |
+ |
|
905 |
+ rnkstats <- .ranks2stats(R[, column], sparse) |
|
906 |
+ walkStat <- .gsvaRndWalk2(geneSetIdx, rnkstats$dos, rnkstats$srs, tau) |
|
907 |
+ maxDev <- c(max(c(0, max(walkStat))), min(c(0, min(walkStat)))) |
|
908 |
+ whMaxDev <- c(which.max(walkStat), which.min(walkStat)) |
|
909 |
+ whMaxDev[maxDev == 0] <- NA |
|
910 |
+ |
|
911 |
+ if (maxDiff && absRanking) |
|
912 |
+ maxDev[2] <- -1 * maxDev[2] |
|
913 |
+ sco <- sum(maxDev) |
|
914 |
+ if (!maxDiff) { |
|
915 |
+ sco <- maxDev[1] |
|
916 |
+ if (abs(maxDev[2]) > maxDev[1]) |
|
917 |
+ sco <- maxDev[2] |
|
918 |
+ } |
|
919 |
+ |
|
920 |
+ edat <- data.frame(rank=seq.int(nrow(R)), |
|
921 |
+ stat=walkStat) |
|
922 |
+ rownames(edat)[rnkstats$dos] <- rownames(R) |
|
923 |
+ |
|
924 |
+ gsetrnk <- rnkstats$dos[geneSetIdx] |
|
925 |
+ lepos <- leneg <- NA |
|
926 |
+ if (!is.na(whMaxDev[1])) |
|
927 |
+ lepos <- geneSetIdx[gsetrnk <= whMaxDev[1]] |
|
928 |
+ if (!is.na(whMaxDev[2])) { |
|
929 |
+ if (!is.na(whMaxDev[1]) && whMaxDev[2] < whMaxDev[1]) { |
|
930 |
+ mask <- gsetrnk >= whMaxDev[2] & gsetrnk <= whMaxDev[1] |
|
931 |
+ lepos <- leneg <- geneSetIdx[mask] |
|
932 |
+ } else |
|
933 |
+ leneg <- geneSetIdx[gsetrnk >= whMaxDev[2]] |
|
934 |
+ } |
|
935 |
+ res <- list(stats=edat, |
|
936 |
+ gsetrnk=gsetrnk, |
|
937 |
+ maxPos=maxDev[1], |
|
938 |
+ whichMaxPos=whMaxDev[1], |
|
939 |
+ maxNeg=maxDev[2], |
|
940 |
+ whichMaxNeg=whMaxDev[2], |
|
941 |
+ leadingEdgePos=lepos, |
|
942 |
+ leadingEdgeNeg=leneg, |
|
943 |
+ score=sco, |
|
944 |
+ tau=tau, |
|
945 |
+ maxDiff=maxDiff, |
|
946 |
+ absRanking=absRanking, |
|
947 |
+ sparse=sparse) |
|
948 |
+ |
|
949 |
+ return(res) |
|
950 |
+} |
|
951 |
+ |
|
952 |
+#' @importFrom graphics abline grid lines segments |
|
953 |
+.plot_enrichment_base <- function(edata, ...) { |
|
954 |
+ ylim <- range(edata$stats$stat) |
|
955 |
+ hgsetticks <- (ylim[2] - ylim[1]) * 0.1 |
|
956 |
+ plot(edata$stats, type="l", lwd=2, las=1, panel.first=grid(), |
|
957 |
+ xlab="Gene Ranking", ylab="Random Walk Statistic", col="green", ...) |
|
958 |
+ abline(h=0, lwd=2, lty=2, col="grey") |
|
959 |
+ lines(edata$stats, lwd=2, col="green") |
|
960 |
+ segments(edata$gsetrnk, -hgsetticks/2, edata$gsetrnk, hgsetticks/2, lwd=2) |
|
961 |
+ if (!is.na(edata$whichMaxPos) && |
|
962 |
+ (edata$maxDiff || edata$maxPos >= abs(edata$maxNeg))) |
|
963 |
+ segments(edata$whichMaxPos, 0, edata$whichMaxPos, edata$maxPos, |
|
964 |
+ lwd=2, lty=2, col="darkred") |
|
965 |
+ if (!is.na(edata$whichMaxNeg) && |
|
966 |
+ (edata$maxDiff || edata$maxPos < abs(edata$maxNeg))) |
|
967 |
+ segments(edata$whichMaxNeg, 0, edata$whichMaxNeg, edata$maxNeg, |
|
968 |
+ lwd=2, lty=2, col="darkred") |
|
969 |
+} |
|
970 |
+ |
|
971 |
+#' @importFrom cli cli_abort |
|
972 |
+#' @importFrom utils globalVariables |
|
973 |
+.plot_enrichment_ggplot <- function(edata, ...) { |
|
974 |
+ if (!.isPackageLoaded("ggplot2")) { |
|
975 |
+ loaded <- suppressPackageStartupMessages(requireNamespace("ggplot2")) |
|
976 |
+ if (!loaded) |
|
977 |
+ cli_abort("x"="ggplot2 could not be loaded") |
|
978 |
+ } |
|
979 |
+ |
|
980 |
+ ylim <- range(edata$stats$stat) |
|
981 |
+ hgsetticks <- (ylim[2] - ylim[1]) * 0.1 |
|
982 |
+ gsetticks <- data.frame(gsetrnk=edata$gsetrnk) |
|
983 |
+ ## from https://stackoverflow.com/a/39877048 |
|
984 |
+ fintticks <- function(x) unique(floor(pretty(seq(min(x), |
|
985 |
+ (max(x) + 1) * 1.1)))) |
|
986 |
+ ggplot2::ggplot(data=edata$stats) + |
|
987 |
+ ggplot2::scale_x_continuous(breaks=fintticks) + |
|
988 |
+ ggplot2::geom_line(ggplot2::aes_string(x="rank", y="stat"), color="green") + |
|
989 |
+ ggplot2::geom_segment(data=gsetticks, |
|
990 |
+ mapping=ggplot2::aes_string(x="gsetrnk", y=-hgsetticks/2, |
|
991 |
+ xend="gsetrnk", yend=hgsetticks/2), |
|
992 |
+ linewidth=1) + |
|
993 |
+ ggplot2::geom_hline(yintercept=0, colour="grey", linetype="dashed") + |
|
994 |
+ { if (!is.na(edata$whichMaxPos) && |
|
995 |
+ (edata$maxDiff || edata$maxPos >= abs(edata$maxNeg))) |
|
996 |
+ ggplot2::geom_segment(data=data.frame(whichMaxPos=edata$whichMaxPos, |
|
997 |
+ maxPos=edata$maxPos), |
|
998 |
+ mapping=ggplot2::aes_string(x="whichMaxPos", y=0, |
|
999 |
+ xend="whichMaxPos", yend="maxPos"), |
|
1000 |
+ colour="darkred", linetype="dashed") } + |
|
1001 |
+ { if (!is.na(edata$whichMaxPos) && |
|
1002 |
+ (edata$maxDiff || edata$maxPos < abs(edata$maxNeg))) |
|
1003 |
+ ggplot2::geom_segment(data=data.frame(whichMaxNeg=edata$whichMaxNeg, |
|
1004 |
+ maxNeg=edata$maxNeg), |
|
1005 |
+ mapping=ggplot2::aes_string(x="whichMaxNeg", y=0, |
|
1006 |
+ xend="whichMaxNeg", yend="maxNeg"), |
|
1007 |
+ colour="darkred", linetype="dashed") } + |
|
1008 |
+ ggplot2::theme(panel.background=ggplot2::element_blank(), |
|
1009 |
+ panel.grid.major=ggplot2::element_line(colour="grey", linetype="dotted"), |
|
1010 |
+ panel.grid.minor=ggplot2::element_line(colour=NA), |
|
1011 |
+ axis.text=ggplot2::element_text(size=12), |
|
1012 |
+ axis.title=ggplot2::element_text(size=14), |
|
1013 |
+ panel.border=ggplot2::element_rect(colour="black", fill=NA)) + |
|
1014 |
+ ggplot2::labs(x="Gene Ranking", y="Random Walk Statistic") |
|
1015 |
+} |
|
1016 |
+ |
|
802 | 1017 |
## |
803 | 1018 |
## functions interfacing C code |
804 | 1019 |
## |
... | ... |
@@ -1094,13 +1094,15 @@ setMethod("unwrapData", signature("SpatialExperiment"), |
1094 | 1094 |
## wrapData: put the resulting data and gene sets into the original data container type |
1095 | 1095 |
setMethod("wrapData", signature(container="matrix"), |
1096 | 1096 |
function(container, dataMatrix, geneSets) { |
1097 |
- attr(dataMatrix, "geneSets") <- geneSets |
|
1097 |
+ if (!missing(geneSets)) |
|
1098 |
+ attr(dataMatrix, "geneSets") <- geneSets |
|
1098 | 1099 |
return(dataMatrix) |
1099 | 1100 |
}) |
1100 | 1101 |
|
1101 | 1102 |
setMethod("wrapData", signature(container="dgCMatrix"), |
1102 | 1103 |
function(container, dataMatrix, geneSets) { |
1103 |
- attr(dataMatrix, "geneSets") <- geneSets |
|
1104 |
+ if (!missing(geneSets)) |
|
1105 |
+ attr(dataMatrix, "geneSets") <- geneSets |
|
1104 | 1106 |
return(dataMatrix) |
1105 | 1107 |
}) |
1106 | 1108 |
|
... | ... |
@@ -1110,45 +1112,79 @@ setMethod("wrapData", signature(container="ExpressionSet"), |
1110 | 1112 |
phenoData=phenoData(container), |
1111 | 1113 |
experimentData=experimentData(container), |
1112 | 1114 |
annotation="") |
1113 |
- attr(rval, "geneSets") <- geneSets |
|
1115 |
+ if (!missing(geneSets)) |
|
1116 |
+ attr(rval, "geneSets") <- geneSets |
|
1114 | 1117 |
|
1115 | 1118 |
return(rval) |
1116 | 1119 |
}) |
1117 | 1120 |
|
1118 | 1121 |
setMethod("wrapData", signature(container="SummarizedExperiment"), |
1119 | 1122 |
function(container, dataMatrix, geneSets) { |
1123 |
+ rdata <- adata <- NULL |
|
1124 |
+ if (!missing(geneSets)) { |
|
1125 |
+ adata <- SimpleList(es=dataMatrix) |
|
1126 |
+ rdata <- DataFrame(gs=CharacterList(geneSets)) |
|
1127 |
+ } else { ## assume missing geneSets imples dataMatrix are ranks |
|
1128 |
+ mask <- rownames(container) %in% rownames(dataMatrix) |
|
1129 |
+ adata <- c(assays(container[mask, ]), |
|
1130 |
+ SimpleList(gsvaranks=dataMatrix)) |
|
1131 |
+ rdata <- rowData(container)[mask, ] |
|
1132 |
+ } |
|
1120 | 1133 |
rval <- SummarizedExperiment( |
1121 |
- assays=SimpleList(es=dataMatrix), |
|
1134 |
+ assays=adata, |
|
1122 | 1135 |
colData=colData(container), |
1123 |
- rowData=DataFrame(gs=CharacterList(geneSets)), |
|
1136 |
+ rowData=rdata, |
|
1124 | 1137 |
metadata=metadata(container)) |
1125 |
- metadata(rval)$annotation <- NULL |
|
1138 |
+ if (!missing(geneSets)) |
|
1139 |
+ metadata(rval)$annotation <- NULL |
|
1126 | 1140 |
|
1127 | 1141 |
return(rval) |
1128 | 1142 |
}) |
1129 | 1143 |
|
1130 | 1144 |
setMethod("wrapData", signature(container="SingleCellExperiment"), |
1131 | 1145 |
function(container, dataMatrix, geneSets) { |
1146 |
+ rdata <- adata <- NULL |
|
1147 |
+ if (!missing(geneSets)) { |
|
1148 |
+ adata <- SimpleList(es=dataMatrix) |
|
1149 |
+ rdata <- DataFrame(gs=CharacterList(geneSets)) |
|
1150 |
+ } else { ## assume missing geneSets imples dataMatrix are ranks |
|
1151 |
+ mask <- rownames(container) %in% rownames(dataMatrix) |
|
1152 |
+ adata <- c(assays(container[mask, ]), |
|
1153 |
+ SimpleList(gsvaranks=dataMatrix)) |
|
1154 |
+ rdata <- rowData(container)[mask, ] |
|
1155 |
+ } |
|
1132 | 1156 |
rval <- SingleCellExperiment( |
1133 |
- assays=SimpleList(es=dataMatrix), |
|
1157 |
+ assays=adata, |
|
1134 | 1158 |
colData=colData(container), |
1135 |
- rowData=DataFrame(gs=CharacterList(geneSets)), |
|
1159 |
+ rowData=rdata, |
|
1136 | 1160 |
metadata=metadata(container)) |
1137 |
- metadata(rval)$annotation <- NULL |
|
1161 |
+ if (!missing(geneSets)) |
|
1162 |
+ metadata(rval)$annotation <- NULL |
|
1138 | 1163 |
|
1139 | 1164 |
return(rval) |
1140 | 1165 |
}) |
1141 | 1166 |
|
1142 | 1167 |
setMethod("wrapData", signature(container="SpatialExperiment"), |
1143 | 1168 |
function(container, dataMatrix, geneSets) { |
1169 |
+ rdata <- adata <- NULL |
|
1170 |
+ if (!missing(geneSets)) { |
|
1171 |
+ adata <- SimpleList(es=dataMatrix) |
|
1172 |
+ rdata <- DataFrame(gs=CharacterList(geneSets)) |
|
1173 |
+ } else { ## assume missing geneSets imples dataMatrix are ranks |
|
1174 |
+ mask <- rownames(container) %in% rownames(dataMatrix) |
|
1175 |
+ adata <- c(assays(container[mask, ]), |
|
1176 |
+ SimpleList(gsvaranks=dataMatrix)) |
|
1177 |
+ rdata <- rowData(container)[mask, ] |
|
1178 |
+ } |
|
1144 | 1179 |
rval <- SpatialExperiment( |
1145 |
- assays=SimpleList(es=dataMatrix), |
|
1180 |
+ assays=adata, |
|
1146 | 1181 |
colData=colData(container), |
1147 |
- rowData=DataFrame(gs=CharacterList(geneSets)), |
|
1182 |
+ rowData=rdata, |
|
1148 | 1183 |
metadata=metadata(container), |
1149 |
- imgData = imgData(container), |
|
1150 |
- spatialCoords = spatialCoords(container)) |
|
1151 |
- metadata(rval)$annotation <- NULL |
|
1184 |
+ imgData=imgData(container), |
|
1185 |
+ spatialCoords=spatialCoords(container)) |
|
1186 |
+ if (!missing(geneSets)) |
|
1187 |
+ metadata(rval)$annotation <- NULL |
|
1152 | 1188 |
|
1153 | 1189 |
return(rval) |
1154 | 1190 |
}) |
... | ... |
@@ -287,3 +287,22 @@ setMethod("show", |
287 | 287 |
if ("dgCMatrix" %in% class(unwrapData(get_exprData(object), get_assay(object)))) |
288 | 288 |
cat("sparse: ", get_sparse(object), "\n") |
289 | 289 |
}) |
290 |
+ |
|
291 |
+## ----- setters for gsvaRanksParam ----- |
|
292 |
+ |
|
293 |
+#' @param object For the replacement method, an object of class |
|
294 |
+#' [`gsvaRanksParam-class`]. |
|
295 |
+#' |
|
296 |
+#' @param value For the replacement method, an object of the classes supported by |
|
297 |
+#' [`GsvaGeneSets-class`]. |
|
298 |
+#' |
|
299 |
+#' @aliases geneSets<- |
|
300 |
+#' @aliases geneSets<-,gsvaRanksParam,GsvaGeneSets-method |
|
301 |
+#' @rdname gsvaParam-class |
|
302 |
+#' @exportMethod geneSets |
|
303 |
+setReplaceMethod("geneSets", signature=signature(object="gsvaRanksParam", |
|
304 |
+ value="GsvaGeneSets"), |
|
305 |
+ function(object, value) { |
|
306 |
+ object@geneSets <- value |
|
307 |
+ object |
|
308 |
+ }) |
... | ... |
@@ -68,32 +68,35 @@ |
68 | 68 |
## is a 'list' object with character string vectors as elements, |
69 | 69 |
## and 'features' is a character string vector object. it assumes |
70 | 70 |
## features in both input objects follow the same nomenclature, |
71 |
+ |
|
72 |
+#' @importFrom cli cli_abort |
|
71 | 73 |
.mapGeneSetsToFeatures <- function(gsets, features) { |
72 | 74 |
|
73 |
- ## Aaron Lun's suggestion at |
|
74 |
- ## https://github.com/rcastelo/GSVA/issues/39#issuecomment-765549620 |
|
75 |
- gsets2 <- CharacterList(gsets) |
|
76 |
- mt <- match(gsets2, features) |
|
77 |
- mapdgenesets <- as.list(mt[!is.na(mt)]) |
|
75 |
+ ## Aaron Lun's suggestion at |
|
76 |
+ ## https://github.com/rcastelo/GSVA/issues/39#issuecomment-765549620 |
|
77 |
+ gsets2 <- CharacterList(gsets) |
|
78 |
+ mt <- match(gsets2, features) |
|
79 |
+ mapdgenesets <- as.list(mt[!is.na(mt)]) |
|
78 | 80 |
|
79 |
- if (length(unlist(mapdgenesets, use.names=FALSE)) == 0) |
|
80 |
- stop("No identifiers in the gene sets could be matched to the identifiers in the expression data.") |
|
81 |
+ if (length(unlist(mapdgenesets, use.names=FALSE)) == 0) { |
|
82 |
+ msg <- paste("No identifiers in the gene sets could be matched to the", |
|
83 |
+ "identifiers in the expression data.") |
|
84 |
+ cli_abort("x"=msg) |
|
85 |
+ } |
|
81 | 86 |
|
82 |
- mapdgenesets |
|
87 |
+ mapdgenesets |
|
83 | 88 |
} |
84 | 89 |
|
85 | 90 |
## it assumes that all arguments have been already checked for correctness |
86 |
-.filterAndMapGeneSets <- function(param, geneSets, minSize, maxSize, |
|
87 |
- filteredDataMatrix, verbose) { |
|
88 |
- |
|
89 |
- ## if geneSets, minSize and maxSize are non-NA values, then they |
|
90 |
- ## override those coming from 'param' (b/c are provided by gsvaScores()) |
|
91 |
- if (any(is.na(geneSets))) |
|
92 |
- geneSets <- get_geneSets(param) |
|
93 |
- if (is.na(minSize)) |
|
94 |
- minSize <- get_minSize(param) |
|
95 |
- if (is.na(maxSize)) |
|
96 |
- maxSize <- get_maxSize(param) |
|
91 |
+#' @importFrom cli cli_abort |
|
92 |
+.filterAndMapGeneSets <- function(param, wgset=NA, filteredDataMatrix, verbose) { |
|
93 |
+ |
|
94 |
+ geneSets <- get_geneSets(param) |
|
95 |
+ if (!is.na(wgset)) |
|
96 |
+ geneSets <- geneSets[wgset] |
|
97 |
+ |
|
98 |
+ minSize <- get_minSize(param) |
|
99 |
+ maxSize <- get_maxSize(param) |
|
97 | 100 |
|
98 | 101 |
## note that the method for 'GeneSetCollection' calls geneIds(), i.e., |
99 | 102 |
## whatever the input, from here on we have a list of character vectors |
... | ... |
@@ -113,11 +116,11 @@ |
113 | 116 |
maxSize=maxSize) |
114 | 117 |
|
115 | 118 |
if (length(filteredMappedGeneSets) == 0) |
116 |
- stop("The gene set list is empty! Filter may be too stringent.") |
|
119 |
+ cli_abort("x"="The gene set list is empty! Filter may be too stringent.") |
|
117 | 120 |
|
118 | 121 |
## this should NEVER happen -- just to make sure it doesn't... |
119 | 122 |
if (anyDuplicated(names(filteredMappedGeneSets)) > 0) |
120 |
- stop("The gene set list contains duplicated gene set names.") |
|
123 |
+ cli_abort("x"="The gene set list contains duplicated gene set names.") |
|
121 | 124 |
|
122 | 125 |
if (any(lengths(filteredMappedGeneSets) == 1)) { |
123 | 126 |
msg <- "Some gene sets have size one. Consider setting minSize > 1" |
... | ... |
@@ -141,9 +144,9 @@ |
141 | 144 |
removeConstant=removeConstant, |
142 | 145 |
removeNzConstant=removeNzConstant) |
143 | 146 |
|
144 |
- filteredMappedGeneSets <- .filterAndMapGeneSets(param, NA, NA, NA, |
|
145 |
- filteredDataMatrix, |
|
146 |
- verbose) |
|
147 |
+ filteredMappedGeneSets <- .filterAndMapGeneSets(param=param, |
|
148 |
+ filteredDataMatrix=filteredDataMatrix, |
|
149 |
+ verbose=verbose) |
|
147 | 150 |
|
148 | 151 |
return(list(filteredDataMatrix=filteredDataMatrix, |
149 | 152 |
filteredMappedGeneSets=filteredMappedGeneSets)) |
... | ... |
@@ -25,10 +25,10 @@ test_gsvaRanks <- function() { |
25 | 25 |
|
26 | 26 |
## calculate GSVA scores in two steps |
27 | 27 |
## first calculate GSVA ranks |
28 |
- gsva_ranks <- gsvaRanks(gsvapar, verbose=FALSE) |
|
28 |
+ gsvarankspar <- gsvaRanks(gsvapar, verbose=FALSE) |
|
29 | 29 |
|
30 | 30 |
## second calculate GSVA scores using GSVA ranks |
31 |
- gsva_es2 <- gsvaScores(gsvapar, gsva_ranks, verbose=FALSE) |
|
31 |
+ gsva_es2 <- gsvaScores(gsvarankspar, verbose=FALSE) |
|
32 | 32 |
|
33 | 33 |
## both approaches to calculate GSVA scores must give |
34 | 34 |
## the same result with the same input gene sets |
35 | 35 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,82 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/gsva.R |
|
3 |
+\name{gsvaEnrichment} |
|
4 |
+\alias{gsvaEnrichment} |
|
5 |
+\alias{gsvaEnrichment,gsvaRanksParam-method} |
|
6 |
+\title{GSVA enrichment data and visualization} |
|
7 |
+\usage{ |
|
8 |
+\S4method{gsvaEnrichment}{gsvaRanksParam}( |
|
9 |
+ param, |
|
10 |
+ column = 1, |
|
11 |
+ geneSet = 1, |
|
12 |
+ plot = c("auto", "base", "ggplot", "no"), |
|
13 |
+ ... |
|
14 |
+) |
|
15 |
+} |
|
16 |
+\arguments{ |
|
17 |
+\item{param}{A \code{\linkS4class{gsvaRanksParam}} object obtained with the method |
|
18 |
+\code{\link{gsvaRanks}}.} |
|
19 |
+ |
|
20 |
+\item{column}{The column for which we want to retrieve the enrichment data. |
|
21 |
+This parameter is only available in the \code{gsvaEnrichment()} method.} |
|
22 |
+ |
|
23 |
+\item{geneSet}{Either a positive integer number between 1 and the number of |
|
24 |
+available gene sets in \code{param}, or a character string with the name of |
|
25 |
+one of the gene sets available in \code{param}.} |
|
26 |
+ |
|
27 |
+\item{plot}{A character string indicating whether an enrichment plot should |
|
28 |
+be produced using either base R graphics (\code{plot="base"}) or the ggplot2 package |
|
29 |
+(\code{plot="ggplot"}), or not (\code{plot="no"}). In the latter case, the enrichment |
|
30 |
+data will be returned. By default \code{plot="auto"}, which implies that if this |
|
31 |
+method is called from an interactive session, a plot using base R graphics |
|
32 |
+will be produced and, otherwise, the enrichment data is returned.} |
|
33 |
+ |
|
34 |
+\item{...}{Further arguments passed to the \code{plot()} function when the |
|
35 |
+previous parameter \code{plot="base"}.} |
|
36 |
+} |
|
37 |
+\value{ |
|
38 |
+When \code{plot="no"}, this method returns the enrichment data. When |
|
39 |
+\code{plot="ggplot"}, this method returns a \code{ggplot} object. When \code{plot="base"} |
|
40 |
+no value is returned. |
|
41 |
+} |
|
42 |
+\description{ |
|
43 |
+Extract and plot enrichment data from GSVA scores. |
|
44 |
+} |
|
45 |
+\examples{ |
|
46 |
+library(GSVA) |
|
47 |
+ |
|
48 |
+p <- 10 ## number of genes |
|
49 |
+n <- 30 ## number of samples |
|
50 |
+nGrp1 <- 15 ## number of samples in group 1 |
|
51 |
+nGrp2 <- n - nGrp1 ## number of samples in group 2 |
|
52 |
+ |
|
53 |
+## consider three disjoint gene sets |
|
54 |
+geneSets <- list(gset1=paste0("g", 1:3), |
|
55 |
+ gset2=paste0("g", 4:6), |
|
56 |
+ gset3=paste0("g", 7:10)) |
|
57 |
+ |
|
58 |
+## sample data from a normal distribution with mean 0 and st.dev. 1 |
|
59 |
+y <- matrix(rnorm(n*p), nrow=p, ncol=n, |
|
60 |
+ dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) |
|
61 |
+ |
|
62 |
+## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples |
|
63 |
+y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2 |
|
64 |
+ |
|
65 |
+## build GSVA parameter object |
|
66 |
+gsvapar <- gsvaParam(y, geneSets) |
|
67 |
+ |
|
68 |
+## calculate GSVA ranks |
|
69 |
+gsvarankspar <- gsvaRanks(gsvapar) |
|
70 |
+gsvarankspar |
|
71 |
+ |
|
72 |
+## by default the enrichment data for the first column and the first |
|
73 |
+## gene set are retrieved |
|
74 |
+gsvaEnrichment(gsvarankspar) |
|
75 |
+ |
|
76 |
+} |
|
77 |
+\references{ |
|
78 |
+Hänzelmann, S., Castelo, R. and Guinney, J. GSVA: Gene set |
|
79 |
+variation analysis for microarray and RNA-Seq data. |
|
80 |
+\emph{BMC Bioinformatics}, 14:7, 2013. |
|
81 |
+\href{https://doi.org/10.1186/1471-2105-14-7}{DOI} |
|
82 |
+} |
... | ... |
@@ -3,7 +3,10 @@ |
3 | 3 |
\docType{class} |
4 | 4 |
\name{gsvaParam-class} |
5 | 5 |
\alias{gsvaParam-class} |
6 |
+\alias{gsvaRanksParam-class} |
|
6 | 7 |
\alias{gsvaParam} |
8 |
+\alias{geneSets<-,gsvaRanksParam,GsvaGeneSets-method} |
|
9 |
+\alias{geneSets<-} |
|
7 | 10 |
\title{\code{gsvaParam} class} |
8 | 11 |
\usage{ |
9 | 12 |
gsvaParam( |
... | ... |
@@ -20,6 +23,8 @@ gsvaParam( |
20 | 23 |
absRanking = FALSE, |
21 | 24 |
sparse = TRUE |
22 | 25 |
) |
26 |
+ |
|
27 |
+\S4method{geneSets}{gsvaRanksParam,GsvaGeneSets}(object) <- value |
|
23 | 28 |
} |
24 | 29 |
\arguments{ |
25 | 30 |
\item{exprData}{The expression data set. Must be one of the classes |
... | ... |
@@ -101,6 +106,12 @@ data in \code{exprData} is stored in a sparse matrix (e.g., a \code{dgCMatrix} o |
101 | 106 |
In such a case, when \code{sparse=TRUE} (default), a sparse version of the GSVA |
102 | 107 |
algorithm will be applied. Otherwise, when \code{sparse=FALSE}, the classical |
103 | 108 |
version of the GSVA algorithm will be used.} |
109 |
+ |
|
110 |
+\item{object}{For the replacement method, an object of class |
|
111 |
+\code{\linkS4class{gsvaRanksParam}}.} |
|
112 |
+ |
|
113 |
+\item{value}{For the replacement method, an object of the classes supported by |
|
114 |
+\code{\linkS4class{GsvaGeneSets}}.} |
|
104 | 115 |
} |
105 | 116 |
\value{ |
106 | 117 |
A new \code{\linkS4class{gsvaParam}} object. |
... | ... |
@@ -4,88 +4,40 @@ |
4 | 4 |
\alias{gsvaRanks} |
5 | 5 |
\alias{gsvaRanks,gsvaParam-method} |
6 | 6 |
\alias{gsvaScores} |
7 |
-\alias{gsvaScores,gsvaParam,GsvaExprData-method} |
|
7 |
+\alias{gsvaScores,gsvaRanksParam-method} |
|
8 | 8 |
\title{GSVA ranks and scores} |
9 | 9 |
\usage{ |
10 | 10 |
\S4method{gsvaRanks}{gsvaParam}(param, verbose = TRUE, BPPARAM = SerialParam(progressbar = verbose)) |
11 | 11 |
|
12 |
-\S4method{gsvaScores}{gsvaParam,GsvaExprData}( |
|
13 |
- param, |
|
14 |
- ranks, |
|
15 |
- geneSets = NA, |
|
16 |
- minSize = NA, |
|
17 |
- maxSize = NA, |
|
18 |
- tau = NA, |
|
19 |
- maxDiff = NA, |
|
20 |
- absRanking = NA, |
|
21 |
- verbose = TRUE, |
|
22 |
- BPPARAM = SerialParam(progressbar = verbose) |
|
23 |
-) |
|
12 |
+\S4method{gsvaScores}{gsvaRanksParam}(param, verbose = TRUE, BPPARAM = SerialParam(progressbar = verbose)) |
|
24 | 13 |
} |
25 | 14 |
\arguments{ |
26 |
-\item{param}{A \code{\link{gsvaParam}} object built using the constructor function |
|
27 |
-\code{\link{gsvaParam}}.} |
|
15 |
+\item{param}{A parameter object of the \code{\linkS4class{gsvaRanksParam}} class.} |
|
28 | 16 |
|
29 | 17 |
\item{verbose}{Gives information about each calculation step. Default: \code{TRUE}.} |
30 | 18 |
|
31 | 19 |
\item{BPPARAM}{An object of class \code{\link{BiocParallelParam}} specifying parameters |
32 | 20 |
related to the parallel execution of some of the tasks and calculations |
33 | 21 |
within this function.} |
34 |
- |
|
35 |
-\item{ranks}{A matrix-like object storing GSVA ranks calculated with the |
|
36 |
-method \code{\link{gsvaRanks}}.} |
|
37 |
- |
|
38 |
-\item{geneSets}{A collection of gene sets. Must be one of the classes |
|
39 |
-supported by \code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its |
|
40 |
-help page using \code{help(GsvaGeneSets)}. By default, this parameter is set to |
|
41 |
-the \code{NA} missing value, which means that GSVA scores will be calculated |
|
42 |
-using the gene sets specified in the \code{param} argument. If this parameter is |
|
43 |
-set to a non-missing value corresponding to an object of the classes |
|
44 |
-supported by \code{\linkS4class{GsvaGeneSets}}, then GSVA scores will be calculated |
|
45 |
-using the gene sets in this argument, instead of the ones specified in the |
|
46 |
-\code{param} argument.} |
|
47 |
- |
|
48 |
-\item{minSize}{Numeric vector of length 1. Minimum size of the resulting gene |
|
49 |
-sets after gene identifier mapping. Its default value is \code{NA}, indicating that |
|
50 |
-this minimum value will be taken from the input \code{param} argument, otherwise, |
|
51 |
-non-\code{NA} values override those from the input \code{param} argument.} |
|
52 |
- |
|
53 |
-\item{maxSize}{Numeric vector of length 1. Minimum size of the resulting gene |
|
54 |
-sets after gene identifier mapping. Its default value is \code{NA}, indicating that |
|
55 |
-this minimum value will be taken from the input \code{param} argument, otherwise, |
|
56 |
-non-\code{NA} values override those from the input \code{param} argument.} |
|
57 |
- |
|
58 |
-\item{tau}{Numeric vector of length 1. The exponent defining the weight of |
|
59 |
-the tail in the random walk performed by the \code{GSVA} (Hänzelmann et al., |
|
60 |
-2013) method. The default value is 1 as described in the paper.} |
|
61 |
- |
|
62 |
-\item{maxDiff}{Logical vector of length 1 which offers two approaches to |
|
63 |
-calculate the enrichment statistic (ES) from the KS random walk statistic. |
|
64 |
-\itemize{ |
|
65 |
-\item \code{FALSE}: ES is calculated as the maximum distance of the random walk |
|
66 |
-from 0. This approach produces a distribution of enrichment scores that is |
|
67 |
-bimodal, but it can give large enrichment scores to gene sets whose genes |
|
68 |
-are not concordantly activated in one direction only. |
|
69 |
-\item \code{TRUE} (the default): ES is calculated as the magnitude difference between |
|
70 |
-the largest positive and negative random walk deviations. This default value |
|
71 |
-gives larger enrichment scores to gene sets whose genes are concordantly |
|
72 |
-activated in one direction only. |
|
73 |
-}} |
|
74 |
- |
|
75 |
-\item{absRanking}{Logical vector of length 1 used only when \code{maxDiff=TRUE}. |
|
76 |
-When \code{absRanking=FALSE} (default) a modified Kuiper statistic is used to |
|
77 |
-calculate enrichment scores, taking the magnitude difference between the |
|
78 |
-largest positive and negative random walk deviations. When |
|
79 |
-\code{absRanking=TRUE} the original Kuiper statistic that sums the largest |
|
80 |
-positive and negative random walk deviations is used.} |
|
81 | 22 |
} |
82 | 23 |
\value{ |
83 |
-In the case of the \code{gsvaRanks()} method, a matrix of GSVA rank |
|
84 |
-values per column. |
|
24 |
+In the case of the \code{gsvaRanks()} method, an object of class |
|
25 |
+\code{\linkS4class{gsvaRanksParam}}. |
|
85 | 26 |
|
86 | 27 |
In the case of the \code{gsvaScores()} method, a gene-set by sample matrix |
87 |
-of GSVA enrichment scores stored in a ocntainer object of the same type as |
|
88 |
-the input expression data container in the \code{param} argument. |
|
28 |
+of GSVA enrichment scores stored in a container object of the same type as |
|
29 |
+the input ranks data container. If |
|
30 |
+the input was a base matrix or a \code{\linkS4class{dgCMatrix}} object, then the output will |
|
31 |
+be a base matrix object with the gene sets employed in the calculations |
|
32 |
+stored in an attribute called \code{geneSets}. If the input was an |
|
33 |
+\code{\link{ExpressionSet}} object, then the output will be also an \code{\link{ExpressionSet}} |
|
34 |
+object with the gene sets employed in the calculations stored in an |
|
35 |
+attributed called \code{geneSets}. If the input was an object of one of the |
|
36 |
+classes described in \code{\link{GsvaExprData}}, such as a \code{\link{SingleCellExperiment}}, |
|
37 |
+then the output will be of the same class, where enrichment scores will be |
|
38 |
+stored in an assay called \code{es} and the gene sets employed in the |
|
39 |
+calculations will be stored in the \code{rowData} slot of the object under the |
|
40 |
+column name \code{gs}. |
|
89 | 41 |
} |
90 | 42 |
\description{ |
91 | 43 |
Calculate GSVA scores in two steps: (1) calculate GSVA |
... | ... |
@@ -109,14 +61,17 @@ geneSets <- list(gset1=paste0("g", 1:3), |
109 | 61 |
y <- matrix(rnorm(n*p), nrow=p, ncol=n, |
110 | 62 |
dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) |
111 | 63 |
|
64 |
+## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples |
|
65 |
+y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2 |
|
66 |
+ |
|
112 | 67 |
## build GSVA parameter object |
113 | 68 |
gsvapar <- gsvaParam(y, geneSets) |
114 | 69 |
|
115 | 70 |
## calculate GSVA ranks |
116 |
-gsva_ranks <- gsvaRanks(gsvapar) |
|
117 |
-gsva_ranks |
|
71 |
+gsvarankspar <- gsvaRanks(gsvapar) |
|
72 |
+gsvarankspar |
|
118 | 73 |
## calculate GSVA scores |
119 |
-gsva_es <- gsvaScores(gsvapar, gsva_ranks) |
|
74 |
+gsva_es <- gsvaScores(gsvarankspar) |
|
120 | 75 |
gsva_es |
121 | 76 |
|
122 | 77 |
## calculate now GSVA scores in a single step |
... | ... |
@@ -130,7 +85,8 @@ geneSets2 <- list(gset1=paste0("g", 3:6), |
130 | 85 |
gset2=paste0("g", c(1, 2, 7, 8))) |
131 | 86 |
|
132 | 87 |
## note that there is no need to calculate the GSVA ranks again |
133 |
-gsvaScores(gsvapar, gsva_ranks, geneSets2) |
|
88 |
+geneSets(gsvarankspar) <- geneSets2 |
|
89 |
+gsvaScores(gsvarankspar) |
|
134 | 90 |
|
135 | 91 |
} |
136 | 92 |
\references{ |
... | ... |
@@ -140,5 +96,5 @@ variation analysis for microarray and RNA-Seq data. |
140 | 96 |
\href{https://doi.org/10.1186/1471-2105-14-7}{DOI} |
141 | 97 |
} |
142 | 98 |
\seealso{ |
143 |
-\code{\link{gsvaParam}}, \code{\link{gsva}} |
|
99 |
+\code{\linkS4class{gsvaParam}}, \code{\linkS4class{gsvaRanksParam}}, \code{\link{gsva}} |
|
144 | 100 |
} |