... | ... |
@@ -5,27 +5,49 @@ |
5 | 5 |
#' |
6 | 6 |
#' @details |
7 | 7 |
#' |
8 |
-#' Takes two data frames with functional term information and gene mapping and |
|
9 |
-#' converts them into an object required by `functional_enrichment` for fast |
|
10 |
-#' analysis. |
|
8 |
+#' Takes two data frames with functional term information (\code{terms}) and |
|
9 |
+#' gene mapping (\code{mapping}) and converts them into an object required by |
|
10 |
+#' \code{functional_enrichment} for fast analysis. Terms and mapping can be |
|
11 |
+#' created with database access functions in this package, for example |
|
12 |
+#' \code{fetch_reactome} or \code{fetch_go_from_go}. |
|
11 | 13 |
#' |
12 |
-#' @param terms Information about term names/descriptions. A tibble with |
|
13 |
-#' columns \code{term_id} and \code{term_name}. |
|
14 |
+#' @param terms Information about term names/descriptions. A tibble with columns |
|
15 |
+#' \code{term_id} and \code{term_name}. |
|
14 | 16 |
#' @param mapping Information about term-feature mapping. A tibble with |
15 |
-#' \code{term_id} and a feature id, as identified with \code{feature_name} argument. For |
|
16 |
-#' example, if this tibble contains \code{gene_symbol} and \code{term_id}, then |
|
17 |
-#' `feature_name = "gene_symbol"`. |
|
18 |
-#' @param all_features A vector with all feature ids (background for |
|
19 |
-#' enrichment). |
|
20 |
-#' @param feature_name Which column to use from mapping table, e.g. "gene_symbol" |
|
21 |
-#' or "ensembl_gene_id". |
|
17 |
+#' \code{term_id} and a feature id, as identified with \code{feature_name} |
|
18 |
+#' argument. For example, if this tibble contains \code{gene_symbol} and |
|
19 |
+#' \code{term_id}, then \code{feature_name = "gene_symbol"}. |
|
20 |
+#' @param all_features A vector with all feature ids used as background for |
|
21 |
+#' enrichment. If not specified, all features from \code{mapping} will be |
|
22 |
+#' used, resulting in a larger object size. |
|
23 |
+#' @param feature_name Which column to use from mapping table, e.g. |
|
24 |
+#' "gene_symbol" or "ensembl_gene_id". |
|
22 | 25 |
#' |
23 |
-#' @return An object required by `functional_enrichment`. |
|
26 |
+#' @return An object of class \code{fterms} required by |
|
27 |
+#' \code{functional_enrichment}. |
|
24 | 28 |
#' @export |
25 |
-prepare_for_enrichment <- function(terms, mapping, all_features, feature_name = "gene_id") { |
|
26 |
- # Check for column name |
|
27 |
- if (!(feature_name %in% colnames(mapping))) { |
|
29 |
+prepare_for_enrichment <- function(terms, mapping, all_features = NULL, feature_name = "gene_id") { |
|
30 |
+ # Check terms |
|
31 |
+ if (!all(c("term_id", "term_name") %in% colnames(terms))) |
|
32 |
+ stop("Column names in 'terms' should be 'term_id' and 'term_name'.") |
|
33 |
+ |
|
34 |
+ # Check mapping |
|
35 |
+ if (!("term_id" %in% colnames(mapping))) |
|
36 |
+ stop("'mapping' should contain a column named 'term_id'.") |
|
37 |
+ |
|
38 |
+ # Check for feature name |
|
39 |
+ if (!(feature_name %in% colnames(mapping))) |
|
28 | 40 |
stop(paste(feature_name, "column not found in mapping table. Check feature_name argument.")) |
41 |
+ |
|
42 |
+ # Replace empty all_features with everything from mapping |
|
43 |
+ map_features <- mapping[[feature_name]] |> |
|
44 |
+ unique() |
|
45 |
+ if (is.null(all_features)) { |
|
46 |
+ all_features <- map_features |
|
47 |
+ } else { |
|
48 |
+ # Check if mapping is contained in all features |
|
49 |
+ if (length(intersect(all_features, map_features)) == 0) |
|
50 |
+ stop("No overlap between 'all_features' and features found in 'mapping'. Did you provide correct 'all_features'?") |
|
29 | 51 |
} |
30 | 52 |
|
31 | 53 |
# Check for missing term descriptions |
... | ... |
@@ -63,13 +85,15 @@ prepare_for_enrichment <- function(terms, mapping, all_features, feature_name = |
63 | 85 |
term2name = term2name, |
64 | 86 |
term2feature = term2feature, |
65 | 87 |
feature2term = feature2term |
66 |
- ) |
|
88 |
+ ) |> |
|
89 |
+ structure(class = "fterms") |
|
67 | 90 |
} |
68 | 91 |
|
69 | 92 |
|
70 | 93 |
#' Fast functional enrichment |
71 | 94 |
#' |
72 |
-#' Fast functional enrichment based on hypergeometric distribution. Can be used in interactive applications. |
|
95 |
+#' Fast functional enrichment based on hypergeometric distribution. Can be used |
|
96 |
+#' in interactive applications. |
|
73 | 97 |
#' |
74 | 98 |
#' @details |
75 | 99 |
#' |
... | ... |
@@ -84,8 +108,8 @@ prepare_for_enrichment <- function(terms, mapping, all_features, feature_name = |
84 | 108 |
#' @param feat_all A character vector with all feature identifiers. This is the |
85 | 109 |
#' background for enrichment. |
86 | 110 |
#' @param feat_sel A character vector with feature identifiers in the selection. |
87 |
-#' @param term_data Functional term data, as explained in details. It can be |
|
88 |
-#' created using \code{prepare_for_enrichment}. |
|
111 |
+#' @param term_data An object of class \code{fterms}, as explained in details. |
|
112 |
+#' It can be created using \code{prepare_for_enrichment}. |
|
89 | 113 |
#' @param feat2name An optional named list to convert feature id into feature |
90 | 114 |
#' name. |
91 | 115 |
#' @param min_count Minimal count of features with term in the selection to be |
... | ... |
@@ -111,6 +135,9 @@ prepare_for_enrichment <- function(terms, mapping, all_features, feature_name = |
111 | 135 |
functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NULL, |
112 | 136 |
min_count = 2, fdr_limit = 0.05) { |
113 | 137 |
|
138 |
+ if (!(class(term_data) == "fterms")) |
|
139 |
+ stop("'term_data' should be an object of class 'fterms'.") |
|
140 |
+ |
|
114 | 141 |
# all terms present in the selection |
115 | 142 |
our_terms <- feat_sel |> |
116 | 143 |
purrr::map(\(x) term_data$feature2term[[x]]) |> |
... | ... |
@@ -19,8 +19,8 @@ background for enrichment.} |
19 | 19 |
|
20 | 20 |
\item{feat_sel}{A character vector with feature identifiers in the selection.} |
21 | 21 |
|
22 |
-\item{term_data}{Functional term data, as explained in details. It can be |
|
23 |
-created using \code{prepare_for_enrichment}.} |
|
22 |
+\item{term_data}{An object of class \code{fterms}, as explained in details. |
|
23 |
+It can be created using \code{prepare_for_enrichment}.} |
|
24 | 24 |
|
25 | 25 |
\item{feat2name}{An optional named list to convert feature id into feature |
26 | 26 |
name.} |
... | ... |
@@ -42,7 +42,8 @@ A tibble with enrichment results. For each term the following |
42 | 42 |
approach. |
43 | 43 |
} |
44 | 44 |
\description{ |
45 |
-Fast functional enrichment based on hypergeometric distribution. Can be used in interactive applications. |
|
45 |
+Fast functional enrichment based on hypergeometric distribution. Can be used |
|
46 |
+in interactive applications. |
|
46 | 47 |
} |
47 | 48 |
\details{ |
48 | 49 |
Functional enrichment in a selection (e.g. significantly DE features) of |
... | ... |
@@ -4,32 +4,41 @@ |
4 | 4 |
\alias{prepare_for_enrichment} |
5 | 5 |
\title{Prepare term data for enrichment analysis} |
6 | 6 |
\usage{ |
7 |
-prepare_for_enrichment(terms, mapping, all_features, feature_name = "gene_id") |
|
7 |
+prepare_for_enrichment( |
|
8 |
+ terms, |
|
9 |
+ mapping, |
|
10 |
+ all_features = NULL, |
|
11 |
+ feature_name = "gene_id" |
|
12 |
+) |
|
8 | 13 |
} |
9 | 14 |
\arguments{ |
10 |
-\item{terms}{Information about term names/descriptions. A tibble with |
|
11 |
-columns \code{term_id} and \code{term_name}.} |
|
15 |
+\item{terms}{Information about term names/descriptions. A tibble with columns |
|
16 |
+\code{term_id} and \code{term_name}.} |
|
12 | 17 |
|
13 | 18 |
\item{mapping}{Information about term-feature mapping. A tibble with |
14 |
-\code{term_id} and a feature id, as identified with \code{feature_name} argument. For |
|
15 |
-example, if this tibble contains \code{gene_symbol} and \code{term_id}, then |
|
16 |
-`feature_name = "gene_symbol"`.} |
|
19 |
+\code{term_id} and a feature id, as identified with \code{feature_name} |
|
20 |
+argument. For example, if this tibble contains \code{gene_symbol} and |
|
21 |
+\code{term_id}, then \code{feature_name = "gene_symbol"}.} |
|
17 | 22 |
|
18 |
-\item{all_features}{A vector with all feature ids (background for |
|
19 |
-enrichment).} |
|
23 |
+\item{all_features}{A vector with all feature ids used as background for |
|
24 |
+enrichment. If not specified, all features from \code{mapping} will be |
|
25 |
+used, resulting in a larger object size.} |
|
20 | 26 |
|
21 |
-\item{feature_name}{Which column to use from mapping table, e.g. "gene_symbol" |
|
22 |
-or "ensembl_gene_id".} |
|
27 |
+\item{feature_name}{Which column to use from mapping table, e.g. |
|
28 |
+"gene_symbol" or "ensembl_gene_id".} |
|
23 | 29 |
} |
24 | 30 |
\value{ |
25 |
-An object required by `functional_enrichment`. |
|
31 |
+An object of class \code{fterms} required by |
|
32 |
+ \code{functional_enrichment}. |
|
26 | 33 |
} |
27 | 34 |
\description{ |
28 | 35 |
Prepare term data downloaded with \code{fetch_*} functions for fast |
29 | 36 |
enrichment analysis. |
30 | 37 |
} |
31 | 38 |
\details{ |
32 |
-Takes two data frames with functional term information and gene mapping and |
|
33 |
-converts them into an object required by `functional_enrichment` for fast |
|
34 |
-analysis. |
|
39 |
+Takes two data frames with functional term information (\code{terms}) and |
|
40 |
+gene mapping (\code{mapping}) and converts them into an object required by |
|
41 |
+\code{functional_enrichment} for fast analysis. Terms and mapping can be |
|
42 |
+created with database access functions in this package, for example |
|
43 |
+\code{fetch_reactome} or \code{fetch_go_from_go}. |
|
35 | 44 |
} |