Browse code

Added sanity checks in prepare_for_enrichment; possible NULL all features; class name

Marek Gierlinski authored on 18/08/2022 08:59:30
Showing 3 changed files

... ...
@@ -5,27 +5,49 @@
5 5
 #'
6 6
 #' @details
7 7
 #'
8
-#' Takes two data frames with functional term information and gene mapping and
9
-#' converts them into an object required by `functional_enrichment` for fast
10
-#' analysis.
8
+#' Takes two data frames with functional term information (\code{terms}) and
9
+#' gene mapping (\code{mapping}) and converts them into an object required by
10
+#' \code{functional_enrichment} for fast analysis. Terms and mapping can be
11
+#' created with database access functions in this package, for example
12
+#' \code{fetch_reactome} or \code{fetch_go_from_go}.
11 13
 #'
12
-#' @param terms Information about term names/descriptions. A tibble with
13
-#'   columns \code{term_id} and \code{term_name}.
14
+#' @param terms Information about term names/descriptions. A tibble with columns
15
+#'   \code{term_id} and \code{term_name}.
14 16
 #' @param mapping Information about term-feature mapping. A tibble with
15
-#'   \code{term_id} and a feature id, as identified with \code{feature_name} argument. For
16
-#'   example, if this tibble contains \code{gene_symbol} and \code{term_id}, then
17
-#'   `feature_name = "gene_symbol"`.
18
-#' @param all_features A vector with all feature ids (background for
19
-#'   enrichment).
20
-#' @param feature_name Which column to use from mapping table, e.g. "gene_symbol"
21
-#'   or "ensembl_gene_id".
17
+#'   \code{term_id} and a feature id, as identified with \code{feature_name}
18
+#'   argument. For example, if this tibble contains \code{gene_symbol} and
19
+#'   \code{term_id}, then \code{feature_name = "gene_symbol"}.
20
+#' @param all_features A vector with all feature ids used as background for
21
+#'   enrichment. If not specified, all features from \code{mapping} will be
22
+#'   used, resulting in a larger object size.
23
+#' @param feature_name Which column to use from mapping table, e.g.
24
+#'   "gene_symbol" or "ensembl_gene_id".
22 25
 #'
23
-#' @return An object required by `functional_enrichment`.
26
+#' @return An object of class \code{fterms} required by
27
+#'   \code{functional_enrichment}.
24 28
 #' @export
25
-prepare_for_enrichment <- function(terms, mapping, all_features, feature_name = "gene_id") {
26
-  # Check for column name
27
-  if (!(feature_name %in% colnames(mapping))) {
29
+prepare_for_enrichment <- function(terms, mapping, all_features = NULL, feature_name = "gene_id") {
30
+  # Check terms
31
+  if (!all(c("term_id", "term_name") %in% colnames(terms)))
32
+    stop("Column names in 'terms' should be 'term_id' and 'term_name'.")
33
+
34
+  # Check mapping
35
+  if (!("term_id" %in% colnames(mapping)))
36
+    stop("'mapping' should contain a column named 'term_id'.")
37
+
38
+  # Check for feature name
39
+  if (!(feature_name %in% colnames(mapping)))
28 40
     stop(paste(feature_name, "column not found in mapping table. Check feature_name argument."))
41
+
42
+  # Replace empty all_features with everything from mapping
43
+  map_features <- mapping[[feature_name]] |>
44
+    unique()
45
+  if (is.null(all_features)) {
46
+    all_features <- map_features
47
+  } else {
48
+    # Check if mapping is contained in all features
49
+    if (length(intersect(all_features, map_features)) == 0)
50
+      stop("No overlap between 'all_features' and features found in 'mapping'. Did you provide correct 'all_features'?")
29 51
   }
30 52
 
31 53
   # Check for missing term descriptions
... ...
@@ -63,13 +85,15 @@ prepare_for_enrichment <- function(terms, mapping, all_features, feature_name =
63 85
     term2name = term2name,
64 86
     term2feature = term2feature,
65 87
     feature2term = feature2term
66
-  )
88
+  ) |>
89
+    structure(class = "fterms")
67 90
 }
68 91
 
69 92
 
70 93
 #' Fast functional enrichment
71 94
 #'
72
-#' Fast functional enrichment based on hypergeometric distribution. Can be used in interactive applications.
95
+#' Fast functional enrichment based on hypergeometric distribution. Can be used
96
+#' in interactive applications.
73 97
 #'
74 98
 #' @details
75 99
 #'
... ...
@@ -84,8 +108,8 @@ prepare_for_enrichment <- function(terms, mapping, all_features, feature_name =
84 108
 #' @param feat_all A character vector with all feature identifiers. This is the
85 109
 #'   background for enrichment.
86 110
 #' @param feat_sel A character vector with feature identifiers in the selection.
87
-#' @param term_data Functional term data, as explained in details. It can be
88
-#'   created using \code{prepare_for_enrichment}.
111
+#' @param term_data An object of class \code{fterms}, as explained in details.
112
+#'   It can be created using \code{prepare_for_enrichment}.
89 113
 #' @param feat2name An optional named list to convert feature id into feature
90 114
 #'   name.
91 115
 #' @param min_count Minimal count of features with term in the selection to be
... ...
@@ -111,6 +135,9 @@ prepare_for_enrichment <- function(terms, mapping, all_features, feature_name =
111 135
 functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NULL,
112 136
                                   min_count = 2, fdr_limit = 0.05) {
113 137
 
138
+  if (!(class(term_data) == "fterms"))
139
+    stop("'term_data' should be an object of class 'fterms'.")
140
+
114 141
   # all terms present in the selection
115 142
   our_terms <- feat_sel |>
116 143
     purrr::map(\(x) term_data$feature2term[[x]]) |>
... ...
@@ -19,8 +19,8 @@ background for enrichment.}
19 19
 
20 20
 \item{feat_sel}{A character vector with feature identifiers in the selection.}
21 21
 
22
-\item{term_data}{Functional term data, as explained in details. It can be
23
-created using \code{prepare_for_enrichment}.}
22
+\item{term_data}{An object of class \code{fterms}, as explained in details.
23
+It can be created using \code{prepare_for_enrichment}.}
24 24
 
25 25
 \item{feat2name}{An optional named list to convert feature id into feature
26 26
 name.}
... ...
@@ -42,7 +42,8 @@ A tibble with enrichment results. For each term the following
42 42
   approach.
43 43
 }
44 44
 \description{
45
-Fast functional enrichment based on hypergeometric distribution. Can be used in interactive applications.
45
+Fast functional enrichment based on hypergeometric distribution. Can be used
46
+in interactive applications.
46 47
 }
47 48
 \details{
48 49
 Functional enrichment in a selection (e.g. significantly DE features) of
... ...
@@ -4,32 +4,41 @@
4 4
 \alias{prepare_for_enrichment}
5 5
 \title{Prepare term data for enrichment analysis}
6 6
 \usage{
7
-prepare_for_enrichment(terms, mapping, all_features, feature_name = "gene_id")
7
+prepare_for_enrichment(
8
+  terms,
9
+  mapping,
10
+  all_features = NULL,
11
+  feature_name = "gene_id"
12
+)
8 13
 }
9 14
 \arguments{
10
-\item{terms}{Information about term names/descriptions. A tibble with
11
-columns \code{term_id} and \code{term_name}.}
15
+\item{terms}{Information about term names/descriptions. A tibble with columns
16
+\code{term_id} and \code{term_name}.}
12 17
 
13 18
 \item{mapping}{Information about term-feature mapping. A tibble with
14
-\code{term_id} and a feature id, as identified with \code{feature_name} argument. For
15
-example, if this tibble contains \code{gene_symbol} and \code{term_id}, then
16
-`feature_name = "gene_symbol"`.}
19
+\code{term_id} and a feature id, as identified with \code{feature_name}
20
+argument. For example, if this tibble contains \code{gene_symbol} and
21
+\code{term_id}, then \code{feature_name = "gene_symbol"}.}
17 22
 
18
-\item{all_features}{A vector with all feature ids (background for
19
-enrichment).}
23
+\item{all_features}{A vector with all feature ids used as background for
24
+enrichment. If not specified, all features from \code{mapping} will be
25
+used, resulting in a larger object size.}
20 26
 
21
-\item{feature_name}{Which column to use from mapping table, e.g. "gene_symbol"
22
-or "ensembl_gene_id".}
27
+\item{feature_name}{Which column to use from mapping table, e.g.
28
+"gene_symbol" or "ensembl_gene_id".}
23 29
 }
24 30
 \value{
25
-An object required by `functional_enrichment`.
31
+An object of class \code{fterms} required by
32
+  \code{functional_enrichment}.
26 33
 }
27 34
 \description{
28 35
 Prepare term data downloaded with \code{fetch_*} functions for fast
29 36
 enrichment analysis.
30 37
 }
31 38
 \details{
32
-Takes two data frames with functional term information and gene mapping and
33
-converts them into an object required by `functional_enrichment` for fast
34
-analysis.
39
+Takes two data frames with functional term information (\code{terms}) and
40
+gene mapping (\code{mapping}) and converts them into an object required by
41
+\code{functional_enrichment} for fast analysis. Terms and mapping can be
42
+created with database access functions in this package, for example
43
+\code{fetch_reactome} or \code{fetch_go_from_go}.
35 44
 }