... | ... |
@@ -68,26 +68,35 @@ prepare_for_enrichment <- function(terms, mapping, all_features = NULL, feature_ |
68 | 68 |
terms <- dplyr::bind_rows(terms, dummy) |
69 | 69 |
} |
70 | 70 |
|
71 |
- # List to select term name |
|
72 |
- term2name <- terms$term_name |> |
|
73 |
- purrr::set_names(terms$term_id) |
|
71 |
+ # Hash to select term name |
|
72 |
+ term2name <- Rfast::Hash( |
|
73 |
+ keys = terms$term_id, |
|
74 |
+ values = terms$term_name |
|
75 |
+ ) |
|
74 | 76 |
|
75 | 77 |
# feature-term tibble |
76 | 78 |
feature_term <- mapping |> |
77 | 79 |
dplyr::rename(feature_id = !!feature_name) |> |
78 |
- dplyr::filter(feature_id %in% all_features) |
|
80 |
+ dplyr::filter(feature_id %in% all_features) |> |
|
81 |
+ dplyr::select(feature_id, term_id) |
|
79 | 82 |
|
80 |
- # Feature to terms conversion list |
|
81 |
- feature2term <- feature_term |> |
|
83 |
+ # Feature to terms hash |
|
84 |
+ f2t <- feature_term |> |
|
82 | 85 |
dplyr::group_by(feature_id) |> |
83 |
- dplyr::summarise(terms = list(term_id)) |> |
|
84 |
- tibble::deframe() |
|
85 |
- |
|
86 |
- # Term to feature conversion list |
|
87 |
- term2feature <- feature_term |> |
|
86 |
+ dplyr::summarise(terms = list(term_id)) |
|
87 |
+ feature2term <- Rfast::Hash( |
|
88 |
+ keys = f2t$feature_id, |
|
89 |
+ values = f2t$terms |
|
90 |
+ ) |
|
91 |
+ |
|
92 |
+ # Term to feature hash |
|
93 |
+ t2f <- feature_term |> |
|
88 | 94 |
dplyr::group_by(term_id) |> |
89 |
- dplyr::summarise(features = list(feature_id)) |> |
|
90 |
- tibble::deframe() |
|
95 |
+ dplyr::summarise(features = list(feature_id)) |
|
96 |
+ term2feature <- Rfast::Hash( |
|
97 |
+ keys = t2f$term_id, |
|
98 |
+ values = t2f$features |
|
99 |
+ ) |
|
91 | 100 |
|
92 | 101 |
list( |
93 | 102 |
term2name = term2name, |
... | ... |
@@ -157,7 +166,7 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL |
157 | 166 |
|
158 | 167 |
# all terms present in the selection |
159 | 168 |
our_terms <- feat_sel |> |
160 |
- purrr::map(\(x) term_data$feature2term[[x]]) |> |
|
169 |
+ purrr::map(\(x) term_data$feature2term[x]) |> |
|
161 | 170 |
unlist() |> |
162 | 171 |
unique() |
163 | 172 |
# number of features in selection |
... | ... |
@@ -167,7 +176,9 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL |
167 | 176 |
|
168 | 177 |
res <- purrr::map_dfr(our_terms, function(term_id) { |
169 | 178 |
# all features with the term |
170 |
- tfeats <- term_data$term2feature[[term_id]] |
|
179 |
+ # [[1]] is needed because hash values are one-element lists |
|
180 |
+ # term_data$term2feature is a Hash object |
|
181 |
+ tfeats <- term_data$term2feature[term_id][[1]] |
|
171 | 182 |
|
172 | 183 |
# features from selection with the term |
173 | 184 |
# this is faster than intersect(tfeats, feat_sel) |
... | ... |
@@ -201,7 +212,7 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL |
201 | 212 |
|
202 | 213 |
if (!is.null(feat2name)) tfeats_sel <- feat2name[tfeats_sel] |> unname() |
203 | 214 |
|
204 |
- term_name <- term_data$term2name[[term_id]] |
|
215 |
+ term_name <- term_data$term2name[term_id] |
|
205 | 216 |
# returns NAs if no term found |
206 | 217 |
if (is.null(term_name)) term_name <- NA_character_ |
207 | 218 |
|
... | ... |
@@ -15,10 +15,6 @@ terms <- tibble::tibble( |
15 | 15 |
term_name = term_names |
16 | 16 |
) |
17 | 17 |
|
18 |
-# Prepare data for enrichment |
|
19 |
-term2name <- term_names |> |
|
20 |
- purrr::set_names(term_ids) |
|
21 |
- |
|
22 | 18 |
# random selection of features for terms |
23 | 19 |
set.seed(666) |
24 | 20 |
mapping <- purrr::map2_dfr(term_ids, term_sizes, function(tid, n) { |
... | ... |
@@ -28,42 +24,44 @@ mapping <- purrr::map2_dfr(term_ids, term_sizes, function(tid, n) { |
28 | 24 |
) |
29 | 25 |
}) |
30 | 26 |
|
31 |
-# Feature to terms conversion list |
|
32 |
-feature2term <- mapping |> |
|
33 |
- dplyr::group_by(feature_id) |> |
|
34 |
- dplyr::summarise(terms = list(term_id)) |> |
|
35 |
- tibble::deframe() |
|
36 |
- |
|
37 |
-# Term to feature conversion list |
|
38 |
-term2feature <- mapping |> |
|
39 |
- dplyr::group_by(term_id) |> |
|
40 |
- dplyr::summarise(features = list(feature_id)) |> |
|
41 |
- tibble::deframe() |
|
42 |
- |
|
43 |
-# final structure required by functional_enrichment |
|
44 |
-term_data <- list( |
|
45 |
- term2name = term2name, |
|
46 |
- feature2term = feature2term, |
|
47 |
- term2feature = term2feature |
|
48 |
-) |> |
|
49 |
- structure(class = "fenr_terms") |
|
50 | 27 |
|
51 | 28 |
|
52 | 29 |
|
53 |
- |
|
54 |
-test_that("Expected normal output", { |
|
30 |
+test_that("Expected correct output", { |
|
55 | 31 |
td <- prepare_for_enrichment(terms, mapping, feature_name = "feature_id") |
56 | 32 |
|
57 |
- # Order is not mandatory, so need to sort before comparison |
|
58 |
- expect_equal(sort(term_data$term2name), sort(td$term2name)) |
|
59 |
- |
|
60 |
- p1 <- purrr::map2(td$term2feature, term_data$term2feature, function(f1, f2) { |
|
61 |
- expect_equal(sort(f1), sort(f2)) |
|
62 |
- }) |
|
63 |
- |
|
64 |
- p2 <- purrr::map2(td$feature2term, term_data$feature2term, function(f1, f2) { |
|
65 |
- expect_equal(sort(f1), sort(f2)) |
|
66 |
- }) |
|
33 |
+ # Check term names |
|
34 |
+ for(i in seq_along(terms$term_id)) { |
|
35 |
+ r <- terms[i, ] |
|
36 |
+ expect_equal(r$term_name, td$term2name[r$term_id]) |
|
37 |
+ } |
|
38 |
+ |
|
39 |
+ |
|
40 |
+ # Check term-feature hash |
|
41 |
+ term_ids <- mapping$term_id |> unique() |
|
42 |
+ chk1 <- term_ids |> |
|
43 |
+ purrr::map(function(trm) { |
|
44 |
+ expected <- mapping |> |
|
45 |
+ dplyr::filter(term_id == trm) |> |
|
46 |
+ dplyr::pull(feature_id) |> |
|
47 |
+ sort() |
|
48 |
+ returned <- td$term2feature[trm][[1]] |> |
|
49 |
+ sort() |
|
50 |
+ expect_equal(expected, returned) |
|
51 |
+ }) |
|
52 |
+ |
|
53 |
+ # Check feature-term hash |
|
54 |
+ feature_ids <- mapping$feature_id |> unique() |
|
55 |
+ chk2 <- feature_ids |> |
|
56 |
+ purrr::map(function(feat) { |
|
57 |
+ expected <- mapping |> |
|
58 |
+ dplyr::filter(feature_id == feat) |> |
|
59 |
+ dplyr::pull(term_id) |> |
|
60 |
+ sort() |
|
61 |
+ returned <- td$feature2term[feat][[1]] |> |
|
62 |
+ sort() |
|
63 |
+ expect_equal(expected, returned) |
|
64 |
+ }) |
|
67 | 65 |
}) |
68 | 66 |
|
69 | 67 |
|