... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: fenr |
2 | 2 |
Title: Fast functional enrichment |
3 |
-Version: 0.1.13 |
|
3 |
+Version: 0.1.14 |
|
4 | 4 |
Authors@R: person( |
5 | 5 |
given = "Marek", |
6 | 6 |
family = "Gierlinski", |
... | ... |
@@ -32,7 +32,6 @@ Imports: |
32 | 32 |
readr, |
33 | 33 |
stringr, |
34 | 34 |
tibble, |
35 |
- Rfast, |
|
36 | 35 |
httr, |
37 | 36 |
XML, |
38 | 37 |
jsonlite, |
... | ... |
@@ -50,3 +50,6 @@ |
50 | 50 |
- Added functions `get_term_features` and `get_feature_terms` to access data safely |
51 | 51 |
- HACK: BioPlanet server's SSL certificate expired, so need insecure download. |
52 | 52 |
|
53 |
+## Version 0.1.14 |
|
54 |
+ |
|
55 |
+- Ditched large and clunky `Rfast` and using native R environments as fast hashes (see https://riptutorial.com/r/example/18339/environments-as-hash-maps) |
... | ... |
@@ -77,10 +77,11 @@ prepare_for_enrichment <- function(terms, mapping, all_features = NULL, feature_ |
77 | 77 |
} |
78 | 78 |
|
79 | 79 |
# Hash to select term name |
80 |
- term2name <- Rfast::Hash( |
|
81 |
- keys = terms$term_id, |
|
82 |
- values = terms$term_name |
|
83 |
- ) |
|
80 |
+ term2name <- new.env(hash = TRUE) |
|
81 |
+ for (i in 1:nrow(terms)) { |
|
82 |
+ r <- terms[i, ] |
|
83 |
+ term2name[[r$term_id]] <- r$term_name |
|
84 |
+ } |
|
84 | 85 |
|
85 | 86 |
# feature-term tibble |
86 | 87 |
feature_term <- mapping |> |
... | ... |
@@ -93,18 +94,18 @@ prepare_for_enrichment <- function(terms, mapping, all_features = NULL, feature_ |
93 | 94 |
dplyr::group_by(feature_id) |> |
94 | 95 |
dplyr::summarise(terms = list(term_id)) |> |
95 | 96 |
tibble::deframe() |
96 |
- feature2term <- Rfast::Hash() |
|
97 |
+ feature2term <- new.env(hash = TRUE) |
|
97 | 98 |
for(feat in names(f2t)) |
98 |
- feature2term[feat] <- f2t[[feat]] |
|
99 |
+ feature2term[[feat]] <- f2t[[feat]] |
|
99 | 100 |
|
100 | 101 |
# Term to feature hash |
101 | 102 |
t2f <- feature_term |> |
102 | 103 |
dplyr::group_by(term_id) |> |
103 | 104 |
dplyr::summarise(features = list(feature_id)) |> |
104 | 105 |
tibble::deframe() |
105 |
- term2feature <- Rfast::Hash() |
|
106 |
+ term2feature <- new.env(hash = TRUE) |
|
106 | 107 |
for(term in names(t2f)) |
107 |
- term2feature[term] <- t2f[[term]] |
|
108 |
+ term2feature[[term]] <- t2f[[term]] |
|
108 | 109 |
|
109 | 110 |
list( |
110 | 111 |
term2name = term2name, |
... | ... |
@@ -171,7 +172,7 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL |
171 | 172 |
|
172 | 173 |
# all terms present in the selection |
173 | 174 |
our_terms <- feat_sel |> |
174 |
- purrr::map(~term_data$feature2term[.x]) |> |
|
175 |
+ purrr::map(~term_data$feature2term[[.x]]) |> |
|
175 | 176 |
unlist() |> |
176 | 177 |
unique() |
177 | 178 |
|
... | ... |
@@ -182,8 +183,8 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL |
182 | 183 |
|
183 | 184 |
res <- purrr::map_dfr(our_terms, function(term_id) { |
184 | 185 |
# all features with the term |
185 |
- # term_data$term2feature is a Hash object |
|
186 |
- tfeats <- term_data$term2feature[term_id] |
|
186 |
+ # term_data$term2feature is a hash environment |
|
187 |
+ tfeats <- term_data$term2feature[[term_id]] |
|
187 | 188 |
|
188 | 189 |
# features from selection with the term |
189 | 190 |
# this is faster than intersect(tfeats, feat_sel) |
... | ... |
@@ -217,7 +218,7 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL |
217 | 218 |
|
218 | 219 |
if (!is.null(feat2name)) tfeats_sel <- feat2name[tfeats_sel] |> unname() |
219 | 220 |
|
220 |
- term_name <- term_data$term2name[term_id] |
|
221 |
+ term_name <- term_data$term2name[[term_id]] |
|
221 | 222 |
# returns NAs if no term found |
222 | 223 |
if (is.null(term_name)) term_name <- NA_character_ |
223 | 224 |
|
... | ... |
@@ -123,7 +123,7 @@ get_term_features <- function(term_data, term_id) { |
123 | 123 |
assert_that(is(term_data, "fenr_terms")) |
124 | 124 |
assert_that(is.string(term_id)) |
125 | 125 |
|
126 |
- term_data$term2feature[term_id] |
|
126 |
+ term_data$term2feature[[term_id]] |
|
127 | 127 |
} |
128 | 128 |
|
129 | 129 |
|
... | ... |
@@ -142,5 +142,5 @@ get_feature_term <- function(term_data, feature_id) { |
142 | 142 |
assert_that(is(term_data, "fenr_terms")) |
143 | 143 |
assert_that(is.string(feature_id)) |
144 | 144 |
|
145 |
- term_data$feature2term[feature_id] |
|
145 |
+ term_data$feature2term[[feature_id]] |
|
146 | 146 |
} |
... | ... |
@@ -1,5 +1,6 @@ |
1 | 1 |
library(testthat) |
2 | 2 |
|
3 |
+ |
|
3 | 4 |
# Set 100 features |
4 | 5 |
N <- 100 |
5 | 6 |
features_all <- sprintf("gene_%03d", seq_len(N)) |
... | ... |
@@ -33,7 +34,7 @@ test_that("Expected correct output", { |
33 | 34 |
# Check term names |
34 | 35 |
for(i in seq_along(terms$term_id)) { |
35 | 36 |
r <- terms[i, ] |
36 |
- expect_equal(sort(r$term_name), sort(td$term2name[r$term_id])) |
|
37 |
+ expect_equal(sort(r$term_name), sort(td$term2name[[r$term_id]])) |
|
37 | 38 |
} |
38 | 39 |
|
39 | 40 |
|
... | ... |
@@ -45,7 +46,7 @@ test_that("Expected correct output", { |
45 | 46 |
dplyr::filter(term_id == trm) |> |
46 | 47 |
dplyr::pull(feature_id) |> |
47 | 48 |
sort() |
48 |
- returned <- td$term2feature[trm] |> |
|
49 |
+ returned <- td$term2feature[[trm]] |> |
|
49 | 50 |
sort() |
50 | 51 |
expect_equal(expected, returned) |
51 | 52 |
}) |
... | ... |
@@ -58,7 +59,7 @@ test_that("Expected correct output", { |
58 | 59 |
dplyr::filter(feature_id == feat) |> |
59 | 60 |
dplyr::pull(term_id) |> |
60 | 61 |
sort() |
61 |
- returned <- td$feature2term[feat] |> |
|
62 |
+ returned <- td$feature2term[[feat]] |> |
|
62 | 63 |
sort() |
63 | 64 |
expect_equal(expected, returned) |
64 | 65 |
}) |