Browse code

Version 0.1.14

MarekGierlinski authored on 02/02/2023 11:42:19
Showing 5 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: fenr
2 2
 Title: Fast functional enrichment
3
-Version: 0.1.13
3
+Version: 0.1.14
4 4
 Authors@R: person(
5 5
     given = "Marek",
6 6
     family = "Gierlinski",
... ...
@@ -32,7 +32,6 @@ Imports:
32 32
     readr,
33 33
     stringr,
34 34
     tibble,
35
-    Rfast,
36 35
     httr,
37 36
     XML,
38 37
     jsonlite,
... ...
@@ -50,3 +50,6 @@
50 50
 - Added functions `get_term_features` and `get_feature_terms` to access data safely
51 51
 - HACK: BioPlanet server's SSL certificate expired, so need insecure download.
52 52
 
53
+## Version 0.1.14
54
+
55
+- Ditched large and clunky `Rfast` and using native R environments as fast hashes (see https://riptutorial.com/r/example/18339/environments-as-hash-maps)
... ...
@@ -77,10 +77,11 @@ prepare_for_enrichment <- function(terms, mapping, all_features = NULL, feature_
77 77
   }
78 78
 
79 79
   # Hash to select term name
80
-  term2name <- Rfast::Hash(
81
-    keys = terms$term_id,
82
-    values = terms$term_name
83
-  )
80
+  term2name <- new.env(hash = TRUE)
81
+  for (i in 1:nrow(terms)) {
82
+    r <- terms[i, ]
83
+    term2name[[r$term_id]] <- r$term_name
84
+  }
84 85
 
85 86
   # feature-term tibble
86 87
   feature_term <- mapping |>
... ...
@@ -93,18 +94,18 @@ prepare_for_enrichment <- function(terms, mapping, all_features = NULL, feature_
93 94
     dplyr::group_by(feature_id) |>
94 95
     dplyr::summarise(terms = list(term_id)) |>
95 96
     tibble::deframe()
96
-  feature2term <- Rfast::Hash()
97
+  feature2term <- new.env(hash = TRUE)
97 98
   for(feat in names(f2t))
98
-    feature2term[feat] <- f2t[[feat]]
99
+    feature2term[[feat]] <- f2t[[feat]]
99 100
 
100 101
   # Term to feature hash
101 102
   t2f <- feature_term |>
102 103
     dplyr::group_by(term_id) |>
103 104
     dplyr::summarise(features = list(feature_id)) |>
104 105
     tibble::deframe()
105
-  term2feature <- Rfast::Hash()
106
+  term2feature <- new.env(hash = TRUE)
106 107
   for(term in names(t2f))
107
-    term2feature[term] <- t2f[[term]]
108
+    term2feature[[term]] <- t2f[[term]]
108 109
 
109 110
   list(
110 111
     term2name = term2name,
... ...
@@ -171,7 +172,7 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL
171 172
 
172 173
   # all terms present in the selection
173 174
   our_terms <- feat_sel |>
174
-    purrr::map(~term_data$feature2term[.x]) |>
175
+    purrr::map(~term_data$feature2term[[.x]]) |>
175 176
     unlist() |>
176 177
     unique()
177 178
 
... ...
@@ -182,8 +183,8 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL
182 183
 
183 184
   res <- purrr::map_dfr(our_terms, function(term_id) {
184 185
     # all features with the term
185
-    # term_data$term2feature is a Hash object
186
-    tfeats <- term_data$term2feature[term_id]
186
+    # term_data$term2feature is a hash environment
187
+    tfeats <- term_data$term2feature[[term_id]]
187 188
 
188 189
     # features from selection with the term
189 190
     # this is faster than intersect(tfeats, feat_sel)
... ...
@@ -217,7 +218,7 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL
217 218
 
218 219
     if (!is.null(feat2name)) tfeats_sel <- feat2name[tfeats_sel] |> unname()
219 220
 
220
-    term_name <- term_data$term2name[term_id]
221
+    term_name <- term_data$term2name[[term_id]]
221 222
     # returns NAs if no term found
222 223
     if (is.null(term_name)) term_name <- NA_character_
223 224
 
... ...
@@ -123,7 +123,7 @@ get_term_features <- function(term_data, term_id) {
123 123
   assert_that(is(term_data, "fenr_terms"))
124 124
   assert_that(is.string(term_id))
125 125
 
126
-  term_data$term2feature[term_id]
126
+  term_data$term2feature[[term_id]]
127 127
 }
128 128
 
129 129
 
... ...
@@ -142,5 +142,5 @@ get_feature_term <- function(term_data, feature_id) {
142 142
   assert_that(is(term_data, "fenr_terms"))
143 143
   assert_that(is.string(feature_id))
144 144
 
145
-  term_data$feature2term[feature_id]
145
+  term_data$feature2term[[feature_id]]
146 146
 }
... ...
@@ -1,5 +1,6 @@
1 1
 library(testthat)
2 2
 
3
+
3 4
 # Set 100 features
4 5
 N <- 100
5 6
 features_all <- sprintf("gene_%03d", seq_len(N))
... ...
@@ -33,7 +34,7 @@ test_that("Expected correct output", {
33 34
   # Check term names
34 35
   for(i in seq_along(terms$term_id)) {
35 36
     r <- terms[i, ]
36
-    expect_equal(sort(r$term_name), sort(td$term2name[r$term_id]))
37
+    expect_equal(sort(r$term_name), sort(td$term2name[[r$term_id]]))
37 38
   }
38 39
 
39 40
 
... ...
@@ -45,7 +46,7 @@ test_that("Expected correct output", {
45 46
         dplyr::filter(term_id == trm) |>
46 47
         dplyr::pull(feature_id) |>
47 48
         sort()
48
-      returned <- td$term2feature[trm] |>
49
+      returned <- td$term2feature[[trm]] |>
49 50
         sort()
50 51
       expect_equal(expected, returned)
51 52
     })
... ...
@@ -58,7 +59,7 @@ test_that("Expected correct output", {
58 59
         dplyr::filter(feature_id == feat) |>
59 60
         dplyr::pull(term_id) |>
60 61
         sort()
61
-      returned <- td$feature2term[feat] |>
62
+      returned <- td$feature2term[[feat]] |>
62 63
         sort()
63 64
       expect_equal(expected, returned)
64 65
     })