Bioconductor Code: fenr

Browse code

Version 0.1.14

MarekGierlinski authored on 02/02/2023 11:42:19
Showing 5 changed files

DESCRIPTION index 056b1d5..8d9756c 100644
NEWS.md index 43ad384..d877a4d 100644
R/enrichment.R index 3ab7eb9..76b4d98 100644
R/util.R index 7a938bc..81a1137 100644
tests/testthat/test_prepare_for_enrichment.R index 9c168da..e3c15f8 100644

History View file @ 4b6d4e6

@@ -1,6 +1,6 @@
                      Package: fenr
                      Title: Fast functional enrichment
                     -Version: 0.1.13
                     +Version: 0.1.14
                      Authors@R: person(
                          given = "Marek",
                          family = "Gierlinski",
@@ -32,7 +32,6 @@ Imports:
                          readr,
                          stringr,
                          tibble,
                     -    Rfast,
                          httr,
                          XML,
                          jsonlite,

NEWS.md

History View file @ 4b6d4e6

@@ -50,3 +50,6 @@
                      - Added functions `get_term_features` and `get_feature_terms` to access data safely
                      - HACK: BioPlanet server's SSL certificate expired, so an insecure download is needed.
                     +## Version 0.1.14
+                    +
                     +- Ditched large and clunky `Rfast`; now using native R environments as fast hashes (see https://riptutorial.com/r/example/18339/environments-as-hash-maps)

R/enrichment.R

History View file @ 4b6d4e6

@@ -77,10 +77,11 @@ prepare_for_enrichment <- function(terms, mapping, all_features = NULL, feature_
+                       }
                        # Hash to select term name
                     -  term2name <- Rfast::Hash(
                     -    keys = terms$term_id,
                     -    values = terms$term_name
                     -  )
                     +  term2name <- new.env(hash = TRUE)
                     +  for (i in 1:nrow(terms)) {
                     +    r <- terms[i, ]
                     +    term2name[[r$term_id]] <- r$term_name
                     +  }
                        # feature-term tibble
                        feature_term <- mapping |>
@@ -93,18 +94,18 @@ prepare_for_enrichment <- function(terms, mapping, all_features = NULL, feature_
                          dplyr::group_by(feature_id) |>
                          dplyr::summarise(terms = list(term_id)) |>
                          tibble::deframe()
                     -  feature2term <- Rfast::Hash()
                     +  feature2term <- new.env(hash = TRUE)
                        for(feat in names(f2t))
                     -    feature2term[feat] <- f2t[[feat]]
                     +    feature2term[[feat]] <- f2t[[feat]]
                        # Term to feature hash
                        t2f <- feature_term |>
                          dplyr::group_by(term_id) |>
                          dplyr::summarise(features = list(feature_id)) |>
                          tibble::deframe()
                     -  term2feature <- Rfast::Hash()
                     +  term2feature <- new.env(hash = TRUE)
                        for(term in names(t2f))
                     -    term2feature[term] <- t2f[[term]]
                     +    term2feature[[term]] <- t2f[[term]]
                        list(
                          term2name = term2name,
@@ -171,7 +172,7 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL
                        # all terms present in the selection
                        our_terms <- feat_sel |>
                     -    purrr::map(~term_data$feature2term[.x]) |>
                     +    purrr::map(~term_data$feature2term[[.x]]) |>
                          unlist() |>
                          unique()
@@ -182,8 +183,8 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL
                        res <- purrr::map_dfr(our_terms, function(term_id) {
                          # all features with the term
                     -    # term_data$term2feature is a Hash object
                     -    tfeats <- term_data$term2feature[term_id]
                     +    # term_data$term2feature is a hash environment
                     +    tfeats <- term_data$term2feature[[term_id]]
                          # features from selection with the term
                          # this is faster than intersect(tfeats, feat_sel)
@@ -217,7 +218,7 @@ functional_enrichment <- function(feat_all, feat_sel, term_data, feat2name = NUL
                          if (!is.null(feat2name)) tfeats_sel <- feat2name[tfeats_sel] |> unname()
                     -    term_name <- term_data$term2name[term_id]
                     +    term_name <- term_data$term2name[[term_id]]
                          # returns NAs if no term found
                          if (is.null(term_name)) term_name <- NA_character_

R/util.R

History View file @ 4b6d4e6

@@ -123,7 +123,7 @@ get_term_features <- function(term_data, term_id) {
                        assert_that(is(term_data, "fenr_terms"))
                        assert_that(is.string(term_id))
                     -  term_data$term2feature[term_id]
                     +  term_data$term2feature[[term_id]]
+                     }
@@ -142,5 +142,5 @@ get_feature_term <- function(term_data, feature_id) {
                        assert_that(is(term_data, "fenr_terms"))
                        assert_that(is.string(feature_id))
                     -  term_data$feature2term[feature_id]
                     +  term_data$feature2term[[feature_id]]
+                     }

tests/testthat/test_prepare_for_enrichment.R

History View file @ 4b6d4e6

@@ -1,5 +1,6 @@
                      library(testthat)
+                    +
                      # Set 100 features
                      N <- 100
                      features_all <- sprintf("gene_%03d", seq_len(N))
@@ -33,7 +34,7 @@ test_that("Expected correct output", {
                        # Check term names
                        for(i in seq_along(terms$term_id)) {
                          r <- terms[i, ]
                     -    expect_equal(sort(r$term_name), sort(td$term2name[r$term_id]))
                     +    expect_equal(sort(r$term_name), sort(td$term2name[[r$term_id]]))
+                       }
@@ -45,7 +46,7 @@ test_that("Expected correct output", {
                              dplyr::filter(term_id == trm) |>
                              dplyr::pull(feature_id) |>
                              sort()
                     -      returned <- td$term2feature[trm] |>
                     +      returned <- td$term2feature[[trm]] |>
                              sort()
                            expect_equal(expected, returned)
                          })
@@ -58,7 +59,7 @@ test_that("Expected correct output", {
                              dplyr::filter(feature_id == feat) |>
                              dplyr::pull(term_id) |>
                              sort()
                     -      returned <- td$feature2term[feat] |>
                     +      returned <- td$feature2term[[feat]] |>
                              sort()
                            expect_equal(expected, returned)
                          })