Browse code

A memory bug fix

Marek Gierlinski authored on 08/01/2025 14:09:56
Showing 3 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: fenr
2 2
 Title: Fast functional enrichment for interactive applications
3
-Version: 1.3.1
3
+Version: 1.5.1
4 4
 Authors@R: person(
5 5
     given = "Marek",
6 6
     family = "Gierlinski",
... ...
@@ -53,6 +53,6 @@ Suggests:
53 53
     knitr,
54 54
     rmarkdown,
55 55
     topGO
56
-RoxygenNote: 7.3.1
56
+RoxygenNote: 7.3.2
57 57
 VignetteBuilder: knitr
58 58
 LazyData: false
... ...
@@ -171,3 +171,6 @@
171 171
 
172 172
  - GO term namespace added to the information extracted by `fetch_go`.
173 173
 
174
+## Version 1.4.1
175
+
176
+ - Attempted to fix a bizarre error on Bioconductor's test machines running older versions of macOS. Windows and Linux are not affected, and my laptop running macOS Sequoia 15.2 does not show any errors. I suspect a memory leak on older systems. The error `vector memory limit of 64.0 Gb reached, see mem.maxVSize()` occurred in the function `parse_kegg_genes()`, a flat-file parser for KEGG, around the call to `tidyr::separate()`, which I replaced with an alternative approach. It remains to be seen whether this fixes the error.
174 177
\ No newline at end of file
... ...
@@ -96,16 +96,20 @@ parse_kegg_genes <- function(s) {
96 96
       i <- i + 1
97 97
     }
98 98
 
99
-    # create final tibble, attempt to extract gene symbols when semicolon is found
100
-    genes |>
101
-      tibble::as_tibble_col(column_name = "data") |>
102
-      tidyr::separate(data, c("gene_id", "gene_symbol"), sep = "\\s+", extra = "merge") |>
103
-      dplyr::mutate(gene_symbol = dplyr::if_else(
104
-        stringr::str_detect(gene_symbol, ";"),
105
-        stringr::str_remove(gene_symbol, ";.+$"),
106
-        gene_id
107
-      )) |>
108
-      tibble::add_column(term_id = pathway)
99
+    purrr::map(genes, function(gene) {
100
+      # First element - gene ID, second - gene symbol, if contains semicolon
101
+      v <- stringr::str_split_1(gene, "\\s+")
102
+      tibble::tibble(
103
+        gene_id = v[1],
104
+        gene_symbol = ifelse(
105
+          stringr::str_detect(v[2], ";"),
106
+          stringr::str_remove(v[2], ";.*$"),
107
+          v[1]
108
+        ),
109
+        term_id = pathway
110
+      )
111
+    }) |> 
112
+      purrr::list_rbind()
109 113
   }) |>
110 114
     purrr::list_rbind()
111 115
 }