... | ... |
@@ -173,4 +173,8 @@ |
173 | 173 |
|
174 | 174 |
## Version 1.4.1 |
175 | 175 |
|
176 |
- - Attempted to fix a bizarre error message on Bioconductor's test machines with older version of MacOS. Windows and Linux are not affected; my laptop running Sequoia 5.2 does not show show errors. I suspect a memory leak in older systems. The error `vector memory limit of 64.0 Gb reached, see mem.maxVSize()` happened in the function parse_kegg_genes(), a flat-file parser for KEGG. It occurred around the call tidyr::separate(), which I replaced with an alternative approach. Will see if the error is fixed. |
|
177 | 176 |
\ No newline at end of file |
177 |
+ - Attempted to fix a bizarre error message on Bioconductor's test machines with older versions of macOS. Windows and Linux are not affected; my laptop running Sequoia 5.2 does not show errors. I suspect a memory leak in older systems. The error `vector memory limit of 64.0 Gb reached, see mem.maxVSize()` happened in the function parse_kegg_genes(), a flat-file parser for KEGG. It occurred around the call to tidyr::separate(), which I replaced with an alternative approach. Will see if the error is fixed. |
|
178 |
+ |
|
179 |
+ ## Version 1.4.2 |
|
180 |
+ |
|
181 |
+ - Added an evidence code column to the GO-term mapping table. It can be used to filter mappings based on their quality. See https://geneontology.org/docs/guide-go-evidence-codes for an explanation. |
|
178 | 182 |
\ No newline at end of file |
... | ... |
@@ -48,6 +48,7 @@ stringr::str_glue(" |
48 | 48 |
<Attribute name = 'ensembl_gene_id'/> |
49 | 49 |
<Attribute name = 'external_gene_name'/> |
50 | 50 |
<Attribute name = 'go_id'/> |
51 |
+ <Attribute name = 'go_linkage_type'/> |
|
51 | 52 |
</Dataset> |
52 | 53 |
</Query>") |> |
53 | 54 |
stringr::str_replace_all("\n", "") |> |
... | ... |
@@ -195,12 +196,13 @@ fetch_go_species <- function(on_error = c("stop", "warn", "ignore")) { |
195 | 196 |
#' either "stop" to halt execution, "warn" to issue a warning and return |
196 | 197 |
#' `NULL` or "ignore" to return `NULL` without warnings. Defaults to "stop". |
197 | 198 |
#' |
198 |
-#' @return A tibble with columns \code{gene_symbol}, \code{uniprot_id} and \code{term_id}. |
|
199 |
+#' @return A tibble with columns \code{gene_symbol}, \code{gene_id}, \code{db_id}, |
|
200 |
+#' \code{term_id} and \code{evidence}. |
|
199 | 201 |
#' @noRd |
200 | 202 |
fetch_go_genes_go <- function(species, use_cache, on_error) { |
201 | 203 |
# Binding variables from non-standard evaluation locally |
202 | 204 |
gene_id <- db_object_synonym <- symbol <- NULL |
203 |
- db_id <- go_term <- NULL |
|
205 |
+ db_id <- go_term <- evidence <- NULL |
|
204 | 206 |
|
205 | 207 |
url <- get_go_annotation_url() |
206 | 208 |
if(!assert_url_path(url, on_error)) |
... | ... |
@@ -211,7 +213,7 @@ fetch_go_genes_go <- function(species, use_cache, on_error) { |
211 | 213 |
readr::read_tsv(lpath, comment = "!", quote = "", col_names = GAF_COLUMNS, |
212 | 214 |
col_types = GAF_TYPES) |> |
213 | 215 |
dplyr::mutate(gene_id = stringr::str_remove(db_object_synonym, "\\|.*$")) |> |
214 |
- dplyr::select(gene_symbol = symbol, gene_id, db_id, term_id = go_term) |> |
|
216 |
+ dplyr::select(gene_symbol = symbol, gene_id, db_id, term_id = go_term, evidence) |> |
|
215 | 217 |
dplyr::distinct() |
216 | 218 |
} |
217 | 219 |
|
... | ... |
@@ -272,8 +274,8 @@ fetch_go_from_go <- function(species, use_cache, on_error) { |
272 | 274 |
#' either "stop" to halt execution, "warn" to issue a warning and return |
273 | 275 |
#' `NULL` or "ignore" to return `NULL` without warnings. Defaults to "stop". |
274 | 276 |
#' |
275 |
-#' @return A tibble with columns \code{gene_id}, \code{gene_symbol} and |
|
276 |
-#' \code{term_id}. |
|
277 |
+#' @return A tibble with columns \code{gene_id}, \code{gene_symbol}, |
|
278 |
+#' \code{term_id} and \code{evidence}. |
|
277 | 279 |
#' @noRd |
278 | 280 |
fetch_go_genes_bm <- function(dataset, use_cache, on_error) { |
279 | 281 |
xml <- get_biomart_xml(dataset) |> |
... | ... |
@@ -287,8 +289,8 @@ fetch_go_genes_bm <- function(dataset, use_cache, on_error) { |
287 | 289 |
# Problems with cache, bfcneedsupdate returns error for this query |
288 | 290 |
# lpath <- cached_url_path(stringr::str_glue("biomart_{dataset}"), resp, use_cache) |
289 | 291 |
res <- readr::read_tsv(req, show_col_types = FALSE) |
290 |
- if(ncol(res) == 3) { |
|
291 |
- res |> rlang::set_names(c("gene_id", "gene_symbol", "term_id")) |
|
292 |
+ if(ncol(res) == 4) { |
|
293 |
+ res |> rlang::set_names(c("gene_id", "gene_symbol", "term_id", "evidence")) |
|
292 | 294 |
} else { |
293 | 295 |
error_response("Problem with Biomart", on_error) |
294 | 296 |
} |
... | ... |
@@ -98,6 +98,8 @@ The second tibble contains gene-term mapping: |
98 | 98 |
go$mapping |
99 | 99 |
``` |
100 | 100 |
|
101 |
+Note that the mapping can be filtered based on the [evidence code](https://geneontology.org/docs/guide-go-evidence-codes/) (column `evidence`) to include only high-quality GO annotations before further analysis. Here, we simply use all annotations. |
|
102 |
+ |
|
101 | 103 |
To make these user-friendly data more suitable for rapid functional enrichment analysis, they need to be converted into a machine-friendly object using the following function: |
102 | 104 |
|
103 | 105 |
```{r prepare_for_enrichment} |