Browse code

Improving KEGG

Marek Gierlinski authored on 19/08/2022 10:08:26
Showing 5 changed files

... ...
@@ -14,6 +14,7 @@ Encoding: UTF-8
14 14
 LazyData: true
15 15
 biocViews:
16 16
 Imports:
17
+    assertthat,
17 18
     stats,
18 19
     dplyr,
19 20
     httr,
... ...
@@ -5,7 +5,9 @@ export(fetch_go_from_bm)
5 5
 export(fetch_go_from_go)
6 6
 export(fetch_go_species)
7 7
 export(fetch_kegg)
8
+export(fetch_kegg_species)
8 9
 export(fetch_reactome)
9 10
 export(fetch_reactome_species)
10 11
 export(functional_enrichment)
11 12
 export(prepare_for_enrichment)
13
+import(assertthat)
... ...
@@ -1,3 +1,14 @@
1
+#' Find all species available from KEGG
2
+#'
3
+#' @return A tibble, in which column \code{organism} contains species
4
+#'   designations used in function \code{fetch_kegg}.
5
+#' @export
6
+fetch_kegg_species <- function() {
7
+  KEGGREST::keggList("organism") |>
8
+    tibble::as_tibble()
9
+}
10
+
11
+
1 12
 #' Get functional term data from KEGG
2 13
 #'
3 14
 #' Download information (pathway ID and name) and gene-pathway mapping (entrez
... ...
@@ -7,21 +18,29 @@
7 18
 #' based on BioConductor package \pkg{KEGGREST}.
8 19
 #'
9 20
 #' @param species KEGG species code, for example "hsa" for human. The
10
-#'   full list of available KEGG species can be found by using
11
-#'   \code{KEGGREST::keggList("organism")}. The column \code{organism} contains
12
-#'   the codes used here.
13
-#' @param batch_size Nubmer of pathways sent to KEGG database in one query. The
21
+#'   full list of available KEGG species can be found by using \code{fetch_kegg_species}.
22
+#' @param batch_size Number of pathways sent to KEGG database in one query. The
14 23
 #'   maximum allowed is 10.
15 24
 #'
16 25
 #' @return A list with \code{terms} and \code{mapping} tibbles.
17 26
 #' @export
27
+#' @import assertthat
18 28
 #'
19 29
 #' @examples
20 30
 #' \dontrun{
21 31
 #' kegg_data <- fetch_kegg("hsa")
22 32
 #' }
23 33
 fetch_kegg <- function(species, batch_size = 10) {
24
-  lst <- KEGGREST::keggList("pathway", species)
34
+  assert_that(is.string(species))
35
+  assert_that(is.count(batch_size))
36
+  assert_that(batch_size <= 10, msg = "batch_size needs to be between 1 and 10")
37
+
38
+  lst <- tryCatch(
39
+    KEGGREST::keggList("pathway", species),
40
+    error = function(err)
41
+      stop(stringr::str_glue("There is a problem retrieving KEGG pathways for species '{species}'."))
42
+  )
43
+
25 44
   terms <- tibble::tibble(
26 45
     term_id = names(lst) |> stringr::str_remove("path:"),
27 46
     term_name = lst
... ...
@@ -31,7 +50,11 @@ fetch_kegg <- function(species, batch_size = 10) {
31 50
 
32 51
   pb <- progress::progress_bar$new(total = length(batches))
33 52
   mapping <- purrr::map_dfr(batches, function(batch) {
34
-    pws <- KEGGREST::keggGet(batch)
53
+    pws <- tryCatch(
54
+      KEGGREST::keggGet(batch),
55
+      error = function(err)
56
+        stop(stringr::str_glue("There is a problem retrieving KEGG batch:\n{err}"))
57
+    )
35 58
     pb$tick()
36 59
     purrr::map_dfr(pws, function(pw) {
37 60
       if (!is.null(pw$GENE)) {
... ...
@@ -8,11 +8,9 @@ fetch_kegg(species, batch_size = 10)
8 8
 }
9 9
 \arguments{
10 10
 \item{species}{KEGG species code, for example "hsa" for human. The
11
-full list of available KEGG species can be found by using
12
-\code{KEGGREST::keggList("organism")}. The column \code{organism} contains
13
-the codes used here.}
11
+full list of available KEGG species can be found by using \code{fetch_kegg_species}.}
14 12
 
15
-\item{batch_size}{Nubmer of pathways sent to KEGG database in one query. The
13
+\item{batch_size}{Number of pathways sent to KEGG database in one query. The
16 14
 maximum allowed is 10.}
17 15
 }
18 16
 \value{
19 17
new file mode 100644
... ...
@@ -0,0 +1,15 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/kegg.R
3
+\name{fetch_kegg_species}
4
+\alias{fetch_kegg_species}
5
+\title{Find all species available from KEGG}
6
+\usage{
7
+fetch_kegg_species()
8
+}
9
+\value{
10
+A tibble, in which column \code{organism} contains species
11
+  designations used in function \code{fetch_kegg}.
12
+}
13
+\description{
14
+Find all species available from KEGG
15
+}