Browse code

Improving documentation

Marek Gierlinski authored on 19/08/2022 09:22:50
Showing 17 changed files

... ...
@@ -1,5 +1,8 @@
1 1
 #' Get functional term data from BioPlanet
2 2
 #'
3
+#' Download term information (term ID and name) and gene-pathway mapping
4
+#' (NCBI gene ID, gene symbol and pathway ID) from BioPlanet.
5
+#'
3 6
 #' @return A list with \code{terms} and \code{mapping} tibbles.
4 7
 #' @export
5 8
 #'
... ...
@@ -16,12 +16,12 @@
16 16
 #' @param mapping Information about term-feature mapping. A tibble with
17 17
 #'   \code{term_id} and a feature id, as identified with \code{feature_name}
18 18
 #'   argument. For example, if this tibble contains \code{gene_symbol} and
19
-#'   \code{term_id}, then \code{feature_name = "gene_symbol"}.
19
+#'   \code{term_id}, then you need to set \code{feature_name = "gene_symbol"}.
20 20
 #' @param all_features A vector with all feature ids used as background for
21
-#'   enrichment. If not specified, all features from \code{mapping} will be
22
-#'   used, resulting in a larger objects size.
21
+#'   enrichment. If not specified, all features found in \code{mapping} will be
22
+#'   used, resulting in a larger object size.
23 23
 #' @param feature_name Which column to use from mapping table, e.g.
24
-#'   "gene_symbol" or "ensembl_gene_id".
24
+#'   \code{gene_symbol} or \code{ensembl_gene_id}.
25 25
 #'
26 26
 #' @return An object of class \code{fenr_terms} required by
27 27
 #'   \code{functional_enrichment}.
... ...
@@ -98,18 +98,18 @@ prepare_for_enrichment <- function(terms, mapping, all_features = NULL, feature_
98 98
 #' @details
99 99
 #'
100 100
 #' Functional enrichment in a selection (e.g. significantly DE features) of
101
-#' features, using hypergeometric probability. A feature can be a gene, protein,
102
-#' etc. `term_data` is an object with functional term information and
103
-#' feature-term assignment. It is a list of: `term2info` - a named vector term
104
-#' id => term name; `term2feature` - a list term_id => vector of feature_ids;
105
-#' `feature2term` - a list feature id => vector of term ids. It can be created
106
-#' by `prepare_for_enrichment` function.
101
+#' features, using hypergeometric probability (that is, Fisher's exact test). A
102
+#' feature can be a gene, protein, etc. \code{term_data} is an object with
103
+#' functional term information and feature-term assignment.
107 104
 #'
108 105
 #' @param feat_all A character vector with all feature identifiers. This is the
109 106
 #'   background for enrichment.
110 107
 #' @param feat_sel A character vector with feature identifiers in the selection.
111
-#' @param term_data An object class \code{fenr_terms}, as explained in details.
112
-#'   It can be created using \code{prepare_for_enrichment}.
108
+#' @param term_data An object of class \code{fenr_terms}, created by
109
+#'   \code{prepare_for_enrichment}. It is a list of three elements: \itemize{
110
+#'   \item{\code{term2info} - a named vector term id => term name}
111
+#'   \item{\code{term2feature} - a list term id => vector of feature ids}
112
+#'   \item{\code{feature2term} - a list feature id => vector of term ids}}
113 113
 #' @param feat2name An optional named list to convert feature id into feature
114 114
 #'   name.
115 115
 #' @param min_count Minimal count of features with term in the selection to be
... ...
@@ -117,14 +117,16 @@ prepare_for_enrichment <- function(terms, mapping, all_features = NULL, feature_
117 117
 #' @param fdr_limit Only terms with p_adjust below this limit are returned.
118 118
 #'
119 119
 #' @return A tibble with enrichment results. For each term the following
120
-#'   quantities are reported: N_with - number of features with this term in the
121
-#'   among all features, n_with_sel - number of features with this term in the
122
-#'   selection, n_expect - expected number of features with this term in the
123
-#'   selection, under the null hypothesis that terms are assigned to features
124
-#'   randomly, enrichment - ratio of n_with_sel / n_expect; odds_ratio - odds
125
-#'   ratio for enrichment, p_value - p-value from a single hypergeometric test;
126
-#'   p_adjust - p-value adjusted for multiple tests using Benjamini-Hochberg
127
-#'   approach.
120
+#'   quantities are reported: \itemize{ \item{\code{N_with} - number of features
121
+#'   with this term among all features} \item{\code{n_with_sel} - number of
122
+#'   features with this term in the selection} \item{\code{n_expect} - expected
123
+#'   number of features with this term in the selection, under the null
124
+#'   hypothesis that terms are assigned to features randomly}
125
+#'   \item{\code{enrichment} - ratio of n_with_sel / n_expect}
126
+#'   \item{\code{odds_ratio} - odds ratio for enrichment; infinite when all
127
+#'   features with the given term are in the selection} \item{\code{p_value} -
128
+#'   p-value from a single hypergeometric test} \item{\code{p_adjust} - p-value
129
+#'   adjusted for multiple tests using Benjamini-Hochberg approach} }
128 130
 #'
129 131
 #' @examples
130 132
 #' bp <- fetch_bp()
... ...
@@ -4,9 +4,9 @@
4 4
 #' gene ID, gene symbol and pathway ID) from KEGG. Gene symbols are extracted
5 5
 #' from gene descriptions. For some species (e.g. yeast), gene symbols are
6 6
 #' returned instead of entrez IDs and not in gene description. This function is
7
-#' based on BioConductor package \code{KEGGREST}.
7
+#' based on Bioconductor package \pkg{KEGGREST}.
8 8
 #'
9
-#' @param species KEGG species code, for example \code{"hsa"} for human. The
9
+#' @param species KEGG species code, for example "hsa" for human. The
10 10
 #'   full list of available KEGG species can be found by using
11 11
 #'   \code{KEGGREST::keggList("organism")}. The column \code{organism} contains
12 12
 #'   the codes used here.
... ...
@@ -2,7 +2,7 @@
2 2
 #'
3 3
 #' @param spec Reactome species
4 4
 #'
5
-#' @return A tibble with \code{term_id} and \code{term_name}
5
+#' @return A tibble with columns \code{term_id} and \code{term_name}.
6 6
 fetch_reactome_pathways <- function(spec) {
7 7
   url <- "https://reactome.org/download/current/ReactomePathways.txt"
8 8
   colms <- c("term_id", "term_name", "species")
... ...
@@ -14,7 +14,7 @@ fetch_reactome_pathways <- function(spec) {
14 14
 #'
15 15
 #' @param spec Reactome species
16 16
 #'
17
-#' @return A tibble with \code{gene_id} and \code{term_id}
17
+#' @return A tibble with columns \code{gene_id} and \code{term_id}.
18 18
 fetch_reactome_ensembl_genes <- function(spec) {
19 19
   url <- "https://reactome.org/download/current/Ensembl2Reactome.txt"
20 20
   colms <- c("gene_id", "term_id", "url", "event", "evidence", "species")
... ...
@@ -26,7 +26,7 @@ fetch_reactome_ensembl_genes <- function(spec) {
26 26
 
27 27
 #' List of available Reactome species
28 28
 #'
29
-#' @return A character vector with species names used by Reactome
29
+#' @return A character vector with species names used by Reactome.
30 30
 #' @export
31 31
 fetch_reactome_species <- function() {
32 32
   url <- "https://reactome.org/download/current/Ensembl2Reactome.txt"
... ...
@@ -41,8 +41,8 @@ fetch_reactome_species <- function() {
41 41
 #' Download term information (pathway ID and name) and gene-pathway mapping
42 42
 #' (Ensembl gene ID and pathway ID) from Reactome.
43 43
 #'
44
-#' @param species Reactome species designation, for example \code{"Homo
45
-#'   sapiens"} for human. Full list of available species can be found using
44
+#' @param species Reactome species designation, for example "Homo
45
+#'   sapiens" for human. Full list of available species can be found using
46 46
 #'   \code{fetch_reactome_species()}.
47 47
 #'
48 48
 #' @return A list with \code{terms} and \code{mapping} tibbles.
... ...
@@ -1,6 +1,6 @@
1 1
 #' Check if URL file exists
2 2
 #'
3
-#' From https://stackoverflow.com/questions/52911812/check-if-url-exists-in-r.
3
+#' From \url{https://stackoverflow.com/questions/52911812/check-if-url-exists-in-r}.
4 4
 #'
5 5
 #' @param x a single URL
6 6
 #' @param non_2xx_return_value what to do if the site exists but the
... ...
@@ -10,7 +10,8 @@ fetch_bp()
10 10
 A list with \code{terms} and \code{mapping} tibbles.
11 11
 }
12 12
 \description{
13
-Get functional term data from BioPlanet
13
+Download term information (term ID and name) and gene-pathway mapping
14
+(NCBI gene ID, gene symbol and pathway ID) from BioPlanet.
14 15
 }
15 16
 \examples{
16 17
 \dontrun{
... ...
@@ -7,9 +7,9 @@
7 7
 fetch_go_from_go(species)
8 8
 }
9 9
 \arguments{
10
-\item{species}{Root name for species file under
11
-\code{http://current.geneontology.org/annotations}. Examples are
12
-\code{"goa_human"} for human, \code{"mgi"} for mouse or \code{"sgd"} for yeast.}
10
+\item{species}{Species designation. Base file name for species file under
11
+\url{http://current.geneontology.org/annotations}. Examples are
12
+\file{goa_human} for human, \file{mgi} for mouse or \file{sgd} for yeast.}
13 13
 }
14 14
 \value{
15 15
 A list with \code{terms} and \code{mapping} tibbles.
... ...
@@ -11,7 +11,8 @@ fetch_go_genes_bm(mart)
11 11
 database, created with, e.g., \code{useEnsembl}.}
12 12
 }
13 13
 \value{
14
-A tibble with ensembl_gene_is, gene_symbol and term_id.
14
+A tibble with columns \code{ensembl_gene_id}, \code{gene_symbol} and
15
+  \code{term_id}.
15 16
 }
16 17
 \description{
17 18
 Download GO term gene mapping from Ensembl
... ...
@@ -7,12 +7,12 @@
7 7
 fetch_go_genes_go(species)
8 8
 }
9 9
 \arguments{
10
-\item{species}{Root name for species file under
11
-\code{http://current.geneontology.org/annotations}. Examples are
12
-\code{goa_human} for human, \code{mgi} for mouse or \code{sgd} for yeast.}
10
+\item{species}{Species designation. Base file name for species file under
11
+\url{http://current.geneontology.org/annotations}. Examples are
12
+\file{goa_human} for human, \file{mgi} for mouse or \file{sgd} for yeast.}
13 13
 }
14 14
 \value{
15
-A tibble with gene_name, uniprot_id and term_id.
15
+A tibble with columns \code{gene_symbol}, \code{uniprot_id} and \code{term_id}.
16 16
 }
17 17
 \description{
18 18
 Download GO term gene mapping from geneontology.org
... ...
@@ -7,7 +7,7 @@
7 7
 fetch_kegg(species, batch_size = 10)
8 8
 }
9 9
 \arguments{
10
-\item{species}{KEGG species code, for example \code{"hsa"} for human. The
10
+\item{species}{KEGG species code, for example "hsa" for human. The
11 11
 full list of available KEGG species can be found by using
12 12
 \code{KEGGREST::keggList("organism")}. The column \code{organism} contains
13 13
 the codes used here.}
... ...
@@ -23,7 +23,7 @@ Download information (pathway ID and name) and gene-pathway mapping (entrez
23 23
 gene ID, gene symbol and pathway ID) from KEGG. Gene symbols are extracted
24 24
 from gene descriptions. For some species (e.g. yeast), gene symbols are
25 25
 returned instead of entrez IDs and not in gene description. This function is
26
-based on BioConductor package \code{KEGGREST}.
26
+based on Bioconductor package \pkg{KEGGREST}.
27 27
 }
28 28
 \examples{
29 29
 \dontrun{
... ...
@@ -7,8 +7,8 @@
7 7
 fetch_reactome(species)
8 8
 }
9 9
 \arguments{
10
-\item{species}{Reactome species designation, for example \code{"Homo
11
-sapiens"} for human. Full list of available species can be found using
10
+\item{species}{Reactome species designation, for example "Homo
11
+sapiens" for human. Full list of available species can be found using
12 12
 \code{fetch_reactome_species()}.}
13 13
 }
14 14
 \value{
... ...
@@ -10,7 +10,7 @@ fetch_reactome_ensembl_genes(spec)
10 10
 \item{spec}{Reactome species}
11 11
 }
12 12
 \value{
13
-A tibble with \code{gene_id} and \code{term_id}
13
+A tibble with columns \code{gene_id} and \code{term_id}.
14 14
 }
15 15
 \description{
16 16
 Download term Ensembl gene ID mapping from Reactome
... ...
@@ -10,7 +10,7 @@ fetch_reactome_pathways(spec)
10 10
 \item{spec}{Reactome species}
11 11
 }
12 12
 \value{
13
-A tibble with \code{term_id} and \code{term_name}
13
+A tibble with columns \code{term_id} and \code{term_name}.
14 14
 }
15 15
 \description{
16 16
 Download pathway data from Reactome
... ...
@@ -7,7 +7,7 @@
7 7
 fetch_reactome_species()
8 8
 }
9 9
 \value{
10
-A character vector with species names used by Reactome
10
+A character vector with species names used by Reactome.
11 11
 }
12 12
 \description{
13 13
 List of available Reactome species
... ...
@@ -19,8 +19,11 @@ background for enrichment.}
19 19
 
20 20
 \item{feat_sel}{A character vector with feature identifiers in the selection.}
21 21
 
22
-\item{term_data}{An object class \code{fenr_terms}, as explained in details.
23
-It can be created using \code{prepare_for_enrichment}.}
22
+\item{term_data}{An object of class \code{fenr_terms}, created by
23
+\code{prepare_for_enrichment}. It is a list of three elements: \itemize{
24
+\item{\code{term2info} - a named vector term id => term name}
25
+\item{\code{term2feature} - a list term id => vector of feature ids}
26
+\item{\code{feature2term} - a list feature id => vector of term ids}}}
24 27
 
25 28
 \item{feat2name}{An optional named list to convert feature id into feature
26 29
 name.}
... ...
@@ -32,14 +35,16 @@ used.}
32 35
 }
33 36
 \value{
34 37
 A tibble with enrichment results. For each term the following
35
-  quantities are reported: N_with - number of features with this term in the
36
-  among all features, n_with_sel - number of features with this term in the
37
-  selection, n_expect - expected number of features with this term in the
38
-  selection, under the null hypothesis that terms are assigned to features
39
-  randomly, enrichment - ratio of n_with_sel / n_expect; odds_ratio - odds
40
-  ratio for enrichment, p_value - p-value from a single hypergeometric test;
41
-  p_adjust - p-value adjusted for multiple tests using Benjamini-Hochberg
42
-  approach.
38
+  quantities are reported: \itemize{ \item{\code{N_with} - number of features
39
+  with this term among all features} \item{\code{n_with_sel} - number of
40
+  features with this term in the selection} \item{\code{n_expect} - expected
41
+  number of features with this term in the selection, under the null
42
+  hypothesis that terms are assigned to features randomly}
43
+  \item{\code{enrichment} - ratio of n_with_sel / n_expect}
44
+  \item{\code{odds_ratio} - odds ratio for enrichment; infinite when all
45
+  features with the given term are in the selection} \item{\code{p_value} -
46
+  p-value from a single hypergeometric test} \item{\code{p_adjust} - p-value
47
+  adjusted for multiple tests using Benjamini-Hochberg approach} }
43 48
 }
44 49
 \description{
45 50
 Fast functional enrichment based on hypergeometric distribution. Can be used
... ...
@@ -47,12 +52,9 @@ in interactive applications.
47 52
 }
48 53
 \details{
49 54
 Functional enrichment in a selection (e.g. significantly DE features) of
50
-features, using hypergeometric probability. A feature can be a gene, protein,
51
-etc. `term_data` is an object with functional term information and
52
-feature-term assignment. It is a list of: `term2info` - a named vector term
53
-id => term name; `term2feature` - a list term_id => vector of feature_ids;
54
-`feature2term` - a list feature id => vector of term ids. It can be created
55
-by `prepare_for_enrichment` function.
55
+features, using hypergeometric probability (that is, Fisher's exact test). A
56
+feature can be a gene, protein, etc. \code{term_data} is an object with
57
+functional term information and feature-term assignment.
56 58
 }
57 59
 \examples{
58 60
 bp <- fetch_bp()
... ...
@@ -18,17 +18,17 @@ prepare_for_enrichment(
18 18
 \item{mapping}{Information about term-feature mapping. A tibble with
19 19
 \code{term_id} and a feature id, as identified with \code{feature_name}
20 20
 argument. For example, if this tibble contains \code{gene_symbol} and
21
-\code{term_id}, then \code{feature_name = "gene_symbol"}.}
21
+\code{term_id}, then you need to set \code{feature_name = "gene_symbol"}.}
22 22
 
23 23
 \item{all_features}{A vector with all feature ids used as background for
24
-enrichment. If not specified, all features from \code{mapping} will be
25
-used, resulting in a larger objects size.}
24
+enrichment. If not specified, all features found in \code{mapping} will be
25
+used, resulting in a larger object size.}
26 26
 
27 27
 \item{feature_name}{Which column to use from mapping table, e.g.
28
-"gene_symbol" or "ensembl_gene_id".}
28
+\code{gene_symbol} or \code{ensembl_gene_id}.}
29 29
 }
30 30
 \value{
31
-An object class \code{fterms} required by
31
+An object of class \code{fenr_terms} required by
32 32
   \code{functional_enrichment}.
33 33
 }
34 34
 \description{
... ...
@@ -19,5 +19,5 @@ arises a warning message will be displayed. Default is `FALSE`.}
19 19
 to `httr::HEAD()` and/or `httr::GET()`}
20 20
 }
21 21
 \description{
22
-From https://stackoverflow.com/questions/52911812/check-if-url-exists-in-r.
22
+From \url{https://stackoverflow.com/questions/52911812/check-if-url-exists-in-r}.
23 23
 }