Browse code

updated

Xu Ren authored on 23/06/2018 21:23:13
Showing 20 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: methylGSA
2 2
 Type: Package
3 3
 Title: methylGSA: Gene Set Analysis Using the Outcome of Differential Methylation
4
-Version: 0.99.11
4
+Version: 0.99.12
5 5
 Authors@R: c(
6 6
     person("Xu", "Ren", 
7 7
         email = "[email protected]", role = c("aut", "cre")),
... ...
@@ -17,7 +17,6 @@ Imports:
17 17
     RobustRankAggreg,
18 18
     stringr,
19 19
     stats,
20
-    minfi,
21 20
     clusterProfiler,
22 21
     missMethyl,
23 22
     org.Hs.eg.db,
... ...
@@ -1,8 +1,10 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3
+export(getGS)
3 4
 export(methylRRA)
4 5
 export(methylglm)
5 6
 export(methylgometh)
7
+export(prepareAnnot)
6 8
 import(IlluminaHumanMethylation450kanno.ilmn12.hg19)
7 9
 import(IlluminaHumanMethylationEPICanno.ilm10b2.hg19)
8 10
 import(RobustRankAggreg)
... ...
@@ -11,7 +13,6 @@ import(reactome.db)
11 13
 import(stats)
12 14
 importFrom(AnnotationDbi,select)
13 15
 importFrom(clusterProfiler,GSEA)
14
-importFrom(minfi,getAnnotation)
15 16
 importFrom(missMethyl,gometh)
16 17
 importFrom(missMethyl,gsameth)
17 18
 importFrom(stringr,str_length)
... ...
@@ -1,3 +1,6 @@
1
+# methylGSA 0.99.10 
2
+* Support user-supplied mapping between CpGs and Genes
3
+
1 4
 # methylGSA 0.99.7  
2 5
 * Development version   
3 6
 * Changed package name to methylGSA
... ...
@@ -5,7 +5,6 @@
5 5
 #' @import IlluminaHumanMethylation450kanno.ilmn12.hg19
6 6
 #' @import IlluminaHumanMethylationEPICanno.ilm10b2.hg19
7 7
 #' @importFrom stringr str_length
8
-#' @importFrom minfi getAnnotation
9 8
 #' @details The implementation of the function is modified
10 9
 #' from .flattenAnn function in missMethyl package.
11 10
 #' @return A data frame contains CpG IDs and gene symbols.
... ...
@@ -20,13 +19,9 @@
20 19
 
21 20
 getAnnot = function(array.type){
22 21
     if(array.type=="450K")
23
-        FullAnnot = getAnnotation(
24
-            IlluminaHumanMethylation450kanno.ilmn12.hg19
25
-            ::IlluminaHumanMethylation450kanno.ilmn12.hg19)
22
+        FullAnnot = getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19)
26 23
     else
27
-        FullAnnot = getAnnotation(
28
-            IlluminaHumanMethylationEPICanno.ilm10b2.hg19
29
-            ::IlluminaHumanMethylationEPICanno.ilm10b2.hg19)
24
+        FullAnnot = getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b2.hg19)
30 25
 
31 26
     FullAnnot = FullAnnot[,c("Name","UCSC_RefGene_Name")]
32 27
     FullAnnot = FullAnnot[str_length(rownames(FullAnnot))==10,]
... ...
@@ -1,8 +1,10 @@
1 1
 #' @title Get Gene Sets
2 2
 #'
3 3
 #' @description This function gets gene sets information.
4
-#' @param geneids A vector contains all gene ids of interest.
4
+#' @param geneids A vector contains all gene ids of interest. Gene ids should
5
+#' be gene symbols.
5 6
 #' @param GS.type A string. "GO", "KEGG", or "Reactome".
7
+#' @export
6 8
 #' @import org.Hs.eg.db
7 9
 #' @import reactome.db
8 10
 #' @importFrom AnnotationDbi select
... ...
@@ -12,22 +14,26 @@
12 14
 #' Genome wide annotation for Human. R package version 3.5.0.
13 15
 #' @references Ligtenberg W (2017). reactome.db:
14 16
 #' A set of annotation maps for reactome. R package version 1.62.0.
17
+#' @examples
18
+#' geneids = c("FKBP5", "NDUFA1", "STAT5B")
19
+#' GO.list = getGS(geneids, "KEGG")
20
+#' head(GO.list)
15 21
 
16 22
 getGS = function(geneids, GS.type){
17
-    message("retrieving", GS.type, "sets...")
23
+    message("retrieving ", GS.type, " sets...")
18 24
     if(GS.type == "KEGG")
19 25
         GS.type = "PATH"
20 26
     if(GS.type == "Reactome"){
21 27
         ## first convert id to entrezid to use reactome.db
22 28
         gene.entrez = suppressMessages(
23 29
             select(org.Hs.eg.db, geneids,
24
-                   columns = "ENTREZID",keytype = "SYMBOL")$ENTREZID)
30
+                        columns = "ENTREZID",keytype = "SYMBOL")$ENTREZID)
25 31
         GOdf = suppressMessages(
26 32
             select(reactome.db, gene.entrez,
27
-                   columns = "REACTOMEID", keytype = "ENTREZID"))
33
+                        columns = "REACTOMEID", keytype = "ENTREZID"))
28 34
         genesymbol = suppressMessages(
29 35
             select(org.Hs.eg.db, GOdf$ENTREZID,
30
-                   columns = "SYMBOL", keytype = "ENTREZID")$SYMBOL)
36
+                        columns = "SYMBOL", keytype = "ENTREZID")$SYMBOL)
31 37
         GS.type = "REACTOMEID"
32 38
 
33 39
     }
... ...
@@ -35,7 +41,7 @@ getGS = function(geneids, GS.type){
35 41
         GOs = suppressMessages(
36 42
             na.omit(unique(
37 43
                 select(org.Hs.eg.db, geneids,
38
-                       GS.type,keytype = "SYMBOL")[,GS.type])))
44
+                            GS.type,keytype = "SYMBOL")[,GS.type])))
39 45
         GOdf = suppressMessages(
40 46
             select(org.Hs.eg.db, GOs, "SYMBOL", keytype = GS.type))
41 47
         genesymbol = GOdf$SYMBOL
... ...
@@ -6,9 +6,9 @@
6 6
 #' @param cpg.pval A named vector containing p-values of differential
7 7
 #' methylation test. Names should be CpG IDs.
8 8
 #' @param array.type A string. Either "450K" or "EPIC". Default is "450K".
9
-#' This argument will be ignore if CpG2Gene is provided.
10
-#' @param CpG2Gene A matrix or data frame with 1st column CpG ID and 2nd
11
-#' column gene name. Default is NULL.
9
+#' This argument will be ignored if FullAnnot is provided.
10
+#' @param FullAnnot A data frame provided by prepareAnnot function.
11
+#' Default is NULL.
12 12
 #' @param method A string. "ORA" or "GSEA". Default is "ORA"
13 13
 #' @param GS.list A list. Default is NULL. If there is no input list,
14 14
 #' Gene Ontology is used. Entry names are gene sets names, and elements
... ...
@@ -42,11 +42,12 @@
42 42
 #' data(cpgtoy)
43 43
 #' data(GSlisttoy)
44 44
 #' GS.list = GS.list[1:10]
45
-#' res1 = methylRRA(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene,
45
+#' FullAnnot = prepareAnnot(CpG2Gene)
46
+#' res1 = methylRRA(cpg.pval = cpg.pval, FullAnnot = FullAnnot,
46 47
 #' method = "ORA", GS.list = GS.list)
47 48
 #' head(res1)
48 49
 
49
-methylRRA <- function(cpg.pval, array.type = "450K", CpG2Gene = NULL,
50
+methylRRA <- function(cpg.pval, array.type = "450K", FullAnnot = NULL,
50 51
                             method = "ORA", GS.list=NULL, GS.idtype = "SYMBOL",
51 52
                             GS.type = "GO", minsize = 100, maxsize = 500){
52 53
     if(!is.vector(cpg.pval) | !is.numeric(cpg.pval) | is.null(names(cpg.pval)))
... ...
@@ -64,18 +65,7 @@ methylRRA <- function(cpg.pval, array.type = "450K", CpG2Gene = NULL,
64 65
                         keytype = GS.idtype)$SYMBOL))
65 66
     GS.type = match.arg(GS.type, c("GO", "KEGG", "Reactome"))
66 67
 
67
-    if(!is.null(CpG2Gene)){
68
-        if(!is.character(CpG2Gene[,1])|!is.character(CpG2Gene[,2]))
69
-            stop("CpG2Gene should be a matrix or data frame with
70
-                    1st column CpG ID and 2nd column gene name")
71
-        if(ncol(CpG2Gene)!=2)
72
-            stop("CpG2Gene should contain two columns")
73
-        FullAnnot = data.frame(CpG2Gene)
74
-        colnames(FullAnnot) = c("Name", "UCSC_RefGene_Name")
75
-        rownames(FullAnnot) = FullAnnot$Name
76
-    }
77
-
78
-    else{
68
+    if(is.null(FullAnnot)){
79 69
         if(array.type!="450K" & array.type!="EPIC")
80 70
             stop("Input array type should be either 450K or EPIC")
81 71
         if(array.type=="450K")
... ...
@@ -6,9 +6,9 @@
6 6
 #' @param cpg.pval A named vector containing p-values of differential
7 7
 #' methylation test. Names should be CpG IDs.
8 8
 #' @param array.type A string. Either "450K" or "EPIC". Default is "450K".
9
-#' This argument will be ignore if CpG2Gene is provided.
10
-#' @param CpG2Gene A matrix or data frame with 1st column CpG ID and 2nd
11
-#' column gene name. Default is NULL.
9
+#' This argument will be ignored if FullAnnot is provided.
10
+#' @param FullAnnot A data frame provided by prepareAnnot function.
11
+#' Default is NULL.
12 12
 #' @param GS.list A list. Default is NULL. If there is no input list,
13 13
 #' Gene Ontology is used. Entry names are gene sets names, and elements
14 14
 #' correpond to genes that gene sets contain.
... ...
@@ -39,11 +39,12 @@
39 39
 #' data(cpgtoy)
40 40
 #' data(GSlisttoy)
41 41
 #' GS.list = GS.list[1:10]
42
-#' res = methylglm(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene, GS.list = GS.list,
43
-#' GS.idtype = "SYMBOL")
42
+#' FullAnnot = prepareAnnot(CpG2Gene)
43
+#' res = methylglm(cpg.pval = cpg.pval, FullAnnot = FullAnnot,
44
+#' GS.list = GS.list, GS.idtype = "SYMBOL")
44 45
 #' head(res)
45 46
 
46
-methylglm <- function(cpg.pval, array.type = "450K", CpG2Gene = NULL,
47
+methylglm <- function(cpg.pval, array.type = "450K", FullAnnot = NULL,
47 48
                             GS.list=NULL, GS.idtype = "SYMBOL", GS.type = "GO",
48 49
                             minsize = 100, maxsize = 500){
49 50
     if(!is.vector(cpg.pval) | !is.numeric(cpg.pval) | is.null(names(cpg.pval)))
... ...
@@ -62,18 +63,7 @@ methylglm <- function(cpg.pval, array.type = "450K", CpG2Gene = NULL,
62 63
                             keytype = GS.idtype)$SYMBOL))
63 64
     GS.type = match.arg(GS.type, c("GO", "KEGG", "Reactome"))
64 65
 
65
-    if(!is.null(CpG2Gene)){
66
-        if(!is.character(CpG2Gene[,1])|!is.character(CpG2Gene[,2]))
67
-            stop("CpG2Gene should be a matrix or data frame with
68
-                    1st column CpG ID and 2nd column gene name")
69
-        if(ncol(CpG2Gene)!=2)
70
-            stop("CpG2Gene should contain two columns")
71
-        FullAnnot = data.frame(CpG2Gene)
72
-        colnames(FullAnnot) = c("Name", "UCSC_RefGene_Name")
73
-        rownames(FullAnnot) = FullAnnot$Name
74
-    }
75
-
76
-    else{
66
+    if(is.null(FullAnnot)){
77 67
         if(array.type!="450K" & array.type!="EPIC")
78 68
             stop("Input array type should be either 450K or EPIC")
79 69
         if(array.type=="450K")
... ...
@@ -11,7 +11,7 @@
11 11
 #' Gene Ontology is used. Entry names are gene sets names, and elements
12 12
 #' correpond to genes that gene sets contain.
13 13
 #' @param GS.idtype A string. "SYMBOL", "ENSEMBL", "ENTREZID" or "REFSEQ".
14
-#' Default is "SYMBOL"
14
+#' Default is "SYMBOL".
15 15
 #' @param GS.type A string. "GO", "KEGG", or "Reactome"
16 16
 #' @param minsize An integer. If the number of genes in a gene set
17 17
 #' is less than this integer, this gene set is not tested. Default is 100.
... ...
@@ -34,11 +34,13 @@
34 34
 #' @references Carlson M (2017). org.Hs.eg.db: Genome wide annotation
35 35
 #' for Human. R package version 3.5.0.
36 36
 #' @examples
37
+#' \dontrun{
37 38
 #' library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
38 39
 #' data(cpgtoy)
39 40
 #' res = methylgometh(cpg.pval = cpg.pval, sig.cut = 0.001, GS.type = "KEGG",
40 41
 #' minsize = 200, maxsize = 205)
41 42
 #' head(res)
43
+#' }
42 44
 
43 45
 
44 46
 methylgometh <- function(cpg.pval, sig.cut, array.type = "450K",
45 47
new file mode 100644
... ...
@@ -0,0 +1,53 @@
1
+#' @title Prepare user-supplied mapping between CpGs and genes.
2
+#'
3
+#' @description This function prepares CpG to gene mapping which will be
4
+#' used by methylRRA and methylglm.
5
+#' @param CpG2Gene A matrix, a data frame, or a list containing the CpG to gene
6
+#' mapping. For a matrix or data frame, 1st column should be CpG ID and 2nd
7
+#' column should be gene name. For a list, entry names should be gene names,
8
+#' and elements correspond to CpG IDs.
9
+#' @param geneidtype A string. "SYMBOL", "ENSEMBL", "ENTREZID" or "REFSEQ".
10
+#' Default is "SYMBOL".
11
+#' @export
12
+#' @import org.Hs.eg.db
13
+#' @importFrom AnnotationDbi select
14
+#' @return A data frame containing a ready-to-use CpG to gene mapping.
15
+#' @references Carlson M (2017). org.Hs.eg.db:
16
+#' Genome wide annotation for Human. R package version 3.5.0.
17
+#' @examples
18
+#' data(CpG2Genetoy)
19
+#' FullAnnot = prepareAnnot(CpG2Gene)
20
+#' head(FullAnnot)
21
+
22
+prepareAnnot <- function(CpG2Gene, geneidtype = "SYMBOL"){
23
+    geneidtype = match.arg(
24
+        geneidtype,c("SYMBOL", "ENSEMBL", "ENTREZID", "REFSEQ"))
25
+
26
+    if(is.matrix(CpG2Gene)|is.data.frame(CpG2Gene)){
27
+        if(!is.character(CpG2Gene[,1]))
28
+            stop("CpG ID should be characters")
29
+        if(ncol(CpG2Gene)!=2)
30
+            stop("CpG2Gene should contain two columns")
31
+        FullAnnot = data.frame(CpG2Gene)
32
+    }
33
+    else if(is.list(CpG2Gene)){
34
+        FullAnnot = data.frame(
35
+            CpG = unlist(CpG2Gene),
36
+            gene = rep(names(CpG2Gene),vapply(CpG2Gene, length, FUN.VALUE = 0)))
37
+    }
38
+    else
39
+        stop("CpG2Gene should be a matrix or a data frame or a list.")
40
+
41
+    colnames(FullAnnot) = c("Name", "UCSC_RefGene_Name")
42
+
43
+    if(geneidtype!="SYMBOL"){
44
+        temp = suppressMessages(
45
+            select(org.Hs.eg.db, FullAnnot$UCSC_RefGene_Name,
46
+                        columns = "SYMBOL",keytype = geneidtype))
47
+        FullAnnot$UCSC_RefGene_Name = temp$SYMBOL
48
+    }
49
+    rownames(FullAnnot) = FullAnnot$Name
50
+    return(FullAnnot)
51
+}
52
+
53
+
... ...
@@ -19,6 +19,7 @@ NULL
19 19
 #' @keywords datasets
20 20
 NULL
21 21
 
22
+
22 23
 #' @title An example of user-supplied mapping between CpGs and genes
23 24
 #'
24 25
 #' @description An example of user-supplied mapping between CpGs and genes
25 26
Binary files a/data/CpG2Genetoy.RData and b/data/CpG2Genetoy.RData differ
... ...
@@ -13,6 +13,7 @@ FullAnnot$UCSC_RefGene_Name = temp
13 13
 colnames(FullAnnot) = c("CpG", "Gene")
14 14
 rownames(FullAnnot) = NULL
15 15
 CpG2Gene = FullAnnot
16
+CpG2Gene = data.frame(CpG2Gene)
16 17
 
17 18
 save(CpG2Gene, file = 'data/CpG2Genetoy.RData', compress = 'xz')
18 19
 
... ...
@@ -7,7 +7,8 @@
7 7
 getGS(geneids, GS.type)
8 8
 }
9 9
 \arguments{
10
-\item{geneids}{A vector contains all gene ids of interest.}
10
+\item{geneids}{A vector contains all gene ids of interest. Gene ids should
11
+be gene symbols.}
11 12
 
12 13
 \item{GS.type}{A string. "GO", "KEGG", or "Reactome".}
13 14
 }
... ...
@@ -18,6 +19,11 @@ interest and their corresponding genes.
18 19
 \description{
19 20
 This function gets gene sets information.
20 21
 }
22
+\examples{
23
+geneids = c("FKBP5", "NDUFA1", "STAT5B")
24
+GO.list = getGS(geneids, "KEGG")
25
+head(GO.list)
26
+}
21 27
 \references{
22 28
 Carlson M (2017). org.Hs.eg.db:
23 29
 Genome wide annotation for Human. R package version 3.5.0.
... ...
@@ -5,7 +5,7 @@
5 5
 \title{Enrichment analysis after adjusting multiple p-values of
6 6
 each gene by Robust Rank Aggregation}
7 7
 \usage{
8
-methylRRA(cpg.pval, array.type = "450K", CpG2Gene = NULL, method = "ORA",
8
+methylRRA(cpg.pval, array.type = "450K", FullAnnot = NULL, method = "ORA",
9 9
   GS.list = NULL, GS.idtype = "SYMBOL", GS.type = "GO", minsize = 100,
10 10
   maxsize = 500)
11 11
 }
... ...
@@ -14,10 +14,10 @@ methylRRA(cpg.pval, array.type = "450K", CpG2Gene = NULL, method = "ORA",
14 14
 methylation test. Names should be CpG IDs.}
15 15
 
16 16
 \item{array.type}{A string. Either "450K" or "EPIC". Default is "450K".
17
-This argument will be ignore if CpG2Gene is provided.}
17
+This argument will be ignored if FullAnnot is provided.}
18 18
 
19
-\item{CpG2Gene}{A matrix or data frame with 1st column CpG ID and 2nd
20
-column gene name. Default is NULL.}
19
+\item{FullAnnot}{A data frame provided by prepareAnnot function.
20
+Default is NULL.}
21 21
 
22 22
 \item{method}{A string. "ORA" or "GSEA". Default is "ORA"}
23 23
 
... ...
@@ -48,7 +48,8 @@ data(CpG2Genetoy)
48 48
 data(cpgtoy)
49 49
 data(GSlisttoy)
50 50
 GS.list = GS.list[1:10]
51
-res1 = methylRRA(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene,
51
+FullAnnot = prepareAnnot(CpG2Gene)
52
+res1 = methylRRA(cpg.pval = cpg.pval, FullAnnot = FullAnnot,
52 53
 method = "ORA", GS.list = GS.list)
53 54
 head(res1)
54 55
 }
... ...
@@ -5,7 +5,7 @@
5 5
 \title{Implement logistic regression adjusting
6 6
 for number of probes in enrichment analysis}
7 7
 \usage{
8
-methylglm(cpg.pval, array.type = "450K", CpG2Gene = NULL, GS.list = NULL,
8
+methylglm(cpg.pval, array.type = "450K", FullAnnot = NULL, GS.list = NULL,
9 9
   GS.idtype = "SYMBOL", GS.type = "GO", minsize = 100, maxsize = 500)
10 10
 }
11 11
 \arguments{
... ...
@@ -13,10 +13,10 @@ methylglm(cpg.pval, array.type = "450K", CpG2Gene = NULL, GS.list = NULL,
13 13
 methylation test. Names should be CpG IDs.}
14 14
 
15 15
 \item{array.type}{A string. Either "450K" or "EPIC". Default is "450K".
16
-This argument will be ignore if CpG2Gene is provided.}
16
+This argument will be ignored if FullAnnot is provided.}
17 17
 
18
-\item{CpG2Gene}{A matrix or data frame with 1st column CpG ID and 2nd
19
-column gene name. Default is NULL.}
18
+\item{FullAnnot}{A data frame provided by prepareAnnot function.
19
+Default is NULL.}
20 20
 
21 21
 \item{GS.list}{A list. Default is NULL. If there is no input list,
22 22
 Gene Ontology is used. Entry names are gene sets names, and elements
... ...
@@ -49,8 +49,9 @@ data(CpG2Genetoy)
49 49
 data(cpgtoy)
50 50
 data(GSlisttoy)
51 51
 GS.list = GS.list[1:10]
52
-res = methylglm(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene, GS.list = GS.list,
53
-GS.idtype = "SYMBOL")
52
+FullAnnot = prepareAnnot(CpG2Gene)
53
+res = methylglm(cpg.pval = cpg.pval, FullAnnot = FullAnnot,
54
+GS.list = GS.list, GS.idtype = "SYMBOL")
54 55
 head(res)
55 56
 }
56 57
 \references{
... ...
@@ -21,7 +21,7 @@ Gene Ontology is used. Entry names are gene sets names, and elements
21 21
 correpond to genes that gene sets contain.}
22 22
 
23 23
 \item{GS.idtype}{A string. "SYMBOL", "ENSEMBL", "ENTREZID" or "REFSEQ".
24
-Default is "SYMBOL"}
24
+Default is "SYMBOL".}
25 25
 
26 26
 \item{GS.type}{A string. "GO", "KEGG", or "Reactome"}
27 27
 
... ...
@@ -39,12 +39,14 @@ This function calls gometh or gsameth function
39 39
 in missMethyl package to adjust number of probes in gene set testing
40 40
 }
41 41
 \examples{
42
+\dontrun{
42 43
 library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
43 44
 data(cpgtoy)
44 45
 res = methylgometh(cpg.pval = cpg.pval, sig.cut = 0.001, GS.type = "KEGG",
45 46
 minsize = 200, maxsize = 205)
46 47
 head(res)
47 48
 }
49
+}
48 50
 \references{
49 51
 Phipson, B., Maksimovic, J., and Oshlack, A. (2015).
50 52
 missMethyl: an R package for analysing methylation data from Illuminas
51 53
new file mode 100644
... ...
@@ -0,0 +1,33 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/prepareAnnot.R
3
+\name{prepareAnnot}
4
+\alias{prepareAnnot}
5
+\title{Prepare user-supplied mapping between CpGs and genes.}
6
+\usage{
7
+prepareAnnot(CpG2Gene, geneidtype = "SYMBOL")
8
+}
9
+\arguments{
10
+\item{CpG2Gene}{A matrix, a data frame, or a list containing the CpG to gene
11
+mapping. For a matrix or data frame, 1st column should be CpG ID and 2nd
12
+column should be gene name. For a list, entry names should be gene names,
13
+and elements correspond to CpG IDs.}
14
+
15
+\item{geneidtype}{A string. "SYMBOL", "ENSEMBL", "ENTREZID" or "REFSEQ".
16
+Default is "SYMBOL".}
17
+}
18
+\value{
19
+A data frame containing a ready-to-use CpG to gene mapping.
20
+}
21
+\description{
22
+This function prepares CpG to gene mapping which will be
23
+used by methylRRA and methylglm.
24
+}
25
+\examples{
26
+data(CpG2Genetoy)
27
+FullAnnot = prepareAnnot(CpG2Gene)
28
+head(FullAnnot)
29
+}
30
+\references{
31
+Carlson M (2017). org.Hs.eg.db:
32
+Genome wide annotation for Human. R package version 3.5.0.
33
+}
... ...
@@ -3,6 +3,13 @@ context("Test internal functions")
3 3
 library(org.Hs.eg.db)
4 4
 library(reactome.db)
5 5
 data(GSlisttoy)
6
+data(CpG2Genetoy)
7
+
8
+test_that("check prepareAnnot", {
9
+    colnames(CpG2Gene) = c("Name", "UCSC_RefGene_Name")
10
+    rownames(CpG2Gene) = CpG2Gene$Name
11
+    expect_identical(prepareAnnot(CpG2Gene), CpG2Gene)
12
+})
6 13
 
7 14
 test_that("check getGS", {
8 15
     geneids = GS.list[[1]]
... ...
@@ -28,7 +35,6 @@ test_that("check getGS", {
28 35
 
29 36
     expect_identical(getGS(geneids, "KEGG"), KEGG.list)
30 37
 
31
-
32 38
     gene.entrez = suppressMessages(
33 39
         select(org.Hs.eg.db, geneids,
34 40
                columns = "ENTREZID",keytype = "SYMBOL")$ENTREZID)
... ...
@@ -4,9 +4,10 @@ data(CpG2Genetoy)
4 4
 data(cpgtoy)
5 5
 data(GSlisttoy)
6 6
 GS.list = GS.list[1:10]
7
+FullAnnot = prepareAnnot(CpG2Gene)
7 8
 
8 9
 test_that("check for valid output", {
9
-    res1 = methylglm(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene,
10
+    res1 = methylglm(cpg.pval = cpg.pval, FullAnnot = FullAnnot,
10 11
                      GS.list = GS.list, GS.idtype = "SYMBOL",
11 12
                      minsize = 100, maxsize = 300)
12 13
     expect_is(res1, 'data.frame')
... ...
@@ -15,7 +16,7 @@ test_that("check for valid output", {
15 16
     expect_true(all(res1$padj>=0 & res1$padj<=1))
16 17
     expect_true(all(colnames(res1) %in% c("ID", "size", "pvalue", "padj")))
17 18
 
18
-    res2 = methylRRA(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene,
19
+    res2 = methylRRA(cpg.pval = cpg.pval, FullAnnot = FullAnnot,
19 20
                      method = "ORA", GS.list = GS.list)
20 21
     expect_is(res2, 'data.frame')
21 22
     expect_equal(dim(res2)[2], 4)
... ...
@@ -23,7 +24,7 @@ test_that("check for valid output", {
23 24
     expect_true(all(res2$padj>=0 & res2$padj<=1))
24 25
     expect_true(all(colnames(res2) %in% c("ID", "size", "pvalue", "padj")))
25 26
 
26
-    res3 = methylRRA(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene,
27
+    res3 = methylRRA(cpg.pval = cpg.pval, FullAnnot = FullAnnot,
27 28
                      method = "GSEA", GS.list = GS.list)
28 29
     expect_is(res3, 'data.frame')
29 30
     expect_equal(dim(res3)[2], 7)
... ...
@@ -1,6 +1,6 @@
1 1
 ---
2 2
 title: "methylGSA: Gene Set Analysis for DNA Methylation Datasets"
3
-author: "Pei Fen Kuan and Xu Ren"
3
+author: "Xu Ren and Pei Fen Kuan"
4 4
 date: "`r Sys.Date()`"
5 5
 output: 
6 6
     rmarkdown::html_document:
... ...
@@ -42,7 +42,7 @@ weighted resampling and Wallenius non-central hypergeometric approximation.
42 42
 * Gene Ontology (via org.Hs.eg.db)
43 43
 * KEGG (via org.Hs.eg.db)
44 44
 * Reactome (via reactome.db)
45
-* User input gene sets. Supported input gene ID types:
45
+* User-supplied gene sets. Supported input gene ID types:
46 46
     + "SYMBOL"
47 47
     + "ENSEMBL"
48 48
     + "ENTREZID"
... ...
@@ -169,10 +169,10 @@ head(res4, 15)
169 169
 methylGSA provides many other options for users to customize the analysis. 
170 170
 
171 171
 * `array.type` is to specify which array type to use. It is either "450K" or 
172
-"EPIC". Default is "450K". This argument will be ignore if CpG2Gene is 
172
+"EPIC". Default is "450K". This argument will be ignored if `FullAnnot` is 
173 173
 provided.
174
-* `CpG2Gene` is user supplied mapping between CpG ID and gene. It should be 
175
-a matrix or data frame with 1st column CpG ID and 2nd column gene name.
174
+* `FullAnnot` is preprocessed mapping between CpG ID and gene name provided by
175
+the `prepareAnnot` function. Default is NULL. See the example below for details. 
176 176
 * `GS.list` is user input gene sets to be tested. It should be a list with 
177 177
 entry names gene sets IDs and elements correpond to genes that gene sets 
178 178
 contain. If there is no input list, Gene Ontology is used.
... ...
@@ -197,17 +197,27 @@ data(GSlisttoy)
197 197
 head(lapply(GS.list, function(x) return(x[1:30])), 3)   
198 198
 ```
199 199
 
200
-This is an example of user-supplied CpG to gene mapping
200
+methylglm and methylRRA support user-supplied CpG ID to gene mapping. The 
201
+mapping is expected to be a matrix, or a data frame or a list. For a 
202
+matrix or data frame, 1st column should be CpG ID and 2nd column should be gene 
203
+name. For a list, entry names should be gene names and elements correspond to 
204
+CpG IDs. This is an example of user-supplied CpG to gene mapping:
201 205
 ```{r}
202 206
 data(CpG2Genetoy)
203 207
 head(CpG2Gene)   
204 208
 ```
205 209
 
206
-Test the gene sets using "ORA" in methylRRA
210
+To use user supplied mapping in methylglm or methylRRA, first preprocess the
211
+mapping with the `prepareAnnot` function:
212
+```{r}
213
+FullAnnot = prepareAnnot(CpG2Gene) 
214
+```
207 215
 
216
+Test the gene sets using "ORA" in methylRRA, use `FullAnnot` argument to 
217
+provide the preprocessed CpG ID to gene mapping.
208 218
 ```{r}
209 219
 GS.list = GS.list[1:10]
210
-res5 = methylRRA(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene, method = "ORA", 
220
+res5 = methylRRA(cpg.pval = cpg.pval, FullAnnot = FullAnnot, method = "ORA", 
211 221
                     GS.list = GS.list, GS.idtype = "SYMBOL", 
212 222
                     minsize = 100, maxsize = 300)
213 223
 head(res5, 10)