... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: methylGSA |
2 | 2 |
Type: Package |
3 | 3 |
Title: methylGSA: Gene Set Analysis Using the Outcome of Differential Methylation |
4 |
-Version: 0.99.11 |
|
4 |
+Version: 0.99.12 |
|
5 | 5 |
Authors@R: c( |
6 | 6 |
person("Xu", "Ren", |
7 | 7 |
email = "[email protected]", role = c("aut", "cre")), |
... | ... |
@@ -17,7 +17,6 @@ Imports: |
17 | 17 |
RobustRankAggreg, |
18 | 18 |
stringr, |
19 | 19 |
stats, |
20 |
- minfi, |
|
21 | 20 |
clusterProfiler, |
22 | 21 |
missMethyl, |
23 | 22 |
org.Hs.eg.db, |
... | ... |
@@ -1,8 +1,10 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 |
+export(getGS) |
|
3 | 4 |
export(methylRRA) |
4 | 5 |
export(methylglm) |
5 | 6 |
export(methylgometh) |
7 |
+export(prepareAnnot) |
|
6 | 8 |
import(IlluminaHumanMethylation450kanno.ilmn12.hg19) |
7 | 9 |
import(IlluminaHumanMethylationEPICanno.ilm10b2.hg19) |
8 | 10 |
import(RobustRankAggreg) |
... | ... |
@@ -11,7 +13,6 @@ import(reactome.db) |
11 | 13 |
import(stats) |
12 | 14 |
importFrom(AnnotationDbi,select) |
13 | 15 |
importFrom(clusterProfiler,GSEA) |
14 |
-importFrom(minfi,getAnnotation) |
|
15 | 16 |
importFrom(missMethyl,gometh) |
16 | 17 |
importFrom(missMethyl,gsameth) |
17 | 18 |
importFrom(stringr,str_length) |
... | ... |
@@ -5,7 +5,6 @@ |
5 | 5 |
#' @import IlluminaHumanMethylation450kanno.ilmn12.hg19 |
6 | 6 |
#' @import IlluminaHumanMethylationEPICanno.ilm10b2.hg19 |
7 | 7 |
#' @importFrom stringr str_length |
8 |
-#' @importFrom minfi getAnnotation |
|
9 | 8 |
#' @details The implementation of the function is modified |
10 | 9 |
#' from .flattenAnn function in missMethyl package. |
11 | 10 |
#' @return A data frame contains CpG IDs and gene symbols. |
... | ... |
@@ -20,13 +19,9 @@ |
20 | 19 |
|
21 | 20 |
getAnnot = function(array.type){ |
22 | 21 |
if(array.type=="450K") |
23 |
- FullAnnot = getAnnotation( |
|
24 |
- IlluminaHumanMethylation450kanno.ilmn12.hg19 |
|
25 |
- ::IlluminaHumanMethylation450kanno.ilmn12.hg19) |
|
22 |
+ FullAnnot = getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19) |
|
26 | 23 |
else |
27 |
- FullAnnot = getAnnotation( |
|
28 |
- IlluminaHumanMethylationEPICanno.ilm10b2.hg19 |
|
29 |
- ::IlluminaHumanMethylationEPICanno.ilm10b2.hg19) |
|
24 |
+ FullAnnot = getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b2.hg19) |
|
30 | 25 |
|
31 | 26 |
FullAnnot = FullAnnot[,c("Name","UCSC_RefGene_Name")] |
32 | 27 |
FullAnnot = FullAnnot[str_length(rownames(FullAnnot))==10,] |
... | ... |
@@ -1,8 +1,10 @@ |
1 | 1 |
#' @title Get Gene Sets |
2 | 2 |
#' |
3 | 3 |
#' @description This function gets gene sets information. |
4 |
-#' @param geneids A vector contains all gene ids of interest. |
|
4 |
+#' @param geneids A vector contains all gene ids of interest. Gene ids should |
|
5 |
+#' be gene symbols. |
|
5 | 6 |
#' @param GS.type A string. "GO", "KEGG", or "Reactome". |
7 |
+#' @export |
|
6 | 8 |
#' @import org.Hs.eg.db |
7 | 9 |
#' @import reactome.db |
8 | 10 |
#' @importFrom AnnotationDbi select |
... | ... |
@@ -12,22 +14,26 @@ |
12 | 14 |
#' Genome wide annotation for Human. R package version 3.5.0. |
13 | 15 |
#' @references Ligtenberg W (2017). reactome.db: |
14 | 16 |
#' A set of annotation maps for reactome. R package version 1.62.0. |
17 |
+#' @examples |
|
18 |
+#' geneids = c("FKBP5", "NDUFA1", "STAT5B") |
|
19 |
+#' GO.list = getGS(geneids, "KEGG") |
|
20 |
+#' head(GO.list) |
|
15 | 21 |
|
16 | 22 |
getGS = function(geneids, GS.type){ |
17 |
- message("retrieving", GS.type, "sets...") |
|
23 |
+ message("retrieving ", GS.type, " sets...") |
|
18 | 24 |
if(GS.type == "KEGG") |
19 | 25 |
GS.type = "PATH" |
20 | 26 |
if(GS.type == "Reactome"){ |
21 | 27 |
## first convert id to entrezid to use reactome.db |
22 | 28 |
gene.entrez = suppressMessages( |
23 | 29 |
select(org.Hs.eg.db, geneids, |
24 |
- columns = "ENTREZID",keytype = "SYMBOL")$ENTREZID) |
|
30 |
+ columns = "ENTREZID",keytype = "SYMBOL")$ENTREZID) |
|
25 | 31 |
GOdf = suppressMessages( |
26 | 32 |
select(reactome.db, gene.entrez, |
27 |
- columns = "REACTOMEID", keytype = "ENTREZID")) |
|
33 |
+ columns = "REACTOMEID", keytype = "ENTREZID")) |
|
28 | 34 |
genesymbol = suppressMessages( |
29 | 35 |
select(org.Hs.eg.db, GOdf$ENTREZID, |
30 |
- columns = "SYMBOL", keytype = "ENTREZID")$SYMBOL) |
|
36 |
+ columns = "SYMBOL", keytype = "ENTREZID")$SYMBOL) |
|
31 | 37 |
GS.type = "REACTOMEID" |
32 | 38 |
|
33 | 39 |
} |
... | ... |
@@ -35,7 +41,7 @@ getGS = function(geneids, GS.type){ |
35 | 41 |
GOs = suppressMessages( |
36 | 42 |
na.omit(unique( |
37 | 43 |
select(org.Hs.eg.db, geneids, |
38 |
- GS.type,keytype = "SYMBOL")[,GS.type]))) |
|
44 |
+ GS.type,keytype = "SYMBOL")[,GS.type]))) |
|
39 | 45 |
GOdf = suppressMessages( |
40 | 46 |
select(org.Hs.eg.db, GOs, "SYMBOL", keytype = GS.type)) |
41 | 47 |
genesymbol = GOdf$SYMBOL |
... | ... |
@@ -6,9 +6,9 @@ |
6 | 6 |
#' @param cpg.pval A named vector containing p-values of differential |
7 | 7 |
#' methylation test. Names should be CpG IDs. |
8 | 8 |
#' @param array.type A string. Either "450K" or "EPIC". Default is "450K". |
9 |
-#' This argument will be ignore if CpG2Gene is provided. |
|
10 |
-#' @param CpG2Gene A matrix or data frame with 1st column CpG ID and 2nd |
|
11 |
-#' column gene name. Default is NULL. |
|
9 |
+#' This argument will be ignored if FullAnnot is provided. |
|
10 |
+#' @param FullAnnot A data frame provided by prepareAnnot function. |
|
11 |
+#' Default is NULL. |
|
12 | 12 |
#' @param method A string. "ORA" or "GSEA". Default is "ORA" |
13 | 13 |
#' @param GS.list A list. Default is NULL. If there is no input list, |
14 | 14 |
#' Gene Ontology is used. Entry names are gene sets names, and elements |
... | ... |
@@ -42,11 +42,12 @@ |
42 | 42 |
#' data(cpgtoy) |
43 | 43 |
#' data(GSlisttoy) |
44 | 44 |
#' GS.list = GS.list[1:10] |
45 |
-#' res1 = methylRRA(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene, |
|
45 |
+#' FullAnnot = prepareAnnot(CpG2Gene) |
|
46 |
+#' res1 = methylRRA(cpg.pval = cpg.pval, FullAnnot = FullAnnot, |
|
46 | 47 |
#' method = "ORA", GS.list = GS.list) |
47 | 48 |
#' head(res1) |
48 | 49 |
|
49 |
-methylRRA <- function(cpg.pval, array.type = "450K", CpG2Gene = NULL, |
|
50 |
+methylRRA <- function(cpg.pval, array.type = "450K", FullAnnot = NULL, |
|
50 | 51 |
method = "ORA", GS.list=NULL, GS.idtype = "SYMBOL", |
51 | 52 |
GS.type = "GO", minsize = 100, maxsize = 500){ |
52 | 53 |
if(!is.vector(cpg.pval) | !is.numeric(cpg.pval) | is.null(names(cpg.pval))) |
... | ... |
@@ -64,18 +65,7 @@ methylRRA <- function(cpg.pval, array.type = "450K", CpG2Gene = NULL, |
64 | 65 |
keytype = GS.idtype)$SYMBOL)) |
65 | 66 |
GS.type = match.arg(GS.type, c("GO", "KEGG", "Reactome")) |
66 | 67 |
|
67 |
- if(!is.null(CpG2Gene)){ |
|
68 |
- if(!is.character(CpG2Gene[,1])|!is.character(CpG2Gene[,2])) |
|
69 |
- stop("CpG2Gene should be a matrix or data frame with |
|
70 |
- 1st column CpG ID and 2nd column gene name") |
|
71 |
- if(ncol(CpG2Gene)!=2) |
|
72 |
- stop("CpG2Gene should contain two columns") |
|
73 |
- FullAnnot = data.frame(CpG2Gene) |
|
74 |
- colnames(FullAnnot) = c("Name", "UCSC_RefGene_Name") |
|
75 |
- rownames(FullAnnot) = FullAnnot$Name |
|
76 |
- } |
|
77 |
- |
|
78 |
- else{ |
|
68 |
+ if(is.null(FullAnnot)){ |
|
79 | 69 |
if(array.type!="450K" & array.type!="EPIC") |
80 | 70 |
stop("Input array type should be either 450K or EPIC") |
81 | 71 |
if(array.type=="450K") |
... | ... |
@@ -6,9 +6,9 @@ |
6 | 6 |
#' @param cpg.pval A named vector containing p-values of differential |
7 | 7 |
#' methylation test. Names should be CpG IDs. |
8 | 8 |
#' @param array.type A string. Either "450K" or "EPIC". Default is "450K". |
9 |
-#' This argument will be ignore if CpG2Gene is provided. |
|
10 |
-#' @param CpG2Gene A matrix or data frame with 1st column CpG ID and 2nd |
|
11 |
-#' column gene name. Default is NULL. |
|
9 |
+#' This argument will be ignored if FullAnnot is provided. |
|
10 |
+#' @param FullAnnot A data frame provided by prepareAnnot function. |
|
11 |
+#' Default is NULL. |
|
12 | 12 |
#' @param GS.list A list. Default is NULL. If there is no input list, |
13 | 13 |
#' Gene Ontology is used. Entry names are gene sets names, and elements |
14 | 14 |
#' correspond to genes that gene sets contain. |
... | ... |
@@ -39,11 +39,12 @@ |
39 | 39 |
#' data(cpgtoy) |
40 | 40 |
#' data(GSlisttoy) |
41 | 41 |
#' GS.list = GS.list[1:10] |
42 |
-#' res = methylglm(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene, GS.list = GS.list, |
|
43 |
-#' GS.idtype = "SYMBOL") |
|
42 |
+#' FullAnnot = prepareAnnot(CpG2Gene) |
|
43 |
+#' res = methylglm(cpg.pval = cpg.pval, FullAnnot = FullAnnot, |
|
44 |
+#' GS.list = GS.list, GS.idtype = "SYMBOL") |
|
44 | 45 |
#' head(res) |
45 | 46 |
|
46 |
-methylglm <- function(cpg.pval, array.type = "450K", CpG2Gene = NULL, |
|
47 |
+methylglm <- function(cpg.pval, array.type = "450K", FullAnnot = NULL, |
|
47 | 48 |
GS.list=NULL, GS.idtype = "SYMBOL", GS.type = "GO", |
48 | 49 |
minsize = 100, maxsize = 500){ |
49 | 50 |
if(!is.vector(cpg.pval) | !is.numeric(cpg.pval) | is.null(names(cpg.pval))) |
... | ... |
@@ -62,18 +63,7 @@ methylglm <- function(cpg.pval, array.type = "450K", CpG2Gene = NULL, |
62 | 63 |
keytype = GS.idtype)$SYMBOL)) |
63 | 64 |
GS.type = match.arg(GS.type, c("GO", "KEGG", "Reactome")) |
64 | 65 |
|
65 |
- if(!is.null(CpG2Gene)){ |
|
66 |
- if(!is.character(CpG2Gene[,1])|!is.character(CpG2Gene[,2])) |
|
67 |
- stop("CpG2Gene should be a matrix or data frame with |
|
68 |
- 1st column CpG ID and 2nd column gene name") |
|
69 |
- if(ncol(CpG2Gene)!=2) |
|
70 |
- stop("CpG2Gene should contain two columns") |
|
71 |
- FullAnnot = data.frame(CpG2Gene) |
|
72 |
- colnames(FullAnnot) = c("Name", "UCSC_RefGene_Name") |
|
73 |
- rownames(FullAnnot) = FullAnnot$Name |
|
74 |
- } |
|
75 |
- |
|
76 |
- else{ |
|
66 |
+ if(is.null(FullAnnot)){ |
|
77 | 67 |
if(array.type!="450K" & array.type!="EPIC") |
78 | 68 |
stop("Input array type should be either 450K or EPIC") |
79 | 69 |
if(array.type=="450K") |
... | ... |
@@ -11,7 +11,7 @@ |
11 | 11 |
#' Gene Ontology is used. Entry names are gene sets names, and elements |
12 | 12 |
#' correspond to genes that gene sets contain. |
13 | 13 |
#' @param GS.idtype A string. "SYMBOL", "ENSEMBL", "ENTREZID" or "REFSEQ". |
14 |
-#' Default is "SYMBOL" |
|
14 |
+#' Default is "SYMBOL". |
|
15 | 15 |
#' @param GS.type A string. "GO", "KEGG", or "Reactome" |
16 | 16 |
#' @param minsize An integer. If the number of genes in a gene set |
17 | 17 |
#' is less than this integer, this gene set is not tested. Default is 100. |
... | ... |
@@ -34,11 +34,13 @@ |
34 | 34 |
#' @references Carlson M (2017). org.Hs.eg.db: Genome wide annotation |
35 | 35 |
#' for Human. R package version 3.5.0. |
36 | 36 |
#' @examples |
37 |
+#' \dontrun{ |
|
37 | 38 |
#' library(IlluminaHumanMethylation450kanno.ilmn12.hg19) |
38 | 39 |
#' data(cpgtoy) |
39 | 40 |
#' res = methylgometh(cpg.pval = cpg.pval, sig.cut = 0.001, GS.type = "KEGG", |
40 | 41 |
#' minsize = 200, maxsize = 205) |
41 | 42 |
#' head(res) |
43 |
+#' } |
|
42 | 44 |
|
43 | 45 |
|
44 | 46 |
methylgometh <- function(cpg.pval, sig.cut, array.type = "450K", |
45 | 47 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,53 @@ |
1 |
+#' @title Prepare user-supplied mapping between CpGs and genes. |
|
2 |
+#' |
|
3 |
+#' @description This function prepares CpG to gene mapping which will be |
|
4 |
+#' used by methylRRA and methylglm. |
|
5 |
+#' @param CpG2Gene A matrix, a data frame, or a list containing CpG to gene |
|
6 |
+#' mapping. For a matrix or data frame, 1st column should be CpG ID and 2nd |
|
7 |
+#' column should be gene name. For a list, entry names should be gene names, |
|
8 |
+#' and elements correspond to CpG IDs. |
|
9 |
+#' @param geneidtype A string. "SYMBOL", "ENSEMBL", "ENTREZID" or "REFSEQ". |
|
10 |
+#' Default is "SYMBOL". |
|
11 |
+#' @export |
|
12 |
+#' @import org.Hs.eg.db |
|
13 |
+#' @importFrom AnnotationDbi select |
|
14 |
+#' @return A data frame containing a ready-to-use CpG to gene mapping. |
|
15 |
+#' @references Carlson M (2017). org.Hs.eg.db: |
|
16 |
+#' Genome wide annotation for Human. R package version 3.5.0. |
|
17 |
+#' @examples |
|
18 |
+#' data(CpG2Genetoy) |
|
19 |
+#' FullAnnot = prepareAnnot(CpG2Gene) |
|
20 |
+#' head(FullAnnot) |
|
21 |
+ |
|
22 |
+prepareAnnot <- function(CpG2Gene, geneidtype = "SYMBOL"){ |
|
23 |
+ geneidtype = match.arg( |
|
24 |
+ geneidtype,c("SYMBOL", "ENSEMBL", "ENTREZID", "REFSEQ")) |
|
25 |
+ |
|
26 |
+ if(is.matrix(CpG2Gene)|is.data.frame(CpG2Gene)){ |
|
27 |
+ if(!is.character(CpG2Gene[,1])) |
|
28 |
+ stop("CpG ID should be characters") |
|
29 |
+ if(ncol(CpG2Gene)!=2) |
|
30 |
+ stop("CpG2Gene should contain two columns") |
|
31 |
+ FullAnnot = data.frame(CpG2Gene) |
|
32 |
+ } |
|
33 |
+ else if(is.list(CpG2Gene)){ |
|
34 |
+ FullAnnot = data.frame( |
|
35 |
+ CpG = unlist(CpG2Gene), |
|
36 |
+ gene = rep(names(CpG2Gene),vapply(CpG2Gene, length, FUN.VALUE = 0))) |
|
37 |
+ } |
|
38 |
+ else |
|
39 |
+ stop("CpG2Gene should be a matrix or a data frame or a list.") |
|
40 |
+ |
|
41 |
+ colnames(FullAnnot) = c("Name", "UCSC_RefGene_Name") |
|
42 |
+ |
|
43 |
+ if(geneidtype!="SYMBOL"){ |
|
44 |
+ temp = suppressMessages( |
|
45 |
+ select(org.Hs.eg.db, FullAnnot$UCSC_RefGene_Name, |
|
46 |
+ columns = "SYMBOL",keytype = geneidtype)) |
|
47 |
+ FullAnnot$UCSC_RefGene_Name = temp$SYMBOL |
|
48 |
+ } |
|
49 |
+ rownames(FullAnnot) = FullAnnot$Name |
|
50 |
+ return(FullAnnot) |
|
51 |
+} |
|
52 |
+ |
|
53 |
+ |
... | ... |
@@ -7,7 +7,8 @@ |
7 | 7 |
getGS(geneids, GS.type) |
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 |
-\item{geneids}{A vector contains all gene ids of interest.} |
|
10 |
+\item{geneids}{A vector contains all gene ids of interest. Gene ids should |
|
11 |
+be gene symbols.} |
|
11 | 12 |
|
12 | 13 |
\item{GS.type}{A string. "GO", "KEGG", or "Reactome".} |
13 | 14 |
} |
... | ... |
@@ -18,6 +19,11 @@ interest and their corresponding genes. |
18 | 19 |
\description{ |
19 | 20 |
This function gets gene sets information. |
20 | 21 |
} |
22 |
+\examples{ |
|
23 |
+geneids = c("FKBP5", "NDUFA1", "STAT5B") |
|
24 |
+GO.list = getGS(geneids, "KEGG") |
|
25 |
+head(GO.list) |
|
26 |
+} |
|
21 | 27 |
\references{ |
22 | 28 |
Carlson M (2017). org.Hs.eg.db: |
23 | 29 |
Genome wide annotation for Human. R package version 3.5.0. |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
\title{Enrichment analysis after adjusting multiple p-values of |
6 | 6 |
each gene by Robust Rank Aggregation} |
7 | 7 |
\usage{ |
8 |
-methylRRA(cpg.pval, array.type = "450K", CpG2Gene = NULL, method = "ORA", |
|
8 |
+methylRRA(cpg.pval, array.type = "450K", FullAnnot = NULL, method = "ORA", |
|
9 | 9 |
GS.list = NULL, GS.idtype = "SYMBOL", GS.type = "GO", minsize = 100, |
10 | 10 |
maxsize = 500) |
11 | 11 |
} |
... | ... |
@@ -14,10 +14,10 @@ methylRRA(cpg.pval, array.type = "450K", CpG2Gene = NULL, method = "ORA", |
14 | 14 |
methylation test. Names should be CpG IDs.} |
15 | 15 |
|
16 | 16 |
\item{array.type}{A string. Either "450K" or "EPIC". Default is "450K". |
17 |
-This argument will be ignore if CpG2Gene is provided.} |
|
17 |
+This argument will be ignored if FullAnnot is provided.} |
|
18 | 18 |
|
19 |
-\item{CpG2Gene}{A matrix or data frame with 1st column CpG ID and 2nd |
|
20 |
-column gene name. Default is NULL.} |
|
19 |
+\item{FullAnnot}{A data frame provided by prepareAnnot function. |
|
20 |
+Default is NULL.} |
|
21 | 21 |
|
22 | 22 |
\item{method}{A string. "ORA" or "GSEA". Default is "ORA"} |
23 | 23 |
|
... | ... |
@@ -48,7 +48,8 @@ data(CpG2Genetoy) |
48 | 48 |
data(cpgtoy) |
49 | 49 |
data(GSlisttoy) |
50 | 50 |
GS.list = GS.list[1:10] |
51 |
-res1 = methylRRA(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene, |
|
51 |
+FullAnnot = prepareAnnot(CpG2Gene) |
|
52 |
+res1 = methylRRA(cpg.pval = cpg.pval, FullAnnot = FullAnnot, |
|
52 | 53 |
method = "ORA", GS.list = GS.list) |
53 | 54 |
head(res1) |
54 | 55 |
} |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
\title{Implement logistic regression adjusting |
6 | 6 |
for number of probes in enrichment analysis} |
7 | 7 |
\usage{ |
8 |
-methylglm(cpg.pval, array.type = "450K", CpG2Gene = NULL, GS.list = NULL, |
|
8 |
+methylglm(cpg.pval, array.type = "450K", FullAnnot = NULL, GS.list = NULL, |
|
9 | 9 |
GS.idtype = "SYMBOL", GS.type = "GO", minsize = 100, maxsize = 500) |
10 | 10 |
} |
11 | 11 |
\arguments{ |
... | ... |
@@ -13,10 +13,10 @@ methylglm(cpg.pval, array.type = "450K", CpG2Gene = NULL, GS.list = NULL, |
13 | 13 |
methylation test. Names should be CpG IDs.} |
14 | 14 |
|
15 | 15 |
\item{array.type}{A string. Either "450K" or "EPIC". Default is "450K". |
16 |
-This argument will be ignore if CpG2Gene is provided.} |
|
16 |
+This argument will be ignored if FullAnnot is provided.} |
|
17 | 17 |
|
18 |
-\item{CpG2Gene}{A matrix or data frame with 1st column CpG ID and 2nd |
|
19 |
-column gene name. Default is NULL.} |
|
18 |
+\item{FullAnnot}{A data frame provided by prepareAnnot function. |
|
19 |
+Default is NULL.} |
|
20 | 20 |
|
21 | 21 |
\item{GS.list}{A list. Default is NULL. If there is no input list, |
22 | 22 |
Gene Ontology is used. Entry names are gene sets names, and elements |
... | ... |
@@ -49,8 +49,9 @@ data(CpG2Genetoy) |
49 | 49 |
data(cpgtoy) |
50 | 50 |
data(GSlisttoy) |
51 | 51 |
GS.list = GS.list[1:10] |
52 |
-res = methylglm(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene, GS.list = GS.list, |
|
53 |
-GS.idtype = "SYMBOL") |
|
52 |
+FullAnnot = prepareAnnot(CpG2Gene) |
|
53 |
+res = methylglm(cpg.pval = cpg.pval, FullAnnot = FullAnnot, |
|
54 |
+GS.list = GS.list, GS.idtype = "SYMBOL") |
|
54 | 55 |
head(res) |
55 | 56 |
} |
56 | 57 |
\references{ |
... | ... |
@@ -21,7 +21,7 @@ Gene Ontology is used. Entry names are gene sets names, and elements |
21 | 21 |
correspond to genes that gene sets contain.} |
22 | 22 |
|
23 | 23 |
\item{GS.idtype}{A string. "SYMBOL", "ENSEMBL", "ENTREZID" or "REFSEQ". |
24 |
-Default is "SYMBOL"} |
|
24 |
+Default is "SYMBOL".} |
|
25 | 25 |
|
26 | 26 |
\item{GS.type}{A string. "GO", "KEGG", or "Reactome"} |
27 | 27 |
|
... | ... |
@@ -39,12 +39,14 @@ This function calls gometh or gsameth function |
39 | 39 |
in missMethyl package to adjust number of probes in gene set testing |
40 | 40 |
} |
41 | 41 |
\examples{ |
42 |
+\dontrun{ |
|
42 | 43 |
library(IlluminaHumanMethylation450kanno.ilmn12.hg19) |
43 | 44 |
data(cpgtoy) |
44 | 45 |
res = methylgometh(cpg.pval = cpg.pval, sig.cut = 0.001, GS.type = "KEGG", |
45 | 46 |
minsize = 200, maxsize = 205) |
46 | 47 |
head(res) |
47 | 48 |
} |
49 |
+} |
|
48 | 50 |
\references{ |
49 | 51 |
Phipson, B., Maksimovic, J., and Oshlack, A. (2015). |
50 | 52 |
missMethyl: an R package for analysing methylation data from Illuminas |
51 | 53 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,33 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/prepareAnnot.R |
|
3 |
+\name{prepareAnnot} |
|
4 |
+\alias{prepareAnnot} |
|
5 |
+\title{Prepare user-supplied mapping between CpGs and genes.} |
|
6 |
+\usage{ |
|
7 |
+prepareAnnot(CpG2Gene, geneidtype = "SYMBOL") |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{CpG2Gene}{A matrix, a data frame, or a list containing CpG to gene |
|
11 |
+mapping. For a matrix or data frame, 1st column should be CpG ID and 2nd |
|
12 |
+column should be gene name. For a list, entry names should be gene names, |
|
13 |
+and elements correspond to CpG IDs.} |
|
14 |
+ |
|
15 |
+\item{geneidtype}{A string. "SYMBOL", "ENSEMBL", "ENTREZID" or "REFSEQ". |
|
16 |
+Default is "SYMBOL".} |
|
17 |
+} |
|
18 |
+\value{ |
|
19 |
+A data frame containing a ready-to-use CpG to gene mapping. |
|
20 |
+} |
|
21 |
+\description{ |
|
22 |
+This function prepares CpG to gene mapping which will be |
|
23 |
+used by methylRRA and methylglm. |
|
24 |
+} |
|
25 |
+\examples{ |
|
26 |
+data(CpG2Genetoy) |
|
27 |
+FullAnnot = prepareAnnot(CpG2Gene) |
|
28 |
+head(FullAnnot) |
|
29 |
+} |
|
30 |
+\references{ |
|
31 |
+Carlson M (2017). org.Hs.eg.db: |
|
32 |
+Genome wide annotation for Human. R package version 3.5.0. |
|
33 |
+} |
... | ... |
@@ -3,6 +3,13 @@ context("Test internal functions") |
3 | 3 |
library(org.Hs.eg.db) |
4 | 4 |
library(reactome.db) |
5 | 5 |
data(GSlisttoy) |
6 |
+data(CpG2Genetoy) |
|
7 |
+ |
|
8 |
+test_that("check prepareAnnot", { |
|
9 |
+ colnames(CpG2Gene) = c("Name", "UCSC_RefGene_Name") |
|
10 |
+ rownames(CpG2Gene) = CpG2Gene$Name |
|
11 |
+ expect_identical(prepareAnnot(CpG2Gene), CpG2Gene) |
|
12 |
+}) |
|
6 | 13 |
|
7 | 14 |
test_that("check getGS", { |
8 | 15 |
geneids = GS.list[[1]] |
... | ... |
@@ -28,7 +35,6 @@ test_that("check getGS", { |
28 | 35 |
|
29 | 36 |
expect_identical(getGS(geneids, "KEGG"), KEGG.list) |
30 | 37 |
|
31 |
- |
|
32 | 38 |
gene.entrez = suppressMessages( |
33 | 39 |
select(org.Hs.eg.db, geneids, |
34 | 40 |
columns = "ENTREZID",keytype = "SYMBOL")$ENTREZID) |
... | ... |
@@ -4,9 +4,10 @@ data(CpG2Genetoy) |
4 | 4 |
data(cpgtoy) |
5 | 5 |
data(GSlisttoy) |
6 | 6 |
GS.list = GS.list[1:10] |
7 |
+FullAnnot = prepareAnnot(CpG2Gene) |
|
7 | 8 |
|
8 | 9 |
test_that("check for valid output", { |
9 |
- res1 = methylglm(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene, |
|
10 |
+ res1 = methylglm(cpg.pval = cpg.pval, FullAnnot = FullAnnot, |
|
10 | 11 |
GS.list = GS.list, GS.idtype = "SYMBOL", |
11 | 12 |
minsize = 100, maxsize = 300) |
12 | 13 |
expect_is(res1, 'data.frame') |
... | ... |
@@ -15,7 +16,7 @@ test_that("check for valid output", { |
15 | 16 |
expect_true(all(res1$padj>=0 & res1$padj<=1)) |
16 | 17 |
expect_true(all(colnames(res1) %in% c("ID", "size", "pvalue", "padj"))) |
17 | 18 |
|
18 |
- res2 = methylRRA(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene, |
|
19 |
+ res2 = methylRRA(cpg.pval = cpg.pval, FullAnnot = FullAnnot, |
|
19 | 20 |
method = "ORA", GS.list = GS.list) |
20 | 21 |
expect_is(res2, 'data.frame') |
21 | 22 |
expect_equal(dim(res2)[2], 4) |
... | ... |
@@ -23,7 +24,7 @@ test_that("check for valid output", { |
23 | 24 |
expect_true(all(res2$padj>=0 & res2$padj<=1)) |
24 | 25 |
expect_true(all(colnames(res2) %in% c("ID", "size", "pvalue", "padj"))) |
25 | 26 |
|
26 |
- res3 = methylRRA(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene, |
|
27 |
+ res3 = methylRRA(cpg.pval = cpg.pval, FullAnnot = FullAnnot, |
|
27 | 28 |
method = "GSEA", GS.list = GS.list) |
28 | 29 |
expect_is(res3, 'data.frame') |
29 | 30 |
expect_equal(dim(res3)[2], 7) |
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
--- |
2 | 2 |
title: "methylGSA: Gene Set Analysis for DNA Methylation Datasets" |
3 |
-author: "Pei Fen Kuan and Xu Ren" |
|
3 |
+author: "Xu Ren and Pei Fen Kuan" |
|
4 | 4 |
date: "`r Sys.Date()`" |
5 | 5 |
output: |
6 | 6 |
rmarkdown::html_document: |
... | ... |
@@ -42,7 +42,7 @@ weighted resampling and Wallenius non-central hypergeometric approximation. |
42 | 42 |
* Gene Ontology (via org.Hs.eg.db) |
43 | 43 |
* KEGG (via org.Hs.eg.db) |
44 | 44 |
* Reactome (via reactome.db) |
45 |
-* User input gene sets. Supported input gene ID types: |
|
45 |
+* User-supplied gene sets. Supported input gene ID types: |
|
46 | 46 |
+ "SYMBOL" |
47 | 47 |
+ "ENSEMBL" |
48 | 48 |
+ "ENTREZID" |
... | ... |
@@ -169,10 +169,10 @@ head(res4, 15) |
169 | 169 |
methylGSA provides many other options for users to customize the analysis. |
170 | 170 |
|
171 | 171 |
* `array.type` is to specify which array type to use. It is either "450K" or |
172 |
-"EPIC". Default is "450K". This argument will be ignore if CpG2Gene is |
|
172 |
+"EPIC". Default is "450K". This argument will be ignored if `FullAnnot` is |
|
173 | 173 |
provided. |
174 |
-* `CpG2Gene` is user supplied mapping between CpG ID and gene. It should be |
|
175 |
-a matrix or data frame with 1st column CpG ID and 2nd column gene name. |
|
174 |
+* `FullAnnot` is preprocessed mapping between CpG ID and gene name provided by |
|
175 |
+prepareAnnot function. Default is NULL. Check example below for details. |
|
176 | 176 |
* `GS.list` is user input gene sets to be tested. It should be a list with |
177 | 177 |
entry names gene sets IDs and elements correspond to genes that gene sets |
178 | 178 |
contain. If there is no input list, Gene Ontology is used. |
... | ... |
@@ -197,17 +197,27 @@ data(GSlisttoy) |
197 | 197 |
head(lapply(GS.list, function(x) return(x[1:30])), 3) |
198 | 198 |
``` |
199 | 199 |
|
200 |
-This is an example of user-supplied CpG to gene mapping |
|
200 |
+methylglm and methylRRA support user supplied CpG ID to gene mapping. The |
|
201 |
+mapping is expected to be a matrix, or a data frame or a list. For a |
|
202 |
+matrix or data frame, 1st column should be CpG ID and 2nd column should be gene |
|
203 |
+name. For a list, entry names should be gene names and elements correspond to |
|
204 |
+CpG IDs. This is an example of user-supplied CpG to gene mapping: |
|
201 | 205 |
```{r} |
202 | 206 |
data(CpG2Genetoy) |
203 | 207 |
head(CpG2Gene) |
204 | 208 |
``` |
205 | 209 |
|
206 |
-Test the gene sets using "ORA" in methylRRA |
|
210 |
+To use user supplied mapping in methylglm or methylRRA, first preprocess the |
|
211 |
+mapping with the prepareAnnot function: |
|
212 |
+```{r} |
|
213 |
+FullAnnot = prepareAnnot(CpG2Gene) |
|
214 |
+``` |
|
207 | 215 |
|
216 |
+Test the gene sets using "ORA" in methylRRA, using the `FullAnnot` argument to |
|
217 |
+provide the preprocessed CpG ID to gene mapping. |
|
208 | 218 |
```{r} |
209 | 219 |
GS.list = GS.list[1:10] |
210 |
-res5 = methylRRA(cpg.pval = cpg.pval, CpG2Gene = CpG2Gene, method = "ORA", |
|
220 |
+res5 = methylRRA(cpg.pval = cpg.pval, FullAnnot = FullAnnot, method = "ORA", |
|
211 | 221 |
GS.list = GS.list, GS.idtype = "SYMBOL", |
212 | 222 |
minsize = 100, maxsize = 300) |
213 | 223 |
head(res5, 10) |