... | ... |
@@ -8,7 +8,7 @@ |
8 | 8 |
#' @export |
9 | 9 |
#' |
10 | 10 |
#' @examples |
11 |
-#' methy <- system.file("methy_subset.tsv.bgz", package = "NanoMethViz") |
|
11 |
+#' methy <- system.file("methy_subset.tsv.bgz", package = "NanoMethViz", mustWork = FALSE) |
|
12 | 12 |
#' bsseq <- methy_to_bsseq(methy) |
13 | 13 |
#' edger_mat <- bsseq_to_edger(bsseq) |
14 | 14 |
bsseq_to_edger <- function(bsseq, regions = NULL) { |
... | ... |
@@ -80,6 +80,8 @@ methy_to_edger <- function(methy, regions = NULL, out_folder = tempdir(), verbos |
80 | 80 |
#' log_m_ratio <- bsseq_to_log_methy_ratio(bsseq, regions) |
81 | 81 |
|
82 | 82 |
bsseq_to_log_methy_ratio <- function(bsseq, regions = NULL, prior_count = 2, drop_na = TRUE) { |
83 |
+ assertthat::assert_that(prior_count >= 0) |
|
84 |
+ |
|
83 | 85 |
if (prior_count < 1) { |
84 | 86 |
warning("prior_count of 1 or higher is recommended") |
85 | 87 |
} |
... | ... |
@@ -12,19 +12,19 @@ |
12 | 12 |
#' bsseq <- methy_to_bsseq(methy) |
13 | 13 |
#' edger_mat <- bsseq_to_edger(bsseq) |
14 | 14 |
bsseq_to_edger <- function(bsseq, regions = NULL) { |
15 |
- edger_col_names <- .get_edger_col_names(bsseq) |
|
15 |
+ edger_col_names <- bsseq_to_edger.get_edger_col_names(bsseq) |
|
16 | 16 |
|
17 | 17 |
if (!is.null(regions)) { |
18 | 18 |
assertthat::assert_that(is(regions, "data.frame")) |
19 | 19 |
regions <- GenomicRanges::GRanges(regions) |
20 | 20 |
edger_row_names <- regions$gene_id |
21 | 21 |
} else { |
22 |
- edger_row_names <- .get_edger_row_names(bsseq) |
|
22 |
+ edger_row_names <- bsseq_to_edger.get_edger_row_names(bsseq) |
|
23 | 23 |
} |
24 | 24 |
|
25 | 25 |
# construct matrix |
26 |
- methylated <- .get_me_mat(bsseq, regions) |
|
27 |
- unmethylated <- .get_un_mat(bsseq, regions) |
|
26 |
+ methylated <- bsseq_to_edger.get_me_mat(bsseq, regions) |
|
27 |
+ unmethylated <- bsseq_to_edger.get_un_mat(bsseq, regions) |
|
28 | 28 |
|
29 | 29 |
edger_mat <- matrix( |
30 | 30 |
0, |
... | ... |
@@ -89,12 +89,12 @@ bsseq_to_log_methy_ratio <- function(bsseq, regions = NULL, prior_count = 2, dro |
89 | 89 |
regions <- GenomicRanges::GRanges(regions) |
90 | 90 |
row_names <- regions$gene_id |
91 | 91 |
} else { |
92 |
- row_names <- .get_edger_row_names(bsseq) |
|
92 |
+ row_names <- bsseq_to_edger.get_edger_row_names(bsseq) |
|
93 | 93 |
} |
94 | 94 |
|
95 | 95 |
col_names <- SummarizedExperiment::colData(bsseq)$sample |
96 |
- methylated <- .get_me_mat(bsseq, regions) |
|
97 |
- unmethylated <- .get_un_mat(bsseq, regions) |
|
96 |
+ methylated <- bsseq_to_edger.get_me_mat(bsseq, regions) |
|
97 |
+ unmethylated <- bsseq_to_edger.get_un_mat(bsseq, regions) |
|
98 | 98 |
|
99 | 99 |
log_mat <- log2(methylated + prior_count) - log2(unmethylated + prior_count) |
100 | 100 |
|
... | ... |
@@ -108,7 +108,8 @@ bsseq_to_log_methy_ratio <- function(bsseq, regions = NULL, prior_count = 2, dro |
108 | 108 |
log_mat |
109 | 109 |
} |
110 | 110 |
|
111 |
-.get_me_mat <- function(bsseq, regions) { |
|
111 |
+# helper functions ---- |
|
112 |
+bsseq_to_edger.get_me_mat <- function(bsseq, regions) { |
|
112 | 113 |
if (!is.null(regions)) { |
113 | 114 |
bsseq::getCoverage(bsseq, regions, type = "M", what = "perRegionTotal") |
114 | 115 |
} else { |
... | ... |
@@ -116,17 +117,17 @@ bsseq_to_log_methy_ratio <- function(bsseq, regions = NULL, prior_count = 2, dro |
116 | 117 |
} |
117 | 118 |
} |
118 | 119 |
|
119 |
-.get_un_mat <- function(bsseq, regions) { |
|
120 |
+bsseq_to_edger.get_un_mat <- function(bsseq, regions) { |
|
120 | 121 |
if (!is.null(regions)) { |
121 | 122 |
cov <- bsseq::getCoverage(bsseq, regions, type = "Cov", what = "perRegionTotal") |
122 | 123 |
} else { |
123 | 124 |
cov <- bsseq::getCoverage(bsseq, type = "Cov") |
124 | 125 |
} |
125 |
- me_mat <- .get_me_mat(bsseq, regions) |
|
126 |
+ me_mat <- bsseq_to_edger.get_me_mat(bsseq, regions) |
|
126 | 127 |
cov - me_mat |
127 | 128 |
} |
128 | 129 |
|
129 |
-.get_edger_col_names <- function(bsseq) { |
|
130 |
+bsseq_to_edger.get_edger_col_names <- function(bsseq) { |
|
130 | 131 |
samples <- SummarizedExperiment::colData(bsseq)$sample |
131 | 132 |
me_names <- paste0(samples, "_Me") |
132 | 133 |
un_names <- paste0(samples, "_Un") |
... | ... |
@@ -139,7 +140,7 @@ bsseq_to_log_methy_ratio <- function(bsseq, regions = NULL, prior_count = 2, dro |
139 | 140 |
edger_col_names |
140 | 141 |
} |
141 | 142 |
|
142 |
-.get_edger_row_names <- function(bsseq) { |
|
143 |
+bsseq_to_edger.get_edger_row_names <- function(bsseq) { |
|
143 | 144 |
gr <- bsseq::getBSseq(bsseq, type = "gr") |
144 | 145 |
seq <- as.character(SummarizedExperiment::seqnames(gr)) |
145 | 146 |
pos <- as.integer(SummarizedExperiment::start(gr)) |
... | ... |
@@ -28,14 +28,14 @@ bsseq_to_edger <- function(bsseq, regions = NULL) { |
28 | 28 |
|
29 | 29 |
edger_mat <- matrix( |
30 | 30 |
0, |
31 |
- ncol = 2*ncol(methylated), |
|
31 |
+ ncol = 2 * ncol(methylated), |
|
32 | 32 |
nrow = nrow(methylated), |
33 | 33 |
dimnames = list(edger_row_names, edger_col_names) |
34 | 34 |
) |
35 | 35 |
|
36 | 36 |
for (i in 0:(ncol(methylated) - 1)) { |
37 |
- edger_mat[, 2*i + 1] <- methylated[, i + 1] |
|
38 |
- edger_mat[, 2*i + 2] <- unmethylated[, i + 1] |
|
37 |
+ edger_mat[, 2 * i + 1] <- methylated[, i + 1] |
|
38 |
+ edger_mat[, 2 * i + 2] <- unmethylated[, i + 1] |
|
39 | 39 |
} |
40 | 40 |
|
41 | 41 |
edger_mat |
... | ... |
@@ -132,8 +132,8 @@ bsseq_to_log_methy_ratio <- function(bsseq, regions = NULL, prior_count = 2, dro |
132 | 132 |
un_names <- paste0(samples, "_Un") |
133 | 133 |
edger_col_names <- character(2 * length(samples)) |
134 | 134 |
for (i in 0:(length(samples) - 1)) { |
135 |
- edger_col_names[2*i + 1] <- me_names[i + 1] |
|
136 |
- edger_col_names[2*i + 2] <- un_names[i + 1] |
|
135 |
+ edger_col_names[2 * i + 1] <- me_names[i + 1] |
|
136 |
+ edger_col_names[2 * i + 2] <- un_names[i + 1] |
|
137 | 137 |
} |
138 | 138 |
|
139 | 139 |
edger_col_names |
... | ... |
@@ -68,6 +68,7 @@ methy_to_edger <- function(methy, regions = NULL, out_folder = tempdir(), verbos |
68 | 68 |
#' @param regions the regions to calculate log-methylation ratios over. If left NULL, ratios will be calculated per |
69 | 69 |
#' site. |
70 | 70 |
#' @param prior_count the prior count added to avoid taking log of 0. |
71 |
+#' @param drop_na whether to drop rows with all NA values. |
|
71 | 72 |
#' |
72 | 73 |
#' @return a matrix containing log-methylation-ratios. |
73 | 74 |
#' @export |
... | ... |
@@ -78,7 +79,7 @@ methy_to_edger <- function(methy, regions = NULL, out_folder = tempdir(), verbos |
78 | 79 |
#' regions <- exons_to_genes(NanoMethViz::exons(nmr)) |
79 | 80 |
#' log_m_ratio <- bsseq_to_log_methy_ratio(bsseq, regions) |
80 | 81 |
|
81 |
-bsseq_to_log_methy_ratio <- function(bsseq, regions = NULL, prior_count = 2) { |
|
82 |
+bsseq_to_log_methy_ratio <- function(bsseq, regions = NULL, prior_count = 2, drop_na = TRUE) { |
|
82 | 83 |
if (prior_count < 1) { |
83 | 84 |
warning("prior_count of 1 or higher is recommended") |
84 | 85 |
} |
... | ... |
@@ -99,6 +100,11 @@ bsseq_to_log_methy_ratio <- function(bsseq, regions = NULL, prior_count = 2) { |
99 | 100 |
|
100 | 101 |
dimnames(log_mat) <- list(row_names, col_names) |
101 | 102 |
|
103 |
+ if (drop_na) { |
|
104 |
+ # drop rows that are all NA |
|
105 |
+ log_mat <- log_mat[!apply(is.na(log_mat), 1, all), ] |
|
106 |
+ } |
|
107 |
+ |
|
102 | 108 |
log_mat |
103 | 109 |
} |
104 | 110 |
|
... | ... |
@@ -13,7 +13,14 @@ |
13 | 13 |
#' edger_mat <- bsseq_to_edger(bsseq) |
14 | 14 |
bsseq_to_edger <- function(bsseq, regions = NULL) { |
15 | 15 |
edger_col_names <- .get_edger_col_names(bsseq) |
16 |
- edger_row_names <- .get_edger_row_names(bsseq) |
|
16 |
+ |
|
17 |
+ if (!is.null(regions)) { |
|
18 |
+ assertthat::assert_that(is(regions, "data.frame")) |
|
19 |
+ regions <- GenomicRanges::GRanges(regions) |
|
20 |
+ edger_row_names <- regions$gene_id |
|
21 |
+ } else { |
|
22 |
+ edger_row_names <- .get_edger_row_names(bsseq) |
|
23 |
+ } |
|
17 | 24 |
|
18 | 25 |
# construct matrix |
19 | 26 |
methylated <- .get_me_mat(bsseq, regions) |
... | ... |
@@ -34,6 +41,24 @@ bsseq_to_edger <- function(bsseq, regions = NULL) { |
34 | 41 |
edger_mat |
35 | 42 |
} |
36 | 43 |
|
44 |
+#' Convert NanoMethResult object to edgeR methylation matrix |
|
45 |
+#' |
|
46 |
+#' @inheritParams methy_to_bsseq |
|
47 |
+#' @param regions the regions to calculate log-methylation ratios over. If left |
|
48 |
+#' NULL, ratios will be calculated per site. |
|
49 |
+#' |
|
50 |
+#' @return a matrix compatible with the edgeR differential methylation pipeline |
|
51 |
+#' @export |
|
52 |
+#' |
|
53 |
+#' @examples |
|
54 |
+#' nmr <- load_example_nanomethresult() |
|
55 |
+#' edger_mat <- methy_to_edger(nmr) |
|
56 |
+#' |
|
57 |
+methy_to_edger <- function(methy, regions = NULL, out_folder = tempdir(), verbose = TRUE) { |
|
58 |
+ bsseq <- methy_to_bsseq(methy = methy, out_folder = out_folder, verbose = verbose) |
|
59 |
+ bsseq_to_edger(bsseq, regions = regions) |
|
60 |
+} |
|
61 |
+ |
|
37 | 62 |
#' Convert BSseq object to log-methylation-ratio matrix |
38 | 63 |
#' |
39 | 64 |
#' Creates a log-methylation-ratio matrix from a BSseq object that is useful for |
... | ... |
@@ -1,6 +1,8 @@ |
1 | 1 |
#' Convert BSseq object to edgeR methylation matrix |
2 | 2 |
#' |
3 | 3 |
#' @param bsseq the BSseq object. |
4 |
+#' @param regions the regions to calculate log-methylation ratios over. If left NULL, ratios will be calculated per |
|
5 |
+#' site. |
|
4 | 6 |
#' |
5 | 7 |
#' @return a matrix compatible with the edgeR differential methylation pipeline |
6 | 8 |
#' @export |
... | ... |
@@ -9,13 +11,13 @@ |
9 | 11 |
#' methy <- system.file("methy_subset.tsv.bgz", package = "NanoMethViz") |
10 | 12 |
#' bsseq <- methy_to_bsseq(methy) |
11 | 13 |
#' edger_mat <- bsseq_to_edger(bsseq) |
12 |
-bsseq_to_edger <- function(bsseq) { |
|
14 |
+bsseq_to_edger <- function(bsseq, regions = NULL) { |
|
13 | 15 |
edger_col_names <- .get_edger_col_names(bsseq) |
14 | 16 |
edger_row_names <- .get_edger_row_names(bsseq) |
15 | 17 |
|
16 | 18 |
# construct matrix |
17 |
- methylated <- .get_me_mat(bsseq) |
|
18 |
- unmethylated <- .get_un_mat(bsseq) |
|
19 |
+ methylated <- .get_me_mat(bsseq, regions) |
|
20 |
+ unmethylated <- .get_un_mat(bsseq, regions) |
|
19 | 21 |
|
20 | 22 |
edger_mat <- matrix( |
21 | 23 |
0, |
... | ... |
@@ -38,7 +40,8 @@ bsseq_to_edger <- function(bsseq) { |
38 | 40 |
#' dimensionality reduction plots. |
39 | 41 |
#' |
40 | 42 |
#' @param bsseq the BSseq object. |
41 |
-#' @param regions the regions to calculate log-methylation ratios over. If left NULL, ratios will be calculated per site. |
|
43 |
+#' @param regions the regions to calculate log-methylation ratios over. If left NULL, ratios will be calculated per |
|
44 |
+#' site. |
|
42 | 45 |
#' @param prior_count the prior count added to avoid taking log of 0. |
43 | 46 |
#' |
44 | 47 |
#' @return a matrix containing log-methylation-ratios. |
... | ... |
@@ -49,6 +52,7 @@ bsseq_to_edger <- function(bsseq) { |
49 | 52 |
#' bsseq <- methy_to_bsseq(nmr) |
50 | 53 |
#' regions <- exons_to_genes(NanoMethViz::exons(nmr)) |
51 | 54 |
#' log_m_ratio <- bsseq_to_log_methy_ratio(bsseq, regions) |
55 |
+ |
|
52 | 56 |
bsseq_to_log_methy_ratio <- function(bsseq, regions = NULL, prior_count = 2) { |
53 | 57 |
if (prior_count < 1) { |
54 | 58 |
warning("prior_count of 1 or higher is recommended") |
... | ... |
@@ -38,24 +38,33 @@ bsseq_to_edger <- function(bsseq) { |
38 | 38 |
#' dimensionality reduction plots. |
39 | 39 |
#' |
40 | 40 |
#' @param bsseq the BSseq object. |
41 |
+#' @param regions the regions to calculate log-methylation ratios over. If left NULL, ratios will be calculated per site. |
|
41 | 42 |
#' @param prior_count the prior count added to avoid taking log of 0. |
42 | 43 |
#' |
43 | 44 |
#' @return a matrix containing log-methylation-ratios. |
44 | 45 |
#' @export |
45 | 46 |
#' |
46 | 47 |
#' @examples |
47 |
-#' methy <- system.file("methy_subset.tsv.bgz", package = "NanoMethViz") |
|
48 |
-#' bsseq <- methy_to_bsseq(methy) |
|
49 |
-#' log_m_ratio <- bsseq_to_log_methy_ratio(bsseq) |
|
50 |
-bsseq_to_log_methy_ratio <- function(bsseq, prior_count = 2) { |
|
48 |
+#' nmr <- load_example_nanomethresult() |
|
49 |
+#' bsseq <- methy_to_bsseq(nmr) |
|
50 |
+#' regions <- exons_to_genes(NanoMethViz::exons(nmr)) |
|
51 |
+#' log_m_ratio <- bsseq_to_log_methy_ratio(bsseq, regions) |
|
52 |
+bsseq_to_log_methy_ratio <- function(bsseq, regions = NULL, prior_count = 2) { |
|
51 | 53 |
if (prior_count < 1) { |
52 | 54 |
warning("prior_count of 1 or higher is recommended") |
53 | 55 |
} |
54 | 56 |
|
57 |
+ if (!is.null(regions)) { |
|
58 |
+ assertthat::assert_that(is(regions, "data.frame")) |
|
59 |
+ regions <- GenomicRanges::GRanges(regions) |
|
60 |
+ row_names <- regions$gene_id |
|
61 |
+ } else { |
|
62 |
+ row_names <- .get_edger_row_names(bsseq) |
|
63 |
+ } |
|
64 |
+ |
|
55 | 65 |
col_names <- SummarizedExperiment::colData(bsseq)$sample |
56 |
- row_names <- .get_edger_row_names(bsseq) |
|
57 |
- methylated <- .get_me_mat(bsseq) |
|
58 |
- unmethylated <- .get_un_mat(bsseq) |
|
66 |
+ methylated <- .get_me_mat(bsseq, regions) |
|
67 |
+ unmethylated <- .get_un_mat(bsseq, regions) |
|
59 | 68 |
|
60 | 69 |
log_mat <- log2(methylated + prior_count) - log2(unmethylated + prior_count) |
61 | 70 |
|
... | ... |
@@ -64,13 +73,21 @@ bsseq_to_log_methy_ratio <- function(bsseq, prior_count = 2) { |
64 | 73 |
log_mat |
65 | 74 |
} |
66 | 75 |
|
67 |
-.get_me_mat <- function(bsseq) { |
|
68 |
- bsseq::getBSseq(bsseq, type = "M") |
|
76 |
+.get_me_mat <- function(bsseq, regions) { |
|
77 |
+ if (!is.null(regions)) { |
|
78 |
+ bsseq::getCoverage(bsseq, regions, type = "M", what = "perRegionTotal") |
|
79 |
+ } else { |
|
80 |
+ bsseq::getCoverage(bsseq, type = "M") |
|
81 |
+ } |
|
69 | 82 |
} |
70 | 83 |
|
71 |
-.get_un_mat <- function(bsseq) { |
|
72 |
- cov <- bsseq::getBSseq(bsseq, type = "Cov") |
|
73 |
- me_mat <- .get_me_mat(bsseq) |
|
84 |
+.get_un_mat <- function(bsseq, regions) { |
|
85 |
+ if (!is.null(regions)) { |
|
86 |
+ cov <- bsseq::getCoverage(bsseq, regions, type = "Cov", what = "perRegionTotal") |
|
87 |
+ } else { |
|
88 |
+ cov <- bsseq::getCoverage(bsseq, type = "Cov") |
|
89 |
+ } |
|
90 |
+ me_mat <- .get_me_mat(bsseq, regions) |
|
74 | 91 |
cov - me_mat |
75 | 92 |
} |
76 | 93 |
|
... | ... |
@@ -46,7 +46,7 @@ bsseq_to_edger <- function(bsseq) { |
46 | 46 |
#' @examples |
47 | 47 |
#' methy <- system.file("methy_subset.tsv.bgz", package = "NanoMethViz") |
48 | 48 |
#' bsseq <- methy_to_bsseq(methy) |
49 |
-#' log_m_ratio <- bsseq_to_methy_log_ratio(bsseq) |
|
49 |
+#' log_m_ratio <- bsseq_to_log_methy_ratio(bsseq) |
|
50 | 50 |
bsseq_to_log_methy_ratio <- function(bsseq, prior_count = 2) { |
51 | 51 |
if (prior_count < 1) { |
52 | 52 |
warning("prior_count of 1 or higher is recommended") |
... | ... |
@@ -48,8 +48,8 @@ bsseq_to_edger <- function(bsseq) { |
48 | 48 |
#' bsseq <- methy_to_bsseq(methy) |
49 | 49 |
#' log_m_ratio <- bsseq_to_methy_log_ratio(bsseq) |
50 | 50 |
bsseq_to_log_methy_ratio <- function(bsseq, prior_count = 2) { |
51 |
- if (prior_count <= 1) { |
|
52 |
- warning("prior_count >1 is recommended") |
|
51 |
+ if (prior_count < 1) { |
|
52 |
+ warning("prior_count of 1 or higher is recommended") |
|
53 | 53 |
} |
54 | 54 |
|
55 | 55 |
col_names <- SummarizedExperiment::colData(bsseq)$sample |
... | ... |
@@ -49,7 +49,7 @@ bsseq_to_edger <- function(bsseq) { |
49 | 49 |
#' log_m_ratio <- bsseq_to_methy_log_ratio(bsseq) |
50 | 50 |
bsseq_to_log_methy_ratio <- function(bsseq, prior_count = 2) { |
51 | 51 |
if (prior_count <= 1) { |
52 |
- warning("prior_count should be > 1 to avoid division by 0") |
|
52 |
+ warning("prior_count >1 is recommended") |
|
53 | 53 |
} |
54 | 54 |
|
55 | 55 |
col_names <- SummarizedExperiment::colData(bsseq)$sample |
... | ... |
@@ -48,6 +48,10 @@ bsseq_to_edger <- function(bsseq) { |
48 | 48 |
#' bsseq <- methy_to_bsseq(methy) |
49 | 49 |
#' log_m_ratio <- bsseq_to_methy_log_ratio(bsseq) |
50 | 50 |
bsseq_to_log_methy_ratio <- function(bsseq, prior_count = 2) { |
51 |
+ if (prior_count <= 1) { |
|
52 |
+ warning("prior_count should be > 1 to avoid division by 0") |
|
53 |
+ } |
|
54 |
+ |
|
51 | 55 |
col_names <- SummarizedExperiment::colData(bsseq)$sample |
52 | 56 |
row_names <- .get_edger_row_names(bsseq) |
53 | 57 |
methylated <- .get_me_mat(bsseq) |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,93 @@ |
1 |
+#' Convert BSseq object to edgeR methylation matrix |
|
2 |
+#' |
|
3 |
+#' @param bsseq the BSseq object. |
|
4 |
+#' |
|
5 |
+#' @return a matrix compatible with the edgeR differential methylation pipeline |
|
6 |
+#' @export |
|
7 |
+#' |
|
8 |
+#' @examples |
|
9 |
+#' methy <- system.file("methy_subset.tsv.bgz", package = "NanoMethViz") |
|
10 |
+#' bsseq <- methy_to_bsseq(methy) |
|
11 |
+#' edger_mat <- bsseq_to_edger(bsseq) |
|
12 |
+bsseq_to_edger <- function(bsseq) { |
|
13 |
+ edger_col_names <- .get_edger_col_names(bsseq) |
|
14 |
+ edger_row_names <- .get_edger_row_names(bsseq) |
|
15 |
+ |
|
16 |
+ # construct matrix |
|
17 |
+ methylated <- .get_me_mat(bsseq) |
|
18 |
+ unmethylated <- .get_un_mat(bsseq) |
|
19 |
+ |
|
20 |
+ edger_mat <- matrix( |
|
21 |
+ 0, |
|
22 |
+ ncol = 2*ncol(methylated), |
|
23 |
+ nrow = nrow(methylated), |
|
24 |
+ dimnames = list(edger_row_names, edger_col_names) |
|
25 |
+ ) |
|
26 |
+ |
|
27 |
+ for (i in 0:(ncol(methylated) - 1)) { |
|
28 |
+ edger_mat[, 2*i + 1] <- methylated[, i + 1] |
|
29 |
+ edger_mat[, 2*i + 2] <- unmethylated[, i + 1] |
|
30 |
+ } |
|
31 |
+ |
|
32 |
+ edger_mat |
|
33 |
+} |
|
34 |
+ |
|
35 |
+#' Convert BSseq object to log-methylation-ratio matrix |
|
36 |
+#' |
|
37 |
+#' Creates a log-methylation-ratio matrix from a BSseq object that is useful for |
|
38 |
+#' dimensionality reduction plots. |
|
39 |
+#' |
|
40 |
+#' @param bsseq the BSseq object. |
|
41 |
+#' @param prior_count the prior count added to avoid taking log of 0. |
|
42 |
+#' |
|
43 |
+#' @return a matrix containing log-methylation-ratios. |
|
44 |
+#' @export |
|
45 |
+#' |
|
46 |
+#' @examples |
|
47 |
+#' methy <- system.file("methy_subset.tsv.bgz", package = "NanoMethViz") |
|
48 |
+#' bsseq <- methy_to_bsseq(methy) |
|
49 |
+#' log_m_ratio <- bsseq_to_methy_log_ratio(bsseq) |
|
50 |
+bsseq_to_log_methy_ratio <- function(bsseq, prior_count = 2) { |
|
51 |
+ col_names <- SummarizedExperiment::colData(bsseq)$sample |
|
52 |
+ row_names <- .get_edger_row_names(bsseq) |
|
53 |
+ methylated <- .get_me_mat(bsseq) |
|
54 |
+ unmethylated <- .get_un_mat(bsseq) |
|
55 |
+ |
|
56 |
+ log_mat <- log2(methylated + prior_count) - log2(unmethylated + prior_count) |
|
57 |
+ |
|
58 |
+ dimnames(log_mat) <- list(row_names, col_names) |
|
59 |
+ |
|
60 |
+ log_mat |
|
61 |
+} |
|
62 |
+ |
|
63 |
+.get_me_mat <- function(bsseq) { |
|
64 |
+ bsseq::getBSseq(bsseq, type = "M") |
|
65 |
+} |
|
66 |
+ |
|
67 |
+.get_un_mat <- function(bsseq) { |
|
68 |
+ cov <- bsseq::getBSseq(bsseq, type = "Cov") |
|
69 |
+ me_mat <- .get_me_mat(bsseq) |
|
70 |
+ cov - me_mat |
|
71 |
+} |
|
72 |
+ |
|
73 |
+.get_edger_col_names <- function(bsseq) { |
|
74 |
+ samples <- SummarizedExperiment::colData(bsseq)$sample |
|
75 |
+ me_names <- paste0(samples, "_Me") |
|
76 |
+ un_names <- paste0(samples, "_Un") |
|
77 |
+ edger_col_names <- character(2 * length(samples)) |
|
78 |
+ for (i in 0:(length(samples) - 1)) { |
|
79 |
+ edger_col_names[2*i + 1] <- me_names[i + 1] |
|
80 |
+ edger_col_names[2*i + 2] <- un_names[i + 1] |
|
81 |
+ } |
|
82 |
+ |
|
83 |
+ edger_col_names |
|
84 |
+} |
|
85 |
+ |
|
86 |
+.get_edger_row_names <- function(bsseq) { |
|
87 |
+ gr <- bsseq::getBSseq(bsseq, type = "gr") |
|
88 |
+ seq <- as.character(SummarizedExperiment::seqnames(gr)) |
|
89 |
+ pos <- as.integer(SummarizedExperiment::start(gr)) |
|
90 |
+ edger_row_names <- paste(seq, pos, sep = "-") |
|
91 |
+ |
|
92 |
+ edger_row_names |
|
93 |
+} |