... | ... |
@@ -1,13 +1,20 @@ |
1 | 1 |
#' Plot aggregate regions |
2 | 2 |
#' |
3 |
-#' @param x the NanoMethResults object. |
|
3 |
+#' @param x the NanoMethResult object. |
|
4 | 4 |
#' @param regions a table of regions or GRanges, or a list of such objects. The table of regions must contain chr, start and end columns. |
5 | 5 |
#' @param groups if 'features' is a list, a vector of characters of the same length as the list containing names for each member. |
6 | 6 |
#' @param flank the number of flanking bases to add to each side of each region. |
7 |
+#' @param span the span for loess smoothing. |
|
7 | 8 |
#' |
8 | 9 |
#' @return |
9 | 10 |
#' @export |
10 |
-plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stranded = TRUE) { |
|
11 |
+plot_agg_regions <- function( |
|
12 |
+ x, |
|
13 |
+ regions, |
|
14 |
+ groups = NULL, |
|
15 |
+ flank = 2000, |
|
16 |
+ stranded = TRUE |
|
17 |
+) { |
|
11 | 18 |
is_df_or_granges <- function(x) { |
12 | 19 |
is.data.frame(x) || is(x, "GRanges") |
13 | 20 |
} |
... | ... |
@@ -63,7 +70,7 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra |
63 | 70 |
ggplot2::theme_minimal() |
64 | 71 |
|
65 | 72 |
# take binned means |
66 |
- grid_size <- 2^10 |
|
73 |
+ grid_size <- 2^12 |
|
67 | 74 |
binned_pos <- seq(-1.1/3, 1 + 1.1/3, length.out = grid_size+ 2)[2:(1 + grid_size)] |
68 | 75 |
methy_data <- methy_data %>% |
69 | 76 |
mutate(interval = cut(rel_pos, breaks = grid_size)) %>% |
... | ... |
@@ -72,11 +79,9 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra |
72 | 79 |
ungroup() %>% |
73 | 80 |
mutate(rel_pos = binned_pos) |
74 | 81 |
|
75 |
- span <- 0.10 |
|
76 |
- |
|
77 | 82 |
if (!is.null(groups)) { |
78 | 83 |
p <- p + |
79 |
- .geom_smooth(methy_data, sspan = span, group = TRUE) + |
|
84 |
+ .agg_geom_smooth(methy_data, span = span, group = TRUE) + |
|
80 | 85 |
ggplot2::scale_colour_brewer(palette = "Set1") + |
81 | 86 |
ggplot2::coord_cartesian(clip = "off") |
82 | 87 |
} else { |
... | ... |
@@ -100,12 +105,114 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra |
100 | 105 |
ggplot2::ylab("Average Methylation Probability") |
101 | 106 |
} |
102 | 107 |
|
108 |
+#' Plot aggregate regions with grouped samples |
|
109 |
+#' |
|
110 |
+#' @param x the NanoMethResult object. |
|
111 |
+#' @param regions a table of regions or GRanges. |
|
112 |
+#' @param flank the number of flanking bases to add to each side of each region. |
|
113 |
+#' @param span the span for loess smoothing. |
|
114 |
+#' |
|
115 |
+#' @return |
|
116 |
+#' @export |
|
117 |
+plot_agg_regions_sample_grouped <- function( |
|
118 |
+ x, |
|
119 |
+ regions, |
|
120 |
+ flank = 2000, |
|
121 |
+ stranded = TRUE, |
|
122 |
+ span = 0.05 |
|
123 |
+) { |
|
124 |
+ is_df_or_granges <- function(x) { |
|
125 |
+ is.data.frame(x) || is(x, "GRanges") |
|
126 |
+ } |
|
127 |
+ |
|
128 |
+ assert_that(is_df_or_granges(regions) || is.list(regions)) |
|
129 |
+ |
|
130 |
+ # convert GRanges to tibble |
|
131 |
+ if (is(regions, "GRanges")) { |
|
132 |
+ regions <- tibble::as_tibble(regions) %>% |
|
133 |
+ dplyr::rename(chr = "seqnames") |
|
134 |
+ } |
|
135 |
+ |
|
136 |
+ # convert data.frame regions to single element list |
|
137 |
+ if (!is.null(dim(regions))) { |
|
138 |
+ regions <- list(regions) |
|
139 |
+ } |
|
140 |
+ |
|
141 |
+ # query methylation data |
|
142 |
+ methy_data <- purrr::map( |
|
143 |
+ regions, |
|
144 |
+ function(features) { |
|
145 |
+ .get_features_with_rel_pos( |
|
146 |
+ features, |
|
147 |
+ methy = methy(x), |
|
148 |
+ flank = flank, |
|
149 |
+ stranded = stranded, |
|
150 |
+ by_sample = TRUE) %>% |
|
151 |
+ dplyr::bind_rows() |
|
152 |
+ } |
|
153 |
+ ) |
|
154 |
+ |
|
155 |
+ methy_data <- methy_data %>% |
|
156 |
+ dplyr::bind_rows() |
|
157 |
+ |
|
158 |
+ methy_data <- methy_data %>% |
|
159 |
+ dplyr::group_by(sample, .data$rel_pos) %>% |
|
160 |
+ dplyr::summarise(methy_prob = mean(.data$methy_prob)) %>% |
|
161 |
+ dplyr::ungroup() |
|
162 |
+ |
|
163 |
+ # set up plot |
|
164 |
+ p <- ggplot2::ggplot() + |
|
165 |
+ ggplot2::ylim(c(0, 1)) + |
|
166 |
+ ggplot2::theme_minimal() |
|
167 |
+ |
|
168 |
+ # take binned means |
|
169 |
+ grid_size <- 2^12 |
|
170 |
+ binned_pos_df <- tibble::tibble( |
|
171 |
+ interval = levels(cut(methy_data$rel_pos, breaks = grid_size)), |
|
172 |
+ binned_pos = seq(-1.1/3, 1 + 1.1/3, length.out = grid_size+ 2)[2:(1 + grid_size)] |
|
173 |
+ ) |
|
174 |
+ |
|
175 |
+ methy_data <- methy_data %>% |
|
176 |
+ mutate(interval = cut(rel_pos, breaks = grid_size)) %>% |
|
177 |
+ group_by(sample, interval) %>% |
|
178 |
+ summarise(methy_prob = mean(methy_prob)) %>% |
|
179 |
+ ungroup() %>% |
|
180 |
+ left_join(binned_pos_df, by = "interval") %>% |
|
181 |
+ mutate(rel_pos = binned_pos) %>% |
|
182 |
+ left_join(samples(x), by = "sample") |
|
183 |
+ |
|
184 |
+ p <- p + |
|
185 |
+ ggplot2::stat_smooth( |
|
186 |
+ aes(x = .data$rel_pos, y = .data$methy_prob, group = haplotype, col = haplotype), |
|
187 |
+ method = "loess", |
|
188 |
+ span = span, |
|
189 |
+ na.rm = TRUE, |
|
190 |
+ se = FALSE, |
|
191 |
+ data = methy_data) + |
|
192 |
+ ggplot2::scale_colour_brewer(palette = "Set1") + |
|
193 |
+ ggplot2::coord_cartesian(clip = "off") |
|
194 |
+ |
|
195 |
+ kb_marker <- round(flank / 1000, 1) |
|
196 |
+ |
|
197 |
+ labels <- c(glue::glue("-{kb_marker}kb"), "start", "end", glue::glue("+{kb_marker}kb")) |
|
198 |
+ |
|
199 |
+ p + |
|
200 |
+ geom_vline(xintercept = 0, linetype = "dashed", color = "grey80") + |
|
201 |
+ geom_vline(xintercept = 1, linetype = "dashed", color = "grey80") + |
|
202 |
+ ggplot2::scale_x_continuous( |
|
203 |
+ name = "Relative Position", |
|
204 |
+ breaks = c(-.33, 0, 1, 1.33), |
|
205 |
+ limits = c(-0.33, 1.33), |
|
206 |
+ labels = labels) + |
|
207 |
+ ggplot2::ylab("Average Methylation Probability") |
|
208 |
+} |
|
209 |
+ |
|
103 | 210 |
.split_rows <- function(x) { |
104 | 211 |
split(x, 1:nrow(x)) |
105 | 212 |
} |
106 | 213 |
|
107 | 214 |
#' @importFrom purrr map2 |
108 |
-.get_features_with_rel_pos <- function(features, methy, flank, stranded, feature_ids = NULL) { |
|
215 |
+.get_features_with_rel_pos <- function(features, methy, flank, stranded, by_sample = FALSE, feature_ids = NULL) { |
|
109 | 216 |
.scale_to_feature <- function(x, feature, stranded = stranded) { |
110 | 217 |
within <- x >= feature$start & x <= feature$end |
111 | 218 |
upstream <- x < feature$start |
... | ... |
@@ -128,6 +235,10 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra |
128 | 235 |
features$end + flank * 1.10, |
129 | 236 |
simplify = FALSE) |
130 | 237 |
|
238 |
+ empty <- purrr::map_lgl(out, function(x) nrow(x) == 0) |
|
239 |
+ out <- out[!empty] |
|
240 |
+ features <- features[!empty, ] |
|
241 |
+ |
|
131 | 242 |
out <- purrr::map2(out, .split_rows(features), function(x, y) { |
132 | 243 |
if (nrow(x) == 0) { |
133 | 244 |
return(tibble::add_column(x, rel_pos = numeric())) |
... | ... |
@@ -135,10 +246,17 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra |
135 | 246 |
|
136 | 247 |
x$rel_pos <- .scale_to_feature(x$pos, y, stranded = stranded) |
137 | 248 |
|
138 |
- x %>% |
|
139 |
- group_by(rel_pos) %>% |
|
140 |
- summarise(methy_prob = mean(e1071::sigmoid(statistic))) %>% |
|
141 |
- ungroup() |
|
249 |
+ if (by_sample) { |
|
250 |
+ x %>% |
|
251 |
+ group_by(sample, rel_pos) %>% |
|
252 |
+ summarise(methy_prob = mean(e1071::sigmoid(statistic))) %>% |
|
253 |
+ ungroup() |
|
254 |
+ } else { |
|
255 |
+ x %>% |
|
256 |
+ group_by(rel_pos) %>% |
|
257 |
+ summarise(methy_prob = mean(e1071::sigmoid(statistic))) %>% |
|
258 |
+ ungroup() |
|
259 |
+ } |
|
142 | 260 |
}) |
143 | 261 |
|
144 | 262 |
if (!is.null(feature_ids)) { |
... | ... |
@@ -149,7 +267,7 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra |
149 | 267 |
out |
150 | 268 |
} |
151 | 269 |
|
152 |
-.geom_smooth <- function(data, span, group) { |
|
270 |
+.agg_geom_smooth <- function(data, span, group) { |
|
153 | 271 |
if (group) { |
154 | 272 |
ggplot2::stat_smooth( |
155 | 273 |
aes(x = .data$rel_pos, |