Browse code

Fixed hardcoded haplotype grouping and broken sparse data behavior

Shians authored on 09/03/2021 01:31:42
Showing 1 changed files

... ...
@@ -12,6 +12,11 @@
12 12
 #'
13 13
 #' @return a ggplot object containing the aggregate methylation trend.
14 14
 #'
15
+#' @examples
16
+#' nmr <- load_example_nanomethresult()
17
+#'
18
+#' plot_agg_regions(nmr, NanoMethViz::exons(nmr))
19
+#'
15 20
 #' @export
16 21
 plot_agg_regions <- function(
17 22
     x,
... ...
@@ -58,6 +63,7 @@ plot_agg_regions <- function(
58 63
     methy_data <- methy_data %>%
59 64
         dplyr::bind_rows(.id = "group")
60 65
 
66
+    # average methylation across relative coordinates
61 67
     if (!is.null(groups)) {
62 68
         methy_data <- methy_data %>%
63 69
         dplyr::group_by(.data$group, .data$rel_pos) %>%
... ...
@@ -70,39 +76,34 @@ plot_agg_regions <- function(
70 76
             dplyr::ungroup()
71 77
     }
72 78
 
79
+    # take binned means
80
+    grid_size <- 2^12
81
+    binned_pos_df <- .get_grid(grid_size = 2^12, rel_pos = methy_data$rel_pos)
82
+
83
+    kb_marker <- round(flank / 1000, 1)
84
+    labels <- c(glue::glue("-{kb_marker}kb"), "start", "end", glue::glue("+{kb_marker}kb"))
85
+
86
+    methy_data <- methy_data %>%
87
+        dplyr::mutate(interval = cut(.data$rel_pos, breaks = grid_size)) %>%
88
+        dplyr::group_by(.data$interval, .drop = TRUE) %>%
89
+        dplyr::summarise(methy_prob = mean(.data$methy_prob)) %>%
90
+        dplyr::ungroup() %>%
91
+        dplyr::left_join(binned_pos_df, by = "interval")
92
+
73 93
     # set up plot
74 94
     p <- ggplot2::ggplot() +
75 95
         ggplot2::ylim(c(0, 1)) +
76
-        ggplot2::theme_minimal()
96
+        ggplot2::theme_minimal() +
97
+        .agg_geom_smooth(methy_data, span = span, group = !is.null(groups))
77 98
 
78
-    # take binned means
79
-    grid_size <- 2^12
80
-    binned_pos <- seq(-1.1/3, 1 + 1.1/3, length.out = grid_size+ 2)[2:(1 + grid_size)]
81
-    methy_data <- methy_data %>%
82
-        mutate(interval = cut(.data$rel_pos, breaks = grid_size)) %>%
83
-        group_by(.data$interval, .drop = TRUE) %>%
84
-        summarise(methy_prob = mean(.data$methy_prob)) %>%
85
-        ungroup() %>%
86
-        mutate(rel_pos = .data$binned_pos)
87 99
 
88 100
     if (!is.null(groups)) {
89
-        p <- p +
90
-            .agg_geom_smooth(methy_data, span = span, group = TRUE) +
91
-            ggplot2::scale_colour_brewer(palette = "Set1") +
92
-            ggplot2::coord_cartesian(clip = "off")
93
-    } else {
94
-        p <- p +
95
-            .agg_geom_smooth(methy_data, span = span, group = FALSE) +
96
-            ggplot2::coord_cartesian(clip = "off")
101
+        p <- p + ggplot2::scale_colour_brewer(palette = "Set1")
97 102
     }
98 103
 
99
-    kb_marker <- round(flank / 1000, 1)
100
-
101
-    labels <- c(glue::glue("-{kb_marker}kb"), "start", "end", glue::glue("+{kb_marker}kb"))
102
-
103
-    p +
104
-        geom_vline(xintercept = 0, linetype = "dashed", color = "grey80") +
105
-        geom_vline(xintercept = 1, linetype = "dashed", color = "grey80") +
104
+    p + ggplot2::coord_cartesian(clip = "off") +
105
+        ggplot2::geom_vline(xintercept = 0, linetype = "dashed", color = "grey80") +
106
+        ggplot2::geom_vline(xintercept = 1, linetype = "dashed", color = "grey80") +
106 107
         ggplot2::scale_x_continuous(
107 108
             name = "Relative Position",
108 109
             breaks = c(-.33, 0, 1, 1.33),
... ...
@@ -117,6 +118,11 @@ plot_agg_regions <- function(
117 118
 #'
118 119
 #' @return a ggplot object containing the aggregate methylation trend.
119 120
 #'
121
+#' @examples
122
+#' nmr <- load_example_nanomethresult()
123
+#'
124
+#' plot_agg_regions_sample_grouped(nmr, NanoMethViz::exons(nmr))
125
+#'
120 126
 #' @export
121 127
 plot_agg_regions_sample_grouped <- function(
122 128
     x,
... ...
@@ -157,24 +163,14 @@ plot_agg_regions_sample_grouped <- function(
157 163
     )
158 164
 
159 165
     methy_data <- methy_data %>%
160
-        dplyr::bind_rows()
161
-
162
-    methy_data <- methy_data %>%
166
+        dplyr::bind_rows() %>%
163 167
         dplyr::group_by(sample, .data$rel_pos) %>%
164 168
         dplyr::summarise(methy_prob = mean(.data$methy_prob)) %>%
165 169
         dplyr::ungroup()
166 170
 
167
-    # set up plot
168
-    p <- ggplot2::ggplot() +
169
-        ggplot2::ylim(c(0, 1)) +
170
-        ggplot2::theme_minimal()
171
-
172 171
     # take binned means
173 172
     grid_size <- 2^12
174
-    binned_pos_df <- tibble::tibble(
175
-        interval = levels(cut(methy_data$rel_pos, breaks = grid_size)),
176
-        binned_pos = seq(-1.1/3, 1 + 1.1/3, length.out = grid_size+ 2)[2:(1 + grid_size)]
177
-    )
173
+    binned_pos_df <- .get_grid(grid_size = grid_size, rel_pos = methy_data$rel_pos)
178 174
 
179 175
     methy_data <- methy_data %>%
180 176
         dplyr::mutate(interval = cut(.data$rel_pos, breaks = grid_size)) %>%
... ...
@@ -185,24 +181,30 @@ plot_agg_regions_sample_grouped <- function(
185 181
         dplyr::mutate(rel_pos = .data$binned_pos) %>%
186 182
         dplyr::left_join(samples(x), by = "sample")
187 183
 
188
-    p <- p +
184
+    methy_data <- dplyr::left_join(methy_data, samples(x), by = c("sample", "group"))
185
+
186
+    kb_marker <- round(flank / 1000, 1)
187
+
188
+    labels <- c(glue::glue("-{kb_marker}kb"), "start", "end", glue::glue("+{kb_marker}kb"))
189
+
190
+    # set up plot
191
+    ggplot2::ggplot() +
192
+        ggplot2::ylim(c(0, 1)) +
193
+        ggplot2::theme_minimal() +
194
+        # smoothed line
189 195
         ggplot2::stat_smooth(
190
-            aes(x = .data$rel_pos, y = .data$methy_prob, group = .data$haplotype, col = .data$haplotype),
196
+            aes(x = .data$rel_pos, y = .data$methy_prob, group = .data$group, col = .data$group),
191 197
             method = "loess",
198
+            formula = "y ~ x",
192 199
             span = span,
193 200
             na.rm = TRUE,
194 201
             se = FALSE,
195 202
             data = methy_data) +
196 203
         ggplot2::scale_colour_brewer(palette = "Set1") +
197
-        ggplot2::coord_cartesian(clip = "off")
198
-
199
-    kb_marker <- round(flank / 1000, 1)
200
-
201
-    labels <- c(glue::glue("-{kb_marker}kb"), "start", "end", glue::glue("+{kb_marker}kb"))
202
-
203
-    p +
204
-        geom_vline(xintercept = 0, linetype = "dashed", color = "grey80") +
205
-        geom_vline(xintercept = 1, linetype = "dashed", color = "grey80") +
204
+        ggplot2::coord_cartesian(clip = "off") +
205
+        # start and end vertical dashes
206
+        ggplot2::geom_vline(xintercept = 0, linetype = "dashed", color = "grey80") +
207
+        ggplot2::geom_vline(xintercept = 1, linetype = "dashed", color = "grey80") +
206 208
         ggplot2::scale_x_continuous(
207 209
             name = "Relative Position",
208 210
             breaks = c(-.33, 0, 1, 1.33),
... ...
@@ -273,22 +275,35 @@ plot_agg_regions_sample_grouped <- function(
273 275
 .agg_geom_smooth <- function(data, span, group) {
274 276
     if (group) {
275 277
         ggplot2::stat_smooth(
276
-            aes(x = .data$rel_pos,
278
+            aes(x = .data$binned_pos,
277 279
                 y = .data$methy_prob,
278 280
                 group = .data$group,
279 281
                 col = .data$group),
280 282
             method = "loess",
283
+            formula = "y ~ x",
281 284
             span = span,
282 285
             na.rm = TRUE,
283 286
             se = FALSE,
284 287
             data = data)
285 288
     } else {
286 289
         ggplot2::stat_smooth(
287
-            aes(x = .data$rel_pos, y = .data$methy_prob),
290
+            aes(x = .data$binned_pos, y = .data$methy_prob),
288 291
             method = "loess",
292
+            formula = "y ~ x",
289 293
             span = span,
290 294
             na.rm = TRUE,
291 295
             se = FALSE,
292 296
             data = data)
293 297
     }
294 298
 }
299
+
300
+.get_grid <- function(grid_size, rel_pos) {
301
+    grid_size <- 2^12
302
+    binned_pos <- seq(-1.1/3, 1 + 1.1/3, length.out = grid_size + 2)[-c(1, grid_size + 2)]
303
+    binned_intervals <- levels(cut(rel_pos, breaks = grid_size))
304
+
305
+    tibble::tibble(
306
+        interval = levels(cut(rel_pos, breaks = grid_size)),
307
+        binned_pos = seq(-1.1/3, 1 + 1.1/3, length.out = grid_size+ 2)[2:(1 + grid_size)]
308
+    )
309
+}