Browse code

Added span argument and sample agg plots

shians authored on 31/07/2020 12:32:53
Showing 1 changed files

... ...
@@ -1,13 +1,20 @@
1 1
 #' Plot aggregate regions
2 2
 #'
3
-#' @param x the NanoMethResults object.
3
+#' @param x the NanoMethResult object.
4 4
 #' @param regions a table of regions or GRanges, or a list of such objects. The table of regions must contain chr, start and end columns.
5 5
 #' @param groups if 'features' is a list, a vector of characters of the same length as the list containing names for each member.
6 6
 #' @param flank the number of flanking bases to add to each side of each region.
7
+#' @param span the span for loess smoothing.
7 8
 #'
8 9
 #' @return
9 10
 #' @export
10
-plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stranded = TRUE) {
11
+plot_agg_regions <- function(
12
+    x,
13
+    regions,
14
+    groups = NULL,
15
+    flank = 2000,
16
+    stranded = TRUE
17
+) {
11 18
     is_df_or_granges <- function(x) {
12 19
         is.data.frame(x) || is(x, "GRanges")
13 20
     }
... ...
@@ -63,7 +70,7 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
63 70
         ggplot2::theme_minimal()
64 71
 
65 72
     # take binned means
66
-    grid_size <- 2^10
73
+    grid_size <- 2^12
67 74
     binned_pos <- seq(-1.1/3, 1 + 1.1/3, length.out = grid_size+ 2)[2:(1 + grid_size)]
68 75
     methy_data <- methy_data %>%
69 76
         mutate(interval = cut(rel_pos, breaks = grid_size)) %>%
... ...
@@ -72,11 +79,9 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
72 79
         ungroup() %>%
73 80
         mutate(rel_pos = binned_pos)
74 81
 
75
-    span <- 0.10
76
-
77 82
     if (!is.null(groups)) {
78 83
         p <- p +
79
-            .geom_smooth(methy_data, sspan = span, group = TRUE) +
84
+            .agg_geom_smooth(methy_data, span = span, group = TRUE) +
80 85
             ggplot2::scale_colour_brewer(palette = "Set1") +
81 86
             ggplot2::coord_cartesian(clip = "off")
82 87
     } else {
... ...
@@ -100,12 +105,114 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
100 105
         ggplot2::ylab("Average Methylation Probability")
101 106
 }
102 107
 
108
+#' Plot aggregate regions with grouped samples
109
+#'
110
+#' @param x the NanoMethResult object.
111
+#' @param regions a table of regions or GRanges.
112
+#' @param flank the number of flanking bases to add to each side of each region.
113
+#' @param span the span for loess smoothing.
114
+#'
115
+#' @return
116
+#' @export
117
+plot_agg_regions_sample_grouped <- function(
118
+    x,
119
+    regions,
120
+    flank = 2000,
121
+    stranded = TRUE,
122
+    span = 0.05
123
+) {
124
+    is_df_or_granges <- function(x) {
125
+        is.data.frame(x) || is(x, "GRanges")
126
+    }
127
+
128
+    assert_that(is_df_or_granges(regions) || is.list(regions))
129
+
130
+    # convert GRanges to tibble
131
+    if (is(regions, "GRanges")) {
132
+        regions <- tibble::as_tibble(regions) %>%
133
+            dplyr::rename(chr = "seqnames")
134
+    }
135
+
136
+    # convert data.frame regions to single element list
137
+    if (!is.null(dim(regions))) {
138
+        regions <- list(regions)
139
+    }
140
+
141
+    # query methylation data
142
+    methy_data <- purrr::map(
143
+        regions,
144
+        function(features) {
145
+            .get_features_with_rel_pos(
146
+                    features,
147
+                    methy = methy(x),
148
+                    flank = flank,
149
+                    stranded = stranded,
150
+                    by_sample = TRUE) %>%
151
+                dplyr::bind_rows()
152
+        }
153
+    )
154
+
155
+    methy_data <- methy_data %>%
156
+        dplyr::bind_rows()
157
+
158
+    methy_data <- methy_data %>%
159
+        dplyr::group_by(sample, .data$rel_pos) %>%
160
+        dplyr::summarise(methy_prob = mean(.data$methy_prob)) %>%
161
+        dplyr::ungroup()
162
+
163
+    # set up plot
164
+    p <- ggplot2::ggplot() +
165
+        ggplot2::ylim(c(0, 1)) +
166
+        ggplot2::theme_minimal()
167
+
168
+    # take binned means
169
+    grid_size <- 2^12
170
+    binned_pos_df <- tibble::tibble(
171
+        interval = levels(cut(methy_data$rel_pos, breaks = grid_size)),
172
+        binned_pos = seq(-1.1/3, 1 + 1.1/3, length.out = grid_size+ 2)[2:(1 + grid_size)]
173
+    )
174
+
175
+    methy_data <- methy_data %>%
176
+        mutate(interval = cut(rel_pos, breaks = grid_size)) %>%
177
+        group_by(sample, interval) %>%
178
+        summarise(methy_prob = mean(methy_prob)) %>%
179
+        ungroup() %>%
180
+        left_join(binned_pos_df, by = "interval") %>%
181
+        mutate(rel_pos = binned_pos) %>%
182
+        left_join(samples(x), by = "sample")
183
+
184
+    p <- p +
185
+        ggplot2::stat_smooth(
186
+            aes(x = .data$rel_pos, y = .data$methy_prob, group = haplotype, col = haplotype),
187
+            method = "loess",
188
+            span = span,
189
+            na.rm = TRUE,
190
+            se = FALSE,
191
+            data = methy_data) +
192
+        ggplot2::scale_colour_brewer(palette = "Set1") +
193
+        ggplot2::coord_cartesian(clip = "off")
194
+
195
+    kb_marker <- round(flank / 1000, 1)
196
+
197
+    labels <- c(glue::glue("-{kb_marker}kb"), "start", "end", glue::glue("+{kb_marker}kb"))
198
+
199
+    p +
200
+        geom_vline(xintercept = 0, linetype = "dashed", color = "grey80") +
201
+        geom_vline(xintercept = 1, linetype = "dashed", color = "grey80") +
202
+        ggplot2::scale_x_continuous(
203
+            name = "Relative Position",
204
+            breaks = c(-.33, 0, 1, 1.33),
205
+            limits = c(-0.33, 1.33),
206
+            labels = labels) +
207
+        ggplot2::ylab("Average Methylation Probability")
208
+}
209
+
103 210
 .split_rows <- function(x) {
104 211
     split(x, 1:nrow(x))
105 212
 }
106 213
 
107 214
 #' @importFrom purrr map2
108
-.get_features_with_rel_pos <- function(features, methy, flank, stranded, feature_ids = NULL) {
215
+.get_features_with_rel_pos <- function(features, methy, flank, stranded, by_sample = FALSE, feature_ids = NULL) {
109 216
     .scale_to_feature <- function(x, feature, stranded = stranded) {
110 217
         within <- x >= feature$start & x <= feature$end
111 218
         upstream <- x < feature$start
... ...
@@ -128,6 +235,10 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
128 235
         features$end + flank * 1.10,
129 236
         simplify = FALSE)
130 237
 
238
+    empty <- purrr::map_lgl(out, function(x) nrow(x) == 0)
239
+    out <- out[!empty]
240
+    features <- features[!empty, ]
241
+
131 242
     out <- purrr::map2(out, .split_rows(features), function(x, y) {
132 243
         if (nrow(x) == 0) {
133 244
             return(tibble::add_column(x, rel_pos = numeric()))
... ...
@@ -135,10 +246,17 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
135 246
 
136 247
         x$rel_pos <- .scale_to_feature(x$pos, y, stranded = stranded)
137 248
 
138
-        x %>%
139
-            group_by(rel_pos) %>%
140
-            summarise(methy_prob = mean(e1071::sigmoid(statistic))) %>%
141
-            ungroup()
249
+        if (by_sample) {
250
+            x %>%
251
+                group_by(sample, rel_pos) %>%
252
+                summarise(methy_prob = mean(e1071::sigmoid(statistic))) %>%
253
+                ungroup()
254
+        } else {
255
+            x %>%
256
+                group_by(rel_pos) %>%
257
+                summarise(methy_prob = mean(e1071::sigmoid(statistic))) %>%
258
+                ungroup()
259
+        }
142 260
     })
143 261
 
144 262
     if (!is.null(feature_ids)) {
... ...
@@ -149,7 +267,7 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
149 267
     out
150 268
 }
151 269
 
152
-.geom_smooth <- function(data, span, group) {
270
+.agg_geom_smooth <- function(data, span, group) {
153 271
     if (group) {
154 272
         ggplot2::stat_smooth(
155 273
             aes(x = .data$rel_pos,