...
|
...
|
@@ -14,15 +14,18 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
|
14
|
14
|
|
15
|
15
|
assert_that(is_df_or_granges(regions) || is.list(regions))
|
16
|
16
|
|
|
17
|
+ # convert GRanges to tibble
|
17
|
18
|
if (is(regions, "GRanges")) {
|
18
|
19
|
regions <- tibble::as_tibble(regions) %>%
|
19
|
20
|
dplyr::rename(chr = "seqnames")
|
20
|
21
|
}
|
21
|
22
|
|
|
23
|
+ # convert data.frame regions to single element list
|
22
|
24
|
if (!is.null(dim(regions))) {
|
23
|
25
|
regions <- list(regions)
|
24
|
26
|
}
|
25
|
27
|
|
|
28
|
+ # query methylation data
|
26
|
29
|
methy_data <- purrr::map(
|
27
|
30
|
regions,
|
28
|
31
|
function(features) {
|
...
|
...
|
@@ -42,43 +45,43 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
|
42
|
45
|
methy_data <- methy_data %>%
|
43
|
46
|
dplyr::bind_rows(.id = "group")
|
44
|
47
|
|
45
|
|
- methy_data <- methy_data %>%
|
|
48
|
+ if (!is.null(groups)) {
|
|
49
|
+ methy_data <- methy_data %>%
|
46
|
50
|
dplyr::group_by(.data$group, .data$rel_pos) %>%
|
47
|
|
- dplyr::summarise(statistic = mean(.data$statistic)) %>%
|
48
|
|
- dplyr::mutate(methy_prob = e1071::sigmoid(statistic)) %>%
|
|
51
|
+ dplyr::summarise(methy_prob = mean(.data$methy_prob)) %>%
|
49
|
52
|
dplyr::ungroup()
|
50
|
|
-
|
51
|
|
- methy_data_before <- filter(methy_data, rel_pos <= 0)
|
52
|
|
- if (nrow(methy_data_before) > 10000) {
|
53
|
|
- methy_data_before <- dplyr::sample_n(methy_data_before, 10000)
|
54
|
|
- }
|
55
|
|
-
|
56
|
|
- methy_data_after <- filter(methy_data, rel_pos >= 1)
|
57
|
|
- if (nrow(methy_data_after) > 10000) {
|
58
|
|
- methy_data_after <- dplyr::sample_n(methy_data_after, 10000)
|
59
|
|
- }
|
60
|
|
-
|
61
|
|
- methy_data <- filter(methy_data, between(rel_pos, 0, 1))
|
62
|
|
- if (nrow(methy_data) > 10000) {
|
63
|
|
- methy_data <- dplyr::sample_n(methy_data, 10000)
|
|
53
|
+ } else {
|
|
54
|
+ methy_data <- methy_data %>%
|
|
55
|
+ dplyr::group_by(.data$rel_pos) %>%
|
|
56
|
+ dplyr::summarise(methy_prob = mean(.data$methy_prob)) %>%
|
|
57
|
+ dplyr::ungroup()
|
64
|
58
|
}
|
65
|
59
|
|
|
60
|
+ # set up plot
|
66
|
61
|
p <- ggplot2::ggplot() +
|
67
|
62
|
ggplot2::ylim(c(0, 1)) +
|
68
|
63
|
ggplot2::theme_minimal()
|
69
|
64
|
|
|
65
|
+ # take binned means
|
|
66
|
+ grid_size <- 2^10
|
|
67
|
+ binned_pos <- seq(-1.1/3, 1 + 1.1/3, length.out = grid_size+ 2)[2:(1 + grid_size)]
|
|
68
|
+ methy_data <- methy_data %>%
|
|
69
|
+ mutate(interval = cut(rel_pos, breaks = grid_size)) %>%
|
|
70
|
+ group_by(interval, .drop = TRUE) %>%
|
|
71
|
+ summarise(methy_prob = mean(methy_prob)) %>%
|
|
72
|
+ ungroup() %>%
|
|
73
|
+ mutate(rel_pos = binned_pos)
|
|
74
|
+
|
|
75
|
+ span <- 0.10
|
|
76
|
+
|
70
|
77
|
if (!is.null(groups)) {
|
71
|
78
|
p <- p +
|
72
|
|
- .geom_smooth(methy_data, span = 0.3, group = TRUE) +
|
73
|
|
- .geom_smooth(methy_data_before, span = 0.35, group = TRUE) +
|
74
|
|
- .geom_smooth(methy_data_after, span = 0.35, group = TRUE) +
|
|
79
|
+ .geom_smooth(methy_data, sspan = span, group = TRUE) +
|
75
|
80
|
ggplot2::scale_colour_brewer(palette = "Set1") +
|
76
|
81
|
ggplot2::coord_cartesian(clip = "off")
|
77
|
82
|
} else {
|
78
|
83
|
p <- p +
|
79
|
|
- .geom_smooth(methy_data, span = 0.3, group = FALSE) +
|
80
|
|
- .geom_smooth(methy_data_before, span = 0.35, group = FALSE) +
|
81
|
|
- .geom_smooth(methy_data_after, span = 0.35, group = FALSE) +
|
|
84
|
+ .geom_smooth(methy_data, span = span, group = FALSE) +
|
82
|
85
|
ggplot2::coord_cartesian(clip = "off")
|
83
|
86
|
}
|
84
|
87
|
|
...
|
...
|
@@ -91,8 +94,8 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
|
91
|
94
|
geom_vline(xintercept = 1, linetype = "dashed", color = "grey80") +
|
92
|
95
|
ggplot2::scale_x_continuous(
|
93
|
96
|
name = "Relative Position",
|
94
|
|
- breaks = c(-.25, 0, 1, 1.25),
|
95
|
|
- limits = c(-0.25, 1.25),
|
|
97
|
+ breaks = c(-.33, 0, 1, 1.33),
|
|
98
|
+ limits = c(-0.33, 1.33),
|
96
|
99
|
labels = labels) +
|
97
|
100
|
ggplot2::ylab("Average Methylation Probability")
|
98
|
101
|
}
|
...
|
...
|
@@ -110,8 +113,8 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
|
110
|
113
|
|
111
|
114
|
out <- numeric(length(x))
|
112
|
115
|
out[within] <- (x[within] - feature$start) / (feature$end - feature$start)
|
113
|
|
- out[upstream] <- (x[upstream] - feature$start) / flank / 4
|
114
|
|
- out[downstream] <- 1 + ((x[downstream] - feature$end) / flank / 4)
|
|
116
|
+ out[upstream] <- (x[upstream] - feature$start) / flank / 3
|
|
117
|
+ out[downstream] <- 1 + ((x[downstream] - feature$end) / flank / 3)
|
115
|
118
|
if (stranded && length(feature$strand) > 0 && feature$strand == "-") {
|
116
|
119
|
out <- 1 - out
|
117
|
120
|
}
|
...
|
...
|
@@ -121,8 +124,8 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
|
121
|
124
|
out <- query_methy(
|
122
|
125
|
methy,
|
123
|
126
|
features$chr,
|
124
|
|
- features$start - flank * 1.1,
|
125
|
|
- features$end + flank * 1.1,
|
|
127
|
+ features$start - flank * 1.10,
|
|
128
|
+ features$end + flank * 1.10,
|
126
|
129
|
simplify = FALSE)
|
127
|
130
|
|
128
|
131
|
out <- purrr::map2(out, .split_rows(features), function(x, y) {
|
...
|
...
|
@@ -133,8 +136,8 @@ plot_aggregate_regions <- function(x, regions, groups = NULL, flank = 2000, stra
|
133
|
136
|
x$rel_pos <- .scale_to_feature(x$pos, y, stranded = stranded)
|
134
|
137
|
|
135
|
138
|
x %>%
|
136
|
|
- group_by(chr, rel_pos) %>%
|
137
|
|
- summarise(statistic = mean(statistic)) %>%
|
|
139
|
+ group_by(rel_pos) %>%
|
|
140
|
+ summarise(methy_prob = mean(e1071::sigmoid(statistic))) %>%
|
138
|
141
|
ungroup()
|
139
|
142
|
})
|
140
|
143
|
|