Browse code

Implemented binary thresholding for plotting functions

Shians authored on 13/08/2021 06:50:42
Showing 12 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: NanoMethViz
2 2
 Type: Package
3 3
 Title: Visualise methlation data from Oxford Nanopore sequencing
4
-Version: 1.99.3
4
+Version: 1.99.4
5 5
 Authors@R: c(
6 6
     person("Shian", "Su", email = "[email protected]", role = c("cre", "aut")))
7 7
 Description: NanoMethViz is a toolkit for visualising methylation data from 
... ...
@@ -36,7 +36,7 @@ Imports:
36 36
     GenomicRanges,
37 37
     ggthemes,
38 38
     glue,
39
-    limma,
39
+    limma (>= 3.44.0),
40 40
     patchwork,
41 41
     purrr,
42 42
     rlang,
... ...
@@ -3,6 +3,9 @@
3 3
 #' @param x the NanoMethResult object.
4 4
 #' @param regions a table of regions containing at least columns chr, strand,
5 5
 #'   start and end. Any additiona columns can be used for grouping.
6
+#' @param binary_threshold the modification probability such that calls with
7
+#'   modification probability above the threshold are considered methylated, and
8
+#'   those with probability equal or below are considered unmethylated.
6 9
 #' @param group_col the column to group aggregated trends by. This column can
7 10
 #'   be in from the regions table or samples(x).
8 11
 #' @param flank the number of flanking bases to add to each side of each region.
... ...
@@ -24,6 +27,7 @@
24 27
 plot_agg_regions <- function(
25 28
     x,
26 29
     regions,
30
+    binary_threshold = 0.5,
27 31
     group_col = NULL,
28 32
     flank = 2000,
29 33
     stranded = TRUE,
... ...
@@ -41,7 +45,7 @@ plot_agg_regions <- function(
41 45
     # query methylation data
42 46
     methy_data <- .get_methy_data(x, regions, flank)
43 47
     methy_data <- .scale_methy_data(methy_data, stranded, flank)
44
-    methy_data <- .pos_avg(methy_data)
48
+    methy_data <- .pos_avg(methy_data, binary_threshold = binary_threshold)
45 49
     # unnest and annotate after position averaging to reduce data burden
46 50
     methy_data <- .unnest_anno(methy_data, NanoMethViz::samples(x))
47 51
     methy_data <- .bin_avg(methy_data, group_col = group_col)
... ...
@@ -132,12 +136,12 @@ plot_agg_regions <- function(
132 136
     methy_data
133 137
 }
134 138
 
135
-.pos_avg <- function(x) {
139
+.pos_avg <- function(x, binary_threshold = 0.5) {
136 140
     average_by_pos <- function(x) {
137 141
           x %>%
138 142
               dplyr::group_by(.data$sample, .data$chr, .data$strand, .data$rel_pos) %>%
139 143
               dplyr::summarise(
140
-                  methy_prop = mean(.data$statistic > 0),
144
+                  methy_prop = mean(e1071::sigmoid(.data$statistic) > binary_threshold),
141 145
                   .groups = "drop")
142 146
       }
143 147
 
... ...
@@ -22,6 +22,9 @@ setGeneric("plot_gene", function(x, gene, ...) {
22 22
 #'   values for left and right window size. Values indicate proportion of gene
23 23
 #'   length.
24 24
 #' @param anno_regions the data.frame of regions to annotate.
25
+#' @param binary_threshold the modification probability such that calls with
26
+#'   modification probability above the threshold are set to 1 and probabilities
27
+#'   equalt to or below the threshold are set to 0.
25 28
 #' @param spaghetti whether or not individual reads should be shown.
26 29
 #' @param span the span for loess smoothing.
27 30
 #' @param gene_anno whether or not gene annotation tracks are plotted.
... ...
@@ -40,6 +43,7 @@ setMethod("plot_gene", signature(x = "NanoMethResult", gene = "character"),
40 43
         gene,
41 44
         window_prop = 0.3,
42 45
         anno_regions = NULL,
46
+        binary_threshold = NULL,
43 47
         spaghetti = FALSE,
44 48
         span = NULL,
45 49
         gene_anno = TRUE
... ...
@@ -49,6 +53,7 @@ setMethod("plot_gene", signature(x = "NanoMethResult", gene = "character"),
49 53
             gene,
50 54
             window_prop = window_prop,
51 55
             anno_regions = anno_regions,
56
+            binary_threshold = binary_threshold,
52 57
             spaghetti = spaghetti,
53 58
             span = span,
54 59
             gene_anno = gene_anno
... ...
@@ -61,6 +66,7 @@ setMethod("plot_gene", signature(x = "NanoMethResult", gene = "character"),
61 66
     gene,
62 67
     window_prop,
63 68
     anno_regions,
69
+    binary_threshold,
64 70
     spaghetti,
65 71
     span,
66 72
     gene_anno
... ...
@@ -91,6 +97,7 @@ setMethod("plot_gene", signature(x = "NanoMethResult", gene = "character"),
91 97
             window_prop = window_prop,
92 98
             sample_anno = sample_anno,
93 99
             anno_regions = anno_regions,
100
+            binary_threshold = binary_threshold,
94 101
             spaghetti = spaghetti,
95 102
             span = span
96 103
         )
... ...
@@ -14,6 +14,7 @@ plot_grange <- function(
14 14
     x,
15 15
     grange,
16 16
     anno_regions = NULL,
17
+    binary_threshold = NULL,
17 18
     spaghetti = FALSE,
18 19
     span = NULL,
19 20
     window_prop = 0.3
... ...
@@ -34,6 +35,7 @@ plot_grange <- function(
34 35
         start = start,
35 36
         end = end,
36 37
         anno_regions = anno_regions,
38
+        binary_threshold = binary_threshold,
37 39
         spaghetti = spaghetti,
38 40
         span = span,
39 41
         window_prop = window_prop
... ...
@@ -38,16 +38,25 @@ plot_mds <- function(x, top = 500, plot_dims = c(1, 2), labels = colnames(x), gr
38 38
     var_exp1 <- round(100 * mds_res$var.explained[plot_dims[1]])
39 39
     var_exp2 <- round(100 * mds_res$var.explained[plot_dims[2]])
40 40
 
41
-    plot_data <- data.frame(
42
-        dim1 = mds_res$eigen.vectors[, plot_dims[1]],
43
-        dim2 = mds_res$eigen.vectors[, plot_dims[2]]
44
-    )
41
+    if ("eigen.vectors" %in% names(mds_res)) {
42
+        plot_data <- data.frame(
43
+            dim1 = mds_res$eigen.vectors[, plot_dims[1]],
44
+            dim2 = mds_res$eigen.vectors[, plot_dims[2]]
45
+        )
46
+        xlabel <- glue::glue("Leading logFC Dim {plot_dims[1]} ({var_exp1}%)")
47
+        ylabel <- glue::glue("Leading logFC Dim {plot_dims[2]} ({var_exp2}%)")
48
+    } else {
49
+        plot_data <- data.frame(
50
+            dim1 = mds_res$cmdscale.out[, plot_dims[1]],
51
+            dim2 = mds_res$cmdscale.out[, plot_dims[2]]
52
+        )
53
+        xlabel <- glue::glue("Leading logFC Dim {plot_dims[1]}")
54
+        ylabel <- glue::glue("Leading logFC Dim {plot_dims[2]}")
55
+    }
45 56
 
46 57
     plot_data$labels <- labels
47 58
     plot_data$groups <- groups
48 59
 
49
-    xlabel <- glue::glue("Leading logFC Dim {plot_dims[1]} ({var_exp1}%)")
50
-    ylabel <- glue::glue("Leading logFC Dim {plot_dims[2]} ({var_exp2}%)")
51 60
 
52 61
     if (!is.null(groups)) {
53 62
         p <- ggplot2::ggplot(plot_data, ggplot2::aes_string(x = "dim1", y = "dim2", col = "groups"))
... ...
@@ -8,6 +8,7 @@ plot_methylation_internal <- function(
8 8
     title,
9 9
     palette_col = ggplot2::scale_colour_brewer(palette = "Set1"),
10 10
     anno_regions = NULL,
11
+    binary_threshold = NULL,
11 12
     spaghetti = FALSE,
12 13
     span = NULL
13 14
 ) {
... ...
@@ -26,11 +27,21 @@ plot_methylation_internal <- function(
26 27
     }
27 28
 
28 29
     # extract group information and convert probabilities
29
-    plot_data <- methy_data %>%
30
-        dplyr::inner_join(sample_anno, by = "sample") %>%
31
-        dplyr::mutate(
32
-            mod_prob = e1071::sigmoid(.data$statistic)
33
-        )
30
+    if (is.null(binary_threshold)) {
31
+        plot_data <- methy_data %>%
32
+            dplyr::inner_join(sample_anno, by = "sample") %>%
33
+            dplyr::mutate(
34
+                mod_prob = e1071::sigmoid(.data$statistic)
35
+            )
36
+    } else {
37
+        plot_data <- methy_data %>%
38
+            dplyr::inner_join(sample_anno, by = "sample") %>%
39
+            dplyr::mutate(
40
+                mod_prob = as.numeric(
41
+                    e1071::sigmoid(.data$statistic) > binary_threshold
42
+                )
43
+            )
44
+    }
34 45
 
35 46
     # set up plot
36 47
     p <- ggplot(plot_data, aes(x = .data$pos, col = .data$group))
... ...
@@ -99,7 +110,8 @@ plot_feature <- function(
99 110
     sample_anno,
100 111
     anno_regions = NULL,
101 112
     window_prop = c(0.3, 0.3),
102
-    spaghetti = TRUE,
113
+    binary_threshold = NULL,
114
+    spaghetti = FALSE,
103 115
     span = NULL
104 116
 ) {
105 117
     chr <- feature$chr
... ...
@@ -135,6 +147,7 @@ plot_feature <- function(
135 147
         xlim = xlim,
136 148
         title = title,
137 149
         anno_regions = anno_regions,
150
+        binary_threshold = binary_threshold,
138 151
         spaghetti = spaghetti,
139 152
         sample_anno = sample_anno,
140 153
         span = span
... ...
@@ -21,6 +21,9 @@ setGeneric("plot_region", function(x, chr, start, end, ...) {
21 21
 #' @rdname plot_region
22 22
 #'
23 23
 #' @param anno_regions the data.frame of regions to be annotated.
24
+#' @param binary_threshold the modification probability such that calls with
25
+#'   modification probability above the threshold are set to 1 and probabilities
26
+#'   equalt to or below the threshold are set to 0.
24 27
 #' @param spaghetti whether or not individual reads should be shown.
25 28
 #' @param span the span for loess smoothing.
26 29
 #' @param window_prop the size of flanking region to plot. Can be a vector of two
... ...
@@ -47,6 +50,7 @@ setMethod("plot_region",
47 50
         start,
48 51
         end,
49 52
         anno_regions = NULL,
53
+        binary_threshold = NULL,
50 54
         spaghetti = FALSE,
51 55
         span = NULL,
52 56
         window_prop = 0
... ...
@@ -57,6 +61,7 @@ setMethod("plot_region",
57 61
             start = start,
58 62
             end = end,
59 63
             anno_regions = anno_regions,
64
+            binary_threshold = binary_threshold,
60 65
             spaghetti = spaghetti,
61 66
             span = span,
62 67
             window_prop = window_prop
... ...
@@ -80,6 +85,7 @@ setMethod("plot_region",
80 85
         start,
81 86
         end,
82 87
         anno_regions = NULL,
88
+        binary_threshold = NULL,
83 89
         spaghetti = FALSE,
84 90
         span = NULL,
85 91
         window_prop = 0
... ...
@@ -91,6 +97,7 @@ setMethod("plot_region",
91 97
             start = start,
92 98
             end = end,
93 99
             anno_regions = anno_regions,
100
+            binary_threshold = binary_threshold,
94 101
             spaghetti = spaghetti,
95 102
             span = span,
96 103
             window_prop = window_prop
... ...
@@ -105,6 +112,7 @@ setMethod("plot_region",
105 112
     start,
106 113
     end,
107 114
     anno_regions = NULL,
115
+    binary_threshold = NULL,
108 116
     spaghetti = FALSE,
109 117
     span = NULL,
110 118
     window_prop = 0
... ...
@@ -150,6 +158,7 @@ setMethod("plot_region",
150 158
         xlim = xlim,
151 159
         title = title,
152 160
         anno_regions = anno_regions,
161
+        binary_threshold = binary_threshold,
153 162
         spaghetti = spaghetti,
154 163
         sample_anno = sample_anno,
155 164
         span = span
... ...
@@ -7,6 +7,7 @@
7 7
 plot_agg_regions(
8 8
   x,
9 9
   regions,
10
+  binary_threshold = 0.5,
10 11
   group_col = NULL,
11 12
   flank = 2000,
12 13
   stranded = TRUE,
... ...
@@ -20,6 +21,10 @@ plot_agg_regions(
20 21
 \item{regions}{a table of regions containing at least columns chr, strand,
21 22
 start and end. Any additiona columns can be used for grouping.}
22 23
 
24
+\item{binary_threshold}{the modification probability such that calls with
25
+modification probability above the threshold are considered methylated, and
26
+those with probability equal or below are considered unmethylated.}
27
+
23 28
 \item{group_col}{the column to group aggregated trends by. This column can
24 29
 be in from the regions table or samples(x).}
25 30
 
... ...
@@ -12,6 +12,7 @@ plot_gene(x, gene, ...)
12 12
   gene,
13 13
   window_prop = 0.3,
14 14
   anno_regions = NULL,
15
+  binary_threshold = NULL,
15 16
   spaghetti = FALSE,
16 17
   span = NULL,
17 18
   gene_anno = TRUE
... ...
@@ -30,6 +31,10 @@ length.}
30 31
 
31 32
 \item{anno_regions}{the data.frame of regions to annotate.}
32 33
 
34
+\item{binary_threshold}{the modification probability such that calls with
35
+modification probability above the threshold are set to 1 and probabilities
36
+equalt to or below the threshold are set to 0.}
37
+
33 38
 \item{spaghetti}{whether or not individual reads should be shown.}
34 39
 
35 40
 \item{span}{the span for loess smoothing.}
... ...
@@ -8,6 +8,7 @@ plot_grange(
8 8
   x,
9 9
   grange,
10 10
   anno_regions = NULL,
11
+  binary_threshold = NULL,
11 12
   spaghetti = FALSE,
12 13
   span = NULL,
13 14
   window_prop = 0.3
... ...
@@ -20,6 +21,10 @@ plot_grange(
20 21
 
21 22
 \item{anno_regions}{the data.frame of regions to be annotated.}
22 23
 
24
+\item{binary_threshold}{the modification probability such that calls with
25
+modification probability above the threshold are set to 1 and probabilities
26
+equalt to or below the threshold are set to 0.}
27
+
23 28
 \item{spaghetti}{whether or not individual reads should be shown.}
24 29
 
25 30
 \item{span}{the span for loess smoothing.}
... ...
@@ -14,6 +14,7 @@ plot_region(x, chr, start, end, ...)
14 14
   start,
15 15
   end,
16 16
   anno_regions = NULL,
17
+  binary_threshold = NULL,
17 18
   spaghetti = FALSE,
18 19
   span = NULL,
19 20
   window_prop = 0
... ...
@@ -25,6 +26,7 @@ plot_region(x, chr, start, end, ...)
25 26
   start,
26 27
   end,
27 28
   anno_regions = NULL,
29
+  binary_threshold = NULL,
28 30
   spaghetti = FALSE,
29 31
   span = NULL,
30 32
   window_prop = 0
... ...
@@ -43,6 +45,10 @@ plot_region(x, chr, start, end, ...)
43 45
 
44 46
 \item{anno_regions}{the data.frame of regions to be annotated.}
45 47
 
48
+\item{binary_threshold}{the modification probability such that calls with
49
+modification probability above the threshold are set to 1 and probabilities
50
+equalt to or below the threshold are set to 0.}
51
+
46 52
 \item{spaghetti}{whether or not individual reads should be shown.}
47 53
 
48 54
 \item{span}{the span for loess smoothing.}
... ...
@@ -5,6 +5,11 @@
5 5
 
6 6
 using namespace Rcpp;
7 7
 
8
+#ifdef RCPP_USE_GLOBAL_ROSTREAM
9
+Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
10
+Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
11
+#endif
12
+
8 13
 // convert_methy_to_dss_cpp
9 14
 std::vector<std::string> convert_methy_to_dss_cpp(std::string input, std::string output_dir);
10 15
 RcppExport SEXP _NanoMethViz_convert_methy_to_dss_cpp(SEXP inputSEXP, SEXP output_dirSEXP) {