Browse code

plot_region no longer reserves space for transcripts not visible in plotting area

Shians authored on 18/08/2023 06:48:42
Showing 2 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: NanoMethViz
2 2
 Type: Package
3 3
 Title: Visualise methylation data from Oxford Nanopore sequencing
4
-Version: 2.7.6
4
+Version: 2.7.7
5 5
 Authors@R: c(
6 6
     person("Shian", "Su", email = "[email protected]", role = c("cre", "aut")))
7 7
 Description: NanoMethViz is a toolkit for visualising methylation data from 
... ...
@@ -1,84 +1,5 @@
1 1
 plot_gene_annotation <- function(exons_df, plot_start, plot_end) {
2
-    if (nrow(exons_df) == 0) {
3
-        p <- ggplot() + theme_void()
4
-        attr(p, "plot_height") <- 0
5
-        return(p)
6
-    }
7
-
8
-    exons_df <- exons_df %>%
9
-        dplyr::mutate(
10
-            uid = factor(paste(.data$gene_id, .data$transcript_id, sep = ".")),
11
-            y_offset = as.integer(.data$uid) - 1
12
-        )
13
-
14
-    exons_count <- exons_df %>%
15
-        dplyr::group_by(.data$uid, .data$y_offset) %>%
16
-        dplyr::summarise(exons = dplyr::n())
17
-
18
-    gap <- exons_df %>%
19
-        dplyr::inner_join(exons_count, by = c("uid", "y_offset"), multiple = "all") %>%
20
-        dplyr::filter(.data$exons > 1) %>%
21
-        dplyr::group_by("transcript_id") %>%
22
-        dplyr::arrange(.data$start) %>%
23
-        dplyr::ungroup()
24
-
25
-    if (nrow(gap) > 0) {
26
-        gap <- gap %>%
27
-            dplyr::group_by(.data$uid, .data$y_offset, .data$strand) %>%
28
-            dplyr::summarise(
29
-                gap_start = list(.data$end[-length(.data$end)]),
30
-                gap_end = list(.data$start[-1])
31
-            ) %>%
32
-            tidyr::unnest(cols = c("gap_start", "gap_end"))
33
-    } else {
34
-        gap <- tibble::tibble(
35
-            uid = character(),
36
-            y_offset = numeric(),
37
-            strand = character(),
38
-            gap_start = numeric(),
39
-            gap_end = numeric()
40
-        )
41
-    }
42
-
43
-    .get_gaps <- function(gaps, strand = c("+", "-", "*")) {
44
-        strand <- match.arg(strand)
45
-
46
-        gaps <- gaps %>%
47
-            split(gaps$strand)
48
-
49
-        if (!is.null(gaps[[strand]])) {
50
-            out <- gaps[[strand]]
51
-
52
-            if (strand == "-") {
53
-                temp <- out$gap_start
54
-                out$gap_start <- out$gap_end
55
-                out$gap_end <- temp
56
-            }
57
-        } else {
58
-            out <- tibble::tibble(
59
-                uid = character(0),
60
-                y_offset = numeric(0),
61
-                strand = character(0),
62
-                gap_start = integer(0),
63
-                gap_end = integer(0)
64
-            )
65
-        }
66
-
67
-        dplyr::rename(
68
-            out,
69
-            start = "gap_start",
70
-            end = "gap_end"
71
-        )
72
-    }
73
-
74
-    gap_pos <- .get_gaps(gap, "+")
75
-    gap_neg <- .get_gaps(gap, "-")
76
-    gap_none <- .get_gaps(gap, "*")
77
-
78
-    gene_labels <- exons_df %>%
79
-        dplyr::group_by(.data$gene_id, .data$symbol, .data$transcript_id, .data$y_offset, .data$strand) %>%
80
-        dplyr::summarise(gene_middle = (min(.data$start) + max(.data$end)) / 2)
81
-
2
+    # helper functions ----
82 3
     .exons <- function(exons_df) {
83 4
         ggplot2::geom_rect(
84 5
             ggplot2::aes(
... ...
@@ -157,6 +78,25 @@ plot_gene_annotation <- function(exons_df, plot_start, plot_end) {
157 78
         )
158 79
     }
159 80
 
81
+    .filter_regions <- function(exons_df, plot_start, plot_end) {
82
+        transcripts <- exons_df %>%
83
+            dplyr::summarise(
84
+                .by = .data$transcript_id,
85
+                start = min(.data$start),
86
+                end = max(.data$end)
87
+            )
88
+
89
+        transcripts <- transcripts %>%
90
+            dplyr::filter(
91
+                .data$start <= plot_end & .data$end >= plot_start
92
+            )
93
+
94
+        exons_df %>%
95
+            dplyr::filter(
96
+                .data$transcript_id %in% transcripts$transcript_id
97
+            )
98
+    }
99
+
160 100
     .truncate_region <- function(x, plot_start, plot_end, strand) {
161 101
         if (strand == "-") {
162 102
             x <- x %>%
... ...
@@ -173,7 +113,92 @@ plot_gene_annotation <- function(exons_df, plot_start, plot_end) {
173 113
         x
174 114
     }
175 115
 
116
+    .get_gaps <- function(gaps, strand = c("+", "-", "*")) {
117
+        strand <- match.arg(strand)
118
+
119
+        gaps <- gaps %>%
120
+            split(gaps$strand)
121
+
122
+        if (!is.null(gaps[[strand]])) {
123
+            out <- gaps[[strand]]
124
+
125
+            if (strand == "-") {
126
+                temp <- out$gap_start
127
+                out$gap_start <- out$gap_end
128
+                out$gap_end <- temp
129
+            }
130
+        } else {
131
+            out <- tibble::tibble(
132
+                uid = character(0),
133
+                y_offset = numeric(0),
134
+                strand = character(0),
135
+                gap_start = integer(0),
136
+                gap_end = integer(0)
137
+            )
138
+        }
139
+
140
+        dplyr::rename(
141
+            out,
142
+            start = "gap_start",
143
+            end = "gap_end"
144
+        )
145
+    }
146
+
147
+    # function body ----
148
+    if (nrow(exons_df) == 0) {
149
+        p <- ggplot() + theme_void()
150
+        attr(p, "plot_height") <- 0
151
+        return(p)
152
+    }
153
+
154
+    # remove transcripts outside plot area
155
+    exons_df <- .filter_regions(exons_df, plot_start, plot_end)
156
+
157
+    exons_df <- exons_df %>%
158
+        dplyr::mutate(
159
+            uid = factor(paste(.data$gene_id, .data$transcript_id, sep = ".")),
160
+            y_offset = as.integer(.data$uid) - 1
161
+        )
162
+
163
+    exons_count <- exons_df %>%
164
+        dplyr::group_by(.data$uid) %>%
165
+        dplyr::summarise(exons = dplyr::n())
166
+
167
+    gap <- exons_df %>%
168
+        dplyr::inner_join(exons_count, by = c("uid"), multiple = "all") %>%
169
+        dplyr::filter(.data$exons > 1) %>%
170
+        dplyr::group_by("transcript_id") %>%
171
+        dplyr::arrange(.data$start) %>%
172
+        dplyr::ungroup()
173
+
174
+    if (nrow(gap) > 0) {
175
+        gap <- gap %>%
176
+            dplyr::group_by(.data$uid, .data$strand, .data$y_offset) %>%
177
+            dplyr::summarise(
178
+                gap_start = list(.data$end[-length(.data$end)]),
179
+                gap_end = list(.data$start[-1])
180
+            ) %>%
181
+            tidyr::unnest(cols = c("gap_start", "gap_end"))
182
+    } else {
183
+        gap <- tibble::tibble(
184
+            uid = character(),
185
+            y_offset = numeric(),
186
+            strand = character(),
187
+            gap_start = numeric(),
188
+            gap_end = numeric()
189
+        )
190
+    }
191
+
192
+    gap_pos <- .get_gaps(gap, "+")
193
+    gap_neg <- .get_gaps(gap, "-")
194
+    gap_none <- .get_gaps(gap, "*")
195
+
196
+    gene_labels <- exons_df %>%
197
+        dplyr::group_by(.data$gene_id, .data$symbol, .data$transcript_id, .data$y_offset, .data$strand) %>%
198
+        dplyr::summarise(gene_middle = (min(.data$start) + max(.data$end)) / 2)
199
+
176 200
     exons_df <- .truncate_region(exons_df, plot_start, plot_end, "*")
201
+
177 202
     gap_pos <- .truncate_region(gap_pos, plot_start, plot_end, "+")
178 203
     gap_neg <- .truncate_region(gap_neg, plot_start, plot_end, "-")
179 204
     gap_none <- .truncate_region(gap_none, plot_start, plot_end, "*")