R/plot_gene_annotation.R
53ac984f
 plot_gene_annotation <- function(exons_df, plot_start, plot_end) {
c1b1c7b3
     # helper functions ----
7aa2a454
     .exons <- function(exons_df) {
         ggplot2::geom_rect(
             ggplot2::aes(
ffbf02cf
                 xmin = .data$start,
                 xmax = .data$end,
                 ymin = .data$y_offset + 0.05,
                 ymax = .data$y_offset + 0.55
7aa2a454
             ),
             data = exons_df,
bf865a5a
             fill = "#696969",
             colour = "black"
7aa2a454
         )
     }
 
     .connector_arrows <- function(gaps) {
8cbe8942
         list(
             # first half with arrow
             ggplot2::geom_segment(
                 ggplot2::aes(
                     x = .data$start,
                     xend = (.data$start + .data$end)/2,
                     y = .data$y_offset + 0.275,
                     yend = .data$y_offset + 0.275
                 ),
                 lineend = "butt",
                 linejoin = "mitre",
                 data = gaps,
                 arrow = grid::arrow(
                     type = "open",
                     length = ggplot2::unit(5, "points")
                 )
53ac984f
             ),
8cbe8942
             # second half without arrow
             ggplot2::geom_segment(
                 ggplot2::aes(
                     x = (.data$start + .data$end)/2,
                     xend = .data$end,
                     y = .data$y_offset + 0.275,
                     yend = .data$y_offset + 0.275
                 ),
                 lineend = "butt",
                 linejoin = "mitre",
                 data = gaps
53ac984f
             )
7aa2a454
         )
     }
 
     .connector_lines <- function(gaps) {
53ac984f
         ggplot2::geom_segment(
             ggplot2::aes(
c5e99e08
                 x = .data$end,
                 xend = .data$start,
ffbf02cf
                 y = .data$y_offset + 0.275,
                 yend = .data$y_offset + 0.275
53ac984f
             ),
7aa2a454
             data = gaps
         )
     }
 
c5e99e08
     .gene_labels <- function(gene_labels) {
         gene_labels$symbol[gene_labels$strand == "+"] <- paste(
             gene_labels$symbol[gene_labels$strand == "+"],
e94ac713
             ">"
d3bded2d
         )
 
c5e99e08
         gene_labels$symbol[gene_labels$strand == "-"] <- paste(
e94ac713
             "<",
c5e99e08
             gene_labels$symbol[gene_labels$strand == "-"]
d3bded2d
         )
 
7aa2a454
         ggplot2::geom_text(
158ba1d4
             aes(x = .data$label_pos, y = .data$y_offset + 0.8, label = .data$symbol),
c5e99e08
             data = gene_labels,
7aa2a454
             hjust = "center",
d92421a1
             size = ggplot2::rel(2.5)
7aa2a454
         )
     }
 
c1b1c7b3
     .filter_regions <- function(exons_df, plot_start, plot_end) {
         transcripts <- exons_df %>%
             dplyr::summarise(
f3668239
                 .by = "transcript_id",
c1b1c7b3
                 start = min(.data$start),
                 end = max(.data$end)
             )
 
         transcripts <- transcripts %>%
             dplyr::filter(
                 .data$start <= plot_end & .data$end >= plot_start
             )
 
         exons_df %>%
             dplyr::filter(
                 .data$transcript_id %in% transcripts$transcript_id
             )
     }
 
c5e99e08
     .truncate_region <- function(x, plot_start, plot_end, strand) {
         if (strand == "-") {
             x <- x %>%
e94ac713
                 dplyr::filter(.data$end <= plot_end, .data$start >= plot_start)
c5e99e08
             x$end[x$end < plot_start] <- plot_start
             x$start[x$start > plot_end] <- plot_end
         } else {
             x <- x %>%
e94ac713
                 dplyr::filter(.data$start <= plot_end, .data$end >= plot_start)
c5e99e08
             x$start[x$start < plot_start] <- plot_start
             x$end[x$end > plot_end] <- plot_end
         }
 
         x
     }
 
c1b1c7b3
     .get_gaps <- function(gaps, strand = c("+", "-", "*")) {
         strand <- match.arg(strand)
 
         gaps <- gaps %>%
             split(gaps$strand)
 
         if (!is.null(gaps[[strand]])) {
             out <- gaps[[strand]]
 
             if (strand == "-") {
                 temp <- out$gap_start
                 out$gap_start <- out$gap_end
                 out$gap_end <- temp
             }
         } else {
             out <- tibble::tibble(
                 uid = character(0),
                 y_offset = numeric(0),
                 strand = character(0),
                 gap_start = integer(0),
                 gap_end = integer(0)
             )
         }
 
         dplyr::rename(
             out,
             start = "gap_start",
             end = "gap_end"
         )
     }
 
     # function body ----
     if (nrow(exons_df) == 0) {
         p <- ggplot() + theme_void()
         attr(p, "plot_height") <- 0
         return(p)
     }
 
     # remove transcripts outside plot area
     exons_df <- .filter_regions(exons_df, plot_start, plot_end)
 
     exons_df <- exons_df %>%
         dplyr::mutate(
             uid = factor(paste(.data$gene_id, .data$transcript_id, sep = ".")),
             y_offset = as.integer(.data$uid) - 1
         )
 
     exons_count <- exons_df %>%
         dplyr::group_by(.data$uid) %>%
         dplyr::summarise(exons = dplyr::n())
 
     gap <- exons_df %>%
         dplyr::inner_join(exons_count, by = c("uid"), multiple = "all") %>%
         dplyr::filter(.data$exons > 1) %>%
         dplyr::group_by("transcript_id") %>%
         dplyr::arrange(.data$start) %>%
         dplyr::ungroup()
 
     if (nrow(gap) > 0) {
         gap <- gap %>%
             dplyr::group_by(.data$uid, .data$strand, .data$y_offset) %>%
             dplyr::summarise(
                 gap_start = list(.data$end[-length(.data$end)]),
                 gap_end = list(.data$start[-1])
             ) %>%
             tidyr::unnest(cols = c("gap_start", "gap_end"))
     } else {
         gap <- tibble::tibble(
             uid = character(),
             y_offset = numeric(),
             strand = character(),
             gap_start = numeric(),
             gap_end = numeric()
         )
     }
 
     gap_pos <- .get_gaps(gap, "+")
     gap_neg <- .get_gaps(gap, "-")
     gap_none <- .get_gaps(gap, "*")
 
158ba1d4
     region_width <- plot_end - plot_start
c1b1c7b3
     gene_labels <- exons_df %>%
         dplyr::group_by(.data$gene_id, .data$symbol, .data$transcript_id, .data$y_offset, .data$strand) %>%
158ba1d4
         dplyr::summarise(
             gene_middle = (min(.data$start) + max(.data$end)) / 2,
19dc3f0c
             label_pos = .data$gene_middle,
             label_pos = pmin(.data$label_pos, plot_end - region_width * 0.05),
             label_pos = pmax(.data$label_pos, plot_start +  region_width * 0.05)
158ba1d4
         )
c1b1c7b3
 
c5e99e08
     exons_df <- .truncate_region(exons_df, plot_start, plot_end, "*")
c1b1c7b3
 
c5e99e08
     gap_pos <- .truncate_region(gap_pos, plot_start, plot_end, "+")
     gap_neg <- .truncate_region(gap_neg, plot_start, plot_end, "-")
     gap_none <- .truncate_region(gap_none, plot_start, plot_end, "*")
     gene_labels <- gene_labels %>%
         dplyr::filter(
158ba1d4
             dplyr::between(.data$label_pos, plot_start, plot_end)
c5e99e08
         )
 
c7faa0fa
     if (length(exons_df$y_offset) > 0) {
         plot_height <- 1 + max(exons_df$y_offset)
     } else {
         plot_height <- 0
     }
 
     p <- ggplot2::ggplot() +
7aa2a454
         ggplot2::theme_void() +
         .connector_arrows(gap_pos) +
         .connector_arrows(gap_neg) +
         .connector_lines(gap_none) +
d100a89a
         .exons(exons_df) +
c5e99e08
         .gene_labels(gene_labels) +
c7faa0fa
         ggplot2::ylim(0, plot_height)
 
     attr(p, "plot_height") <- plot_height
 
     p
53ac984f
 }