plot_gene_annotation <- function(exons_df, plot_start, plot_end) { # helper functions ---- .exons <- function(exons_df) { ggplot2::geom_rect( ggplot2::aes( xmin = .data$start, xmax = .data$end, ymin = .data$y_offset + 0.05, ymax = .data$y_offset + 0.55 ), data = exons_df, fill = "#696969", colour = "black" ) } .connector_arrows <- function(gaps) { list( # first half with arrow ggplot2::geom_segment( ggplot2::aes( x = .data$start, xend = (.data$start + .data$end)/2, y = .data$y_offset + 0.275, yend = .data$y_offset + 0.275 ), lineend = "butt", linejoin = "mitre", data = gaps, arrow = grid::arrow( type = "open", length = ggplot2::unit(5, "points") ) ), # second half without arrow ggplot2::geom_segment( ggplot2::aes( x = (.data$start + .data$end)/2, xend = .data$end, y = .data$y_offset + 0.275, yend = .data$y_offset + 0.275 ), lineend = "butt", linejoin = "mitre", data = gaps ) ) } .connector_lines <- function(gaps) { ggplot2::geom_segment( ggplot2::aes( x = .data$end, xend = .data$start, y = .data$y_offset + 0.275, yend = .data$y_offset + 0.275 ), data = gaps ) } .gene_labels <- function(gene_labels) { gene_labels$symbol[gene_labels$strand == "+"] <- paste( gene_labels$symbol[gene_labels$strand == "+"], ">" ) gene_labels$symbol[gene_labels$strand == "-"] <- paste( "<", gene_labels$symbol[gene_labels$strand == "-"] ) ggplot2::geom_text( aes(x = .data$label_pos, y = .data$y_offset + 0.8, label = .data$symbol), data = gene_labels, hjust = "center", size = ggplot2::rel(2.5) ) } .filter_regions <- function(exons_df, plot_start, plot_end) { transcripts <- exons_df %>% dplyr::summarise( .by = "transcript_id", start = min(.data$start), end = max(.data$end) ) transcripts <- transcripts %>% dplyr::filter( .data$start <= plot_end & .data$end >= plot_start ) exons_df %>% dplyr::filter( .data$transcript_id %in% transcripts$transcript_id ) } .truncate_region <- function(x, plot_start, plot_end, strand) { if (strand == "-") { x <- x %>% dplyr::filter(.data$end <= plot_end, .data$start >= plot_start) x$end[x$end < plot_start] <- plot_start x$start[x$start > plot_end] <- plot_end } else { x <- x %>% dplyr::filter(.data$start <= plot_end, .data$end >= plot_start) x$start[x$start < plot_start] <- plot_start x$end[x$end > plot_end] <- plot_end } x } .get_gaps <- function(gaps, strand = c("+", "-", "*")) { strand <- match.arg(strand) gaps <- gaps %>% split(gaps$strand) if (!is.null(gaps[[strand]])) { out <- gaps[[strand]] if (strand == "-") { temp <- out$gap_start out$gap_start <- out$gap_end out$gap_end <- temp } } else { out <- tibble::tibble( uid = character(0), y_offset = numeric(0), strand = character(0), gap_start = integer(0), gap_end = integer(0) ) } dplyr::rename( out, start = "gap_start", end = "gap_end" ) } # function body ---- if (nrow(exons_df) == 0) { p <- ggplot() + theme_void() attr(p, "plot_height") <- 0 return(p) } # remove transcripts outside plot area exons_df <- .filter_regions(exons_df, plot_start, plot_end) exons_df <- exons_df %>% dplyr::mutate( uid = factor(paste(.data$gene_id, .data$transcript_id, sep = ".")), y_offset = as.integer(.data$uid) - 1 ) exons_count <- exons_df %>% dplyr::group_by(.data$uid) %>% dplyr::summarise(exons = dplyr::n()) gap <- exons_df %>% dplyr::inner_join(exons_count, by = c("uid"), multiple = "all") %>% dplyr::filter(.data$exons > 1) %>% dplyr::group_by("transcript_id") %>% dplyr::arrange(.data$start) %>% dplyr::ungroup() if (nrow(gap) > 0) { gap <- gap %>% dplyr::group_by(.data$uid, .data$strand, .data$y_offset) %>% dplyr::summarise( gap_start = list(.data$end[-length(.data$end)]), gap_end = list(.data$start[-1]) ) %>% tidyr::unnest(cols = c("gap_start", "gap_end")) } else { gap <- tibble::tibble( uid = character(), y_offset = numeric(), strand = character(), gap_start = numeric(), gap_end = numeric() ) } gap_pos <- .get_gaps(gap, "+") gap_neg <- .get_gaps(gap, "-") gap_none <- .get_gaps(gap, "*") region_width <- plot_end - plot_start gene_labels <- exons_df %>% dplyr::group_by(.data$gene_id, .data$symbol, .data$transcript_id, .data$y_offset, .data$strand) %>% dplyr::summarise( gene_middle = (min(.data$start) + max(.data$end)) / 2, label_pos = .data$gene_middle, label_pos = pmin(.data$label_pos, plot_end - region_width * 0.05), label_pos = pmax(.data$label_pos, plot_start + region_width * 0.05) ) exons_df <- .truncate_region(exons_df, plot_start, plot_end, "*") gap_pos <- .truncate_region(gap_pos, plot_start, plot_end, "+") gap_neg <- .truncate_region(gap_neg, plot_start, plot_end, "-") gap_none <- .truncate_region(gap_none, plot_start, plot_end, "*") gene_labels <- gene_labels %>% dplyr::filter( dplyr::between(.data$label_pos, plot_start, plot_end) ) if (length(exons_df$y_offset) > 0) { plot_height <- 1 + max(exons_df$y_offset) } else { plot_height <- 0 } p <- ggplot2::ggplot() + ggplot2::theme_void() + .connector_arrows(gap_pos) + .connector_arrows(gap_neg) + .connector_lines(gap_none) + .exons(exons_df) + .gene_labels(gene_labels) + ggplot2::ylim(0, plot_height) attr(p, "plot_height") <- plot_height p }