R/sgedgesByGene-methods.R
6c1f4100
 ### =========================================================================
942a5908
 ### "sgedgesByGene" (and related) methods
6c1f4100
 ### -------------------------------------------------------------------------
 
 
9cd225cb
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### intronsByTranscript()
 ###
 
66886f49
 setMethod("intronsByTranscript", "SplicingGraphs", function(x) x@in_by_tx)
c6222c7f
 
9cd225cb
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### sgedgesByTranscript()
 ###
 
 setGeneric("sgedgesByTranscript", signature="x",
1dab2c6e
     function(x, with.exon.mcols=FALSE, with.hits.mcols=FALSE)
9cd225cb
         standardGeneric("sgedgesByTranscript")
6c1f4100
 )
 
 setMethod("sgedgesByTranscript", "SplicingGraphs",
1dab2c6e
     function(x, with.exon.mcols=FALSE, with.hits.mcols=FALSE)
6c1f4100
     {
1dab2c6e
         if (!isTRUEorFALSE(with.exon.mcols))
             stop("'with.exon.mcols' must be TRUE or FALSE")
         if (!isTRUEorFALSE(with.hits.mcols))
             stop("'with.hits.mcols' must be TRUE or FALSE")
9cd225cb
 
6c1f4100
         ex_by_tx <- unlist(x)
         ex_partitioning <- PartitioningByEnd(ex_by_tx)
         gene_ids <- names(ex_partitioning)
         in_by_tx <- intronsByTranscript(x)
         in_partitioning <- PartitioningByEnd(in_by_tx)
         stopifnot(identical(gene_ids, names(in_partitioning)))
 
         ## Compute 'ans_partitioning'.
         nex_by_tx <- width(ex_partitioning)
         nin_by_tx <- width(in_partitioning)
         stopifnot(identical(nin_by_tx + 1L, nex_by_tx))
         ans_partitioning <- PartitioningByEnd(end(ex_partitioning) +
                                               end(in_partitioning),
                                               names=gene_ids)
 
c6222c7f
         ## Add missing metadata cols to 'in_unlistData'.
6c1f4100
         ex_unlistData <- ex_by_tx@unlistData
         ex_unlistData_len <- length(ex_unlistData)
7341eb9a
         ex_unlistData_mcols <- mcols(ex_unlistData)
43e1ae98
         check_exon_mcolnames(colnames(ex_unlistData_mcols))
9cd225cb
 
6c1f4100
         in_unlistData <- in_by_tx@unlistData
         in_unlistData_len <- length(in_unlistData)
7341eb9a
         in_unlistData_mcols <- mcols(in_unlistData)
 
6c1f4100
         in_missing_mcols <- DataFrame(exon_id=NA_integer_,
                                       exon_name=NA_character_,
                                       exon_rank=NA_integer_,
                                       start_SSid=NA_integer_,
                                       end_SSid=NA_integer_)
         idx <- rep.int(1L, in_unlistData_len)
         in_missing_mcols <- in_missing_mcols[idx, , drop=FALSE]
7341eb9a
         in_unlistData_mcols <- cbind(in_missing_mcols, in_unlistData_mcols)
6c1f4100
 
7341eb9a
         ## Make "from" and "to" metadata cols.
c6222c7f
         from <- as.character(ex_unlistData_mcols$start_SSid)
         to <- as.character(ex_unlistData_mcols$end_SSid)
         idx <- which(strand(ex_unlistData) == "-")
         tmp <- from[idx]
         from[idx] <- to[idx]
         to[idx] <- tmp
7341eb9a
         ex_prepend_mcols <- DataFrame(from=from, to=to)
c6222c7f
 
         from <- to <- rep.int(NA_character_, in_unlistData_len)
7341eb9a
         in_prepend_mcols <- DataFrame(from=from, to=to)
 
         ## Make "ex_or_in" and "tx_id" metadata cols.
         ex_or_in <- rep.int(factor("ex", levels=EX_OR_IN_LEVELS),
                             ex_unlistData_len)
         ex_prepend_mcols$ex_or_in <- ex_or_in
         ex_or_in <- rep.int(factor("in", levels=EX_OR_IN_LEVELS),
                             in_unlistData_len)
         in_prepend_mcols$ex_or_in <- ex_or_in
c6222c7f
 
         tx_id <- mcols(ex_by_tx)[ , "tx_id"]
7341eb9a
         ex_prepend_mcols$tx_id <- rep.int(tx_id, nex_by_tx)
         in_prepend_mcols$tx_id <- rep.int(tx_id, nin_by_tx)
 
         mcols(ex_unlistData) <- cbind(ex_prepend_mcols, ex_unlistData_mcols)
         mcols(in_unlistData) <- cbind(in_prepend_mcols, in_unlistData_mcols)
c6222c7f
 
         ## Compute 'ans_unlistData'. We need to reorder 'c(ex_unlistData,
7341eb9a
         ## in_unlistData)' to bring introns between their flanking exons.
c6222c7f
         ans_unlistData <- c(ex_unlistData, in_unlistData)
6c1f4100
         seq0 <- seq_along(ex_partitioning)
         roidx <- integer(ex_unlistData_len + in_unlistData_len)
         seq1 <- seq_len(ex_unlistData_len)
         idx1 <- seq1 * 2L - rep.int(seq0, nex_by_tx)
         roidx[idx1] <- seq1
         seq2 <- seq_len(in_unlistData_len)
         idx2 <- seq2 * 2L + rep.int(seq0, nin_by_tx) - 1L
         roidx[idx2] <- seq2 + ex_unlistData_len
         ans_unlistData <- ans_unlistData[roidx]
 
c6222c7f
         ## Fill gaps in "from" and "to" metadata cols.
         ans_unlistData_mcols <- mcols(ans_unlistData)
         from <- ans_unlistData_mcols$from
         to <- ans_unlistData_mcols$to
         introns_idx <- which(is.na(from))  # same as 'which(is.na(to))'
         from[introns_idx] <- to[introns_idx - 1L]
         to[introns_idx] <- from[introns_idx + 1L]
7341eb9a
         ans_unlistData_mcols$from <- from
         ans_unlistData_mcols$to <- to
c6222c7f
 
7341eb9a
         ## Sanity check: exons must be flanking introns.
c6222c7f
         ans_unlistData_start <- start(ans_unlistData)
         ans_unlistData_end <- end(ans_unlistData)
         ans_unlistData_strand <- strand(ans_unlistData)
 
         plus_idx <- which(ans_unlistData_strand == "+")
         plus_introns_idx <- intersect(introns_idx, plus_idx)
         stopifnot(identical(ans_unlistData_start[plus_introns_idx] - 1L,
                             ans_unlistData_end[plus_introns_idx - 1L]))
         stopifnot(identical(ans_unlistData_end[plus_introns_idx] + 1L,
                             ans_unlistData_start[plus_introns_idx + 1L]))
 
         minus_idx <- which(ans_unlistData_strand == "-")
         minus_introns_idx <- intersect(introns_idx, minus_idx)
         stopifnot(identical(ans_unlistData_start[minus_introns_idx] - 1L,
                             ans_unlistData_end[minus_introns_idx + 1L]))
         stopifnot(identical(ans_unlistData_end[minus_introns_idx] + 1L,
                             ans_unlistData_start[minus_introns_idx - 1L]))
 
e3f9cd93
         ## Insert "sgedge_id" metadata col after first 2 metadata cols
         ## ("from" and "to").
         sgedge_id <- make_global_sgedge_id(
                          rep.int(gene_ids, width(ans_partitioning)),
                          from, to)
         ans_unlistData_mcols <- c(ans_unlistData_mcols[1:2],
                                   DataFrame(sgedge_id=sgedge_id),
                                   ans_unlistData_mcols[-(1:2)])
43e1ae98
         check_all_edge_mcolnames(colnames(ans_unlistData_mcols))
9cd225cb
 
         ## Drop unwanted columns.
43e1ae98
         mcol_idx <- get_index_of_group_of_mcols(
9cd225cb
                         colnames(ans_unlistData_mcols),
1dab2c6e
                         with.exon.mcols, with.hits.mcols)
9cd225cb
         if (length(mcol_idx) != 0L)
             ans_unlistData_mcols <- ans_unlistData_mcols[ , -mcol_idx,
                                                          drop=FALSE]
         mcols(ans_unlistData) <- ans_unlistData_mcols
 
         ## Relist 'ans_unlistData' and return.
         ans <- relist(ans_unlistData, ans_partitioning)
         ans
     }
 )
 
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### sgedgesByGene()
 ###
 
 setGeneric("sgedgesByGene", signature="x",
942a5908
     function(x, with.exon.mcols=FALSE, with.hits.mcols=FALSE,
                 keep.dup.edges=FALSE)
9cd225cb
         standardGeneric("sgedgesByGene")
 )
 
 setMethod("sgedgesByGene", "SplicingGraphs",
942a5908
     function(x, with.exon.mcols=FALSE, with.hits.mcols=FALSE,
                 keep.dup.edges=FALSE)
9cd225cb
     {
942a5908
         if (!isTRUEorFALSE(keep.dup.edges))
             stop("'keep.dup.edges' must be TRUE or FALSE")
1dab2c6e
         edges_by_tx <- sgedgesByTranscript(x, with.exon.mcols=with.exon.mcols,
                                               with.hits.mcols=with.hits.mcols)
9cd225cb
         edges0 <- unlist(edges_by_tx)
         edges0_mcols <- mcols(edges0)
         edges0_mcolnames <- colnames(edges0_mcols)
 
         sgedge_id <- edges0_mcols[ , "sgedge_id"]
         sm <- match(sgedge_id, sgedge_id)
 
         ## Sanity checks.
         stopifnot(all(edges0 == edges0[sm]))
43e1ae98
         invar_mcol_idx <- get_index_of_invariant_edge_mcols(edges0_mcolnames)
9cd225cb
         stopifnot(identical(
942a5908
                     edges0_mcols[ , invar_mcol_idx, drop=FALSE],
                     edges0_mcols[sm , invar_mcol_idx, drop=FALSE]))
9cd225cb
 
         ## Compute 'ans_partitioning'.
942a5908
         ans_unlistData <- edges0
         if (!keep.dup.edges) {
             keep_idx <- which(sm == seq_along(sm))
             ans_unlistData <- ans_unlistData[keep_idx]
         }
9cd225cb
         ans_grouping <- Rle(names(ans_unlistData))
         ans_eltlens <- runLength(ans_grouping)
         ans_partitioning <- PartitioningByEnd(cumsum(ans_eltlens),
                                               names=runValue(ans_grouping))
 
         ## Compute 'ans_unlistData'.
         names(ans_unlistData) <- NULL
942a5908
         if (!keep.dup.edges) {
             ans_unlistData_mcols <- mcols(ans_unlistData)
             var_mcol_idx <- seq_along(edges0_mcolnames)[- invar_mcol_idx]
             f <- factor(sgedge_id, levels=sgedge_id[keep_idx])
             for (i in var_mcol_idx) {
                 old_col <- edges0_mcols[ , i]
43e1ae98
                 new_col <- unname(unique(unlistAndSplit(old_col, f)))
942a5908
                 ans_unlistData_mcols[ , i] <- new_col
             }
             mcols(ans_unlistData) <- ans_unlistData_mcols
9cd225cb
         }
7341eb9a
 
6c1f4100
         ## Relist 'ans_unlistData' and return.
         ans <- relist(ans_unlistData, ans_partitioning)
         ans
     }
 )