Bioconductor Code: SplicingGraphs

Raw Blame Patch Log History
### =========================================================================
### Functions for counting compatible hits per transcript, and for assigning
### compatible hits per exon or per intron
### -------------------------------------------------------------------------


### 'query': a named GRangesList object containing gapped reads.
### 'subject': a GRangesList object containing some subfeature (e.g. exons
###     or introns) grouped by their parent feature (e.g. transcripts).
### 'hits': a Hits object compatible with 'query' and 'subject'.
### Returns 'subject' with additional inner metadata col "hits"
### (CharacterList) reporting the hits for each subfeature.
### TODO: Current implementation is messy and inefficient. There must be
### a better way...
assignSubfeatureHits <- function(query, subject, hits, ignore.strand=FALSE)
{
    if (!is(query, "GRangesList"))
        stop("'query' must be a GRangesList object")
    query_names <- names(query)
    if (is.null(query_names))
        stop("'query' must have names")
    if (anyDuplicated(query_names))
        stop("'query' has duplicated names")
    if (!is(subject, "GRangesList"))
        stop("'subject' must be a GRangesList object")
    if (!is(hits, "Hits"))
        stop("'hits' must be a Hits object")
    if (queryLength(hits) != length(query)
     || subjectLength(hits) !=  length(subject))
        stop("'hits' is not compatible with 'query' and 'subject'")
    if (!isTRUEorFALSE(ignore.strand))
        stop("'ignore.strand' must be TRUE or FALSE")

    subject_eltlens <- elementLengths(subject)  # nb of subfeatures per subject
    s_hits <- subjectHits(hits)
    tx1 <- subject[s_hits]
    tx1_eltlens <- subject_eltlens[s_hits]
    ex11 <- unlist(tx1, use.names=FALSE)

    q_hits <- queryHits(hits)
    q_hits11 <- rep.int(q_hits, tx1_eltlens)
    gr11 <- unlist(range(query), use.names=FALSE)[q_hits11]

    if (ignore.strand)
        strand(gr11) <- strand(ex11) <- "*"

    cmp11 <- compare(gr11, ex11)
    is_hit11 <- -4L <= cmp11 & cmp11 <= 4L
    ex11_hit <- ifelse(is_hit11, query_names[q_hits11], NA_character_)
    mcols(tx1@unlistData)$hit <- ex11_hit

    unq_s_hits <- unique(s_hits)
    subfeature_hits <- lapply(unq_s_hits,
                         function(i) {
                           nsubfeatures <- subject_eltlens[i]
                           hits <- splitAsList(
                                     mcols(tx1[s_hits == i]@unlistData)$hit,
                                     seq_len(nsubfeatures))
                           if (length(hits) != 0L)
                               hits <- hits[!is.na(hits)]
                           hits
                         })

    mcols(subject@unlistData)$hits <- CharacterList(character(0))
    mcols(subject[unq_s_hits]@unlistData)$hits <- do.call(c, subfeature_hits)
    subject
}