R/SplicingGraphs-class.R
50d0360b
 ### =========================================================================
4f96879b
 ### SplicingGraphs objects
50d0360b
 ### -------------------------------------------------------------------------
 
5712a4d7
 
24537e93
 ### With the 2-class design we choose below, the SplicingGraphs class does
 ### NOT extend the CompressedList class, but has the slot ('genes') that
 ### extends the CompressedList class. This avoids having the SplicingGraphs
 ### class inherit a very rich API (Vector + List), which would be a problem
 ### because many operations (like c() or relist()) would be broken, unless
 ### we re-implement them for SplicingGraphs objects. But: (a) that's a lot of
 ### work (the API is huge), and (b) we don't need those operations in the
09cffc8a
 ### first place. All we need are: length(), names(), [, [[, elementNROWS(),
24537e93
 ### and unlist().
a48698ec
 ### The GeneModel class is an internal class that is not intended to be
 ### exposed to the user. It's a list-like class where the elements are
 ### GRangesList objects, each of them representing a gene. It's currently
 ### defined exactly how the GRangesListList class would be if we decided to
 ### have one in the GenomicRanges package.
69a8543c
 
a48698ec
 setClass("GeneModel",
77742862
     contains="CompressedList",
4f96879b
     representation(
77742862
         unlistData="GRangesList",
         elementMetadata="DataFrame"
     ),
     prototype(
         elementType="GRangesList"
4f96879b
     )
 )
 
69a8543c
 setClass("SplicingGraphs",
     representation(
a48698ec
         genes="GeneModel",
24537e93
         in_by_tx="GRangesList",
         .bubbles_cache="environment"
69a8543c
     )
 )
 
50d0360b
 
4f96879b
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
a48698ec
 ### GeneModel validity.
69a8543c
 ###
4f96879b
 
a48698ec
 .valid.GeneModel.names <- function(x)
77742862
 {
     x_names <- names(x)
     if (is.null(x_names)) {
         if (length(x) == 1L)
             return(NULL)
         return("'x' must have names")
     }
     if (anyDuplicated(x_names))
         return("'x' has duplicated names")
     NULL
 }
 
a48698ec
 .valid.GeneModel.ex_by_tx <- function(x)
77742862
 {
f66f7195
     ex_by_tx <- x@unlistData
     if (!is.null(names(ex_by_tx)))
77742862
         return("'x@unlistData' must be unnamed")
f66f7195
     ex_mcols <- mcols(ex_by_tx@unlistData)
     valid_exon_mcolnames(colnames(ex_mcols))
77742862
 }
4f96879b
 
a48698ec
 .valid.GeneModel <- function(x)
77742862
 {
a48698ec
     c(.valid.GeneModel.names(x),
       .valid.GeneModel.ex_by_tx(x))
77742862
 }
4f96879b
 
a48698ec
 setValidity2("GeneModel", .valid.GeneModel)
69a8543c
 
ddf79f2c
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
a48698ec
 ### GeneModel API.
ddf79f2c
 ###
937fc96c
 
 ### From the CompressedList API:
a48698ec
 ### GeneModel objects inherit the CompressedList API i.e. anything that works
 ### on a CompressedList object works on a GeneModel object. But the only
 ### things we're using/supporting in the SplicingGraphs package are: length(),
09cffc8a
 ### names(), [, [[, elementNROWS(), and unlist().
937fc96c
 
 ### From the GRanges API:
66886f49
 ### We only need seqnames(), strand(), and seqinfo() from the GRanges API.
937fc96c
 
a48698ec
 setMethod("seqnames", "GeneModel",
937fc96c
     function(x)
     {
         x_unlisted <- unlist(x)
         unlisted_seqnames <- commonSeqnames.GRangesList(x_unlisted)
         relisted_seqnames <- relist(unlisted_seqnames, x)
         ans <- commonSeqnames.RleList(relisted_seqnames)
         names(ans) <- names(x)
         ans
     }
 )
 
a48698ec
 setMethod("strand", "GeneModel",
937fc96c
     function(x)
     {
         x_unlisted <- unlist(x)
         unlisted_strand <- commonStrand.GRangesList(x_unlisted)
         relisted_strand <- relist(unlisted_strand, x)
         ans <- commonStrand.RleList(relisted_strand)
         names(ans) <- names(x)
         ans
     }
 )
ddf79f2c
 
a48698ec
 setMethod("seqinfo", "GeneModel",
66886f49
     function(x) seqinfo(unlist(x, use.names=FALSE))
 )
 
a3a942cd
 ### The only GeneModel setter supported at the moment.
 setReplaceMethod("isCircular", "GeneModel",
     function(x, value)
     {
         isCircular(x@unlistData) <- value
         x
     }
 )
 
ddf79f2c
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
a48698ec
 ### GeneModel() constructor.
ddf79f2c
 ###
 ### Used only in this file.
 ###
 
a48698ec
 ### 'ex_by_tx' must be a *named* GRangesList object containing exons grouped
 ### by transcript. The names must be the gene ids, NOT the transcript ids or
 ### transcript names.
 GeneModel <- function(ex_by_tx)
ddf79f2c
 {
     ex_by_tx_names <- names(ex_by_tx)
     if (is.null(ex_by_tx_names)) {
         ans_partitioning <- PartitioningByEnd(length(ex_by_tx))
     } else {
         names(ex_by_tx) <- NULL
         ans_end <- end(Rle(ex_by_tx_names))
         ans_names <- ex_by_tx_names[ans_end]
         ans_partitioning <- PartitioningByEnd(ans_end, names=ans_names)
     }
a48698ec
     IRanges:::newCompressedList0(getClass("GeneModel"),
ddf79f2c
                                  ex_by_tx, ans_partitioning)
 }
 
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### SplicingGraphs validity.
 ###
 ### A SplicingGraphs object 'x' should be validated with:
 ###
 ###   validObject(x, complete=TRUE)
 ###
 
808fe39e
 .valid.SplicingGraphs.in_by_tx <- function(x)
 {
     x_in_by_tx <- x@in_by_tx
ddf79f2c
     x_in_by_tx_names <- names(x_in_by_tx)
     if (is.null(x_in_by_tx_names))
         return("'x@in_by_tx' must have names")
     x_ex_by_tx <- unlist(x@genes)
     if (length(x_in_by_tx) != length(x_ex_by_tx))
         return("'x@in_by_tx' must have the same length as 'unlist(x@genes)'")
     if (!identical(x_in_by_tx_names, names(x_ex_by_tx)))
         return("'x@in_by_tx' must have the same names as 'unlist(x@genes)'")
09cffc8a
     if (!identical(elementNROWS(x_in_by_tx) + 1L,
                    elementNROWS(x_ex_by_tx))) {
808fe39e
         msg <- c("the shape of 'x@in_by_tx' is not compatible ",
ddf79f2c
                  "with the shape of 'unlist(x@genes)'")
808fe39e
         return(paste0(msg, collapse=""))
     }
     NULL
 }
 
 .valid.SplicingGraphs <- function(x)
 {
     c(.valid.SplicingGraphs.in_by_tx(x))
 }
 
 setValidity2("SplicingGraphs", .valid.SplicingGraphs)
 
69a8543c
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### Restricted SplicingGraphs API.
 ###
66886f49
 ### Here we only implement:
 ###   - a few core methods from the List API,
 ###   - seqnames(), strand(), seqinfo() from the GRanges API.
dd0bd6a7
 ###
69a8543c
 
 setMethod("length", "SplicingGraphs", function(x) length(x@genes))
 
 setMethod("names", "SplicingGraphs", function(x) names(x@genes))
 
 setMethod("[", "SplicingGraphs",
dd0bd6a7
     function(x, i, j, ..., drop=TRUE)
69a8543c
     {
         if (!missing(j) || length(list(...)) > 0L)
             stop("invalid subsetting")
         if (missing(i))
             return(x)
808fe39e
         if (is.null(names(x)))
             stop("subsetting a SplicingGraphs with no names is not supported")
ca31837e
         x_genes <- x@genes
0c406896
         seqx <- seq_along(x_genes)
         names(seqx) <- names(x_genes)
         i <- seqx[i]
ca31837e
         ii <- unlist(IRanges(PartitioningByEnd(x_genes))[i], use.names=FALSE)
0c406896
         x@genes <- x_genes[i]
ca31837e
         x@in_by_tx <- x@in_by_tx[ii]
69a8543c
         x
     }
 )
 
 setMethod("[[", "SplicingGraphs",
     function (x, i, j, ...)
     {
         if (!missing(j) || length(list(...)) > 0L)
             stop("invalid subsetting")
         x@genes[[i]]
     }
 )
 
09cffc8a
 setMethod("elementNROWS", "SplicingGraphs",
     function(x) elementNROWS(x@genes)
69a8543c
 )
 
 setMethod("unlist", "SplicingGraphs",
     function(x, recursive=TRUE, use.names=TRUE)
     {
         unlist(x@genes, recursive=recursive, use.names=use.names)
     }
 )
4f96879b
 
937fc96c
 setMethod("seqnames", "SplicingGraphs", function(x) seqnames(x@genes))
 
 setMethod("strand", "SplicingGraphs", function(x) strand(x@genes))
 
66886f49
 setMethod("seqinfo", "SplicingGraphs", function(x) seqinfo(x@genes))
dd0bd6a7
 
a3a942cd
 ### isCircular<- and txweight<- are the only SplicingGraphs setters supported
 ### at the moment.
 setReplaceMethod("isCircular", "SplicingGraphs",
     function(x, value)
     {
         isCircular(x@genes) <- isCircular(x@in_by_tx) <- value
         x
     }
 )
 
4f96879b
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### "show" method.
 ###
 
 setMethod("show", "SplicingGraphs",
     function(object)
     {
77742862
         ngene <- length(object)
         ntx <- length(unlist(object, use.names=FALSE))
         cat(class(object), " object with ", ngene, " gene(s) ",
4f96879b
             "and ", ntx, " transcript(s)\n", sep="")
     }
 )
 
 
50d0360b
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
4f96879b
 ### SplicingGraphs() constructor
50d0360b
 ###
 
 ### 'exons_start' and 'exons_end' must be 2 integer vectors of the same length
 ### N representing the start and end positions of N exons that belong to the
 ### same gene. Returns 2 integer vectors of length N containing the splicing
 ### site ids assigned to the start and end positions, respectively.
 .make_SSids <- function(exons_start, exons_end, on.minus.strand=FALSE)
 {
     if (!is.numeric(exons_start))
         stop("'exons_start' must be an integer vector")
     if (!is.integer(exons_start))
         exons_start <- as.integer(exons_start)
     if (!is.numeric(exons_end))
         stop("'exons_end' must be an integer vector")
     if (!is.integer(exons_end))
         exons_end <- as.integer(exons_end)
     N <- length(exons_start)
     if (length(exons_end) != N)
         stop("'exons_start' and 'exons_end' must have the same length")
     ## splicing sites
     ssites <- data.frame(pos=c(exons_start, exons_end),
                          type=rep.int(1:2, c(N, N)))
ab342602
     ssites_sm <- S4Vectors:::selfmatchIntegerPairs(ssites$pos, ssites$type)
50d0360b
     ## unique splicing sites
     ussites <- ssites[ssites_sm == seq_along(ssites_sm), , drop=FALSE]
ab342602
     oo <- S4Vectors:::orderIntegerPairs(ussites$pos, ussites$type,
                                         decreasing=on.minus.strand)
50d0360b
     ussites <- ussites[oo, , drop=FALSE]
ab342602
     SSid <- S4Vectors:::matchIntegerPairs(ssites$pos, ssites$type,
                                           ussites$pos, ussites$type)
50d0360b
     exons_start_SSid <- head(SSid, n=N)
     exons_end_SSid <- tail(SSid, n=N)
     list(start_SSid=exons_start_SSid, end_SSid=exons_end_SSid)
 }
 
cb383bfe
 ### 'gene' must be a GRangesList object containing the exons of a *single*
28ea1605
 ### gene grouped by transcript. More precisely, each top-level element
cb383bfe
 ### in 'gene' contains the genomic ranges of the exons for a particular
50d0360b
 ### transcript of the gene.
ddf79f2c
 ### Should be able to deal with a GRangesList object of length 0 (i.e. a
 ### gene with no transcripts).
cb383bfe
 .setSplicingGraphInfo <- function(gene, check.introns=TRUE)
50d0360b
 {
cb383bfe
     if (!is(gene, "GRangesList"))
         stop("'gene' must be a GRangesList object")
50d0360b
     if (!isTRUEorFALSE(check.introns))
         stop("'check.introns' must be TRUE or FALSE")
cb383bfe
     exons <- gene@unlistData
6c1f4100
     common_strand <- commonStrand.GRanges(exons, what="exons in the gene")
04621cd6
     on.minus.strand <- common_strand == "-"
     if (check.introns && length(exons) != 0L) {
         ## We check that, within each transcript, exons are ordered from
         ## 5' to 3' with gaps of at least 1 nucleotide between them.
         ranges_by_tx <- ranges(gene)
         if (on.minus.strand)
             ranges_by_tx <- revElements(ranges_by_tx)
         if (!all(isNormal(ranges_by_tx)))
             stop("some transcripts in the gene don't have their exons ",
                  "ordered from 5' to 3' with gaps of at least 1 ",
                  "nucleotide between them (introns)")
50d0360b
     }
 
     ## Set splicing site ids.
     SSids <- .make_SSids(start(exons), end(exons),
                          on.minus.strand=on.minus.strand)
     exons_mcols <- mcols(exons)
     if (any(names(SSids) %in% colnames(exons_mcols))) {
         in_1_string <- paste(names(SSids), collapse=", ")
cb383bfe
         stop("'unlist(gene)' already has metadata columns: ", in_1_string)
50d0360b
     }
     mcols(exons) <- cbind(mcols(exons), DataFrame(SSids))
cb383bfe
     gene@unlistData <- exons
     gene_mcols <- mcols(gene)
50d0360b
 
ddf79f2c
     ## Set "tx_id" metadata col.
cb383bfe
     if ("tx_id" %in% colnames(gene_mcols))
         stop("'gene' already has metadata column tx_id")
     tx_id <- names(gene)
50d0360b
     if (!is.null(tx_id))
cb383bfe
         gene_mcols$tx_id <- tx_id
50d0360b
 
7ff5c4a7
     ## Set "txpath" metadata col.
     if ("txpath" %in% colnames(gene_mcols))
         stop("'gene' already has metadata column txpath")
50d0360b
     if (on.minus.strand) {
7ff5c4a7
         txpath <- rbind(SSids$end_SSid, SSids$start_SSid)
50d0360b
     } else {
7ff5c4a7
         txpath <- rbind(SSids$start_SSid, SSids$end_SSid)
50d0360b
     }
7ff5c4a7
     txpath_partitioning_end <- end(PartitioningByEnd(gene)) * 2L
     txpath_partitioning <- PartitioningByEnd(txpath_partitioning_end)
     names(txpath_partitioning) <- tx_id
fc60d3aa
     txpath <- relist(as.vector(txpath), txpath_partitioning)
7ff5c4a7
     gene_mcols$txpath <- txpath
50d0360b
 
cb383bfe
     mcols(gene) <- gene_mcols
     gene
50d0360b
 }
 
cb383bfe
 ### 'x' must be a GRangesList object containing the exons of one or more
28ea1605
 ### genes grouped by transcript. More precisely, each top-level element
cb383bfe
 ### in 'x' contains the genomic ranges of the exons for a particular
be6c86ff
 ### transcript. Typically 'x' will be obtained from a TxDb object
50d0360b
 ### 'txdb' with 'exonsBy(txdb, by="tx")'.
 ### 'grouping' is an optional object that represents the grouping by gene of
cb383bfe
 ### the top-level elements (i.e. transcripts) in 'x'. It can be either:
 ###   (a) Missing (i.e. NULL). In that case, all the transcripts in 'x'
4f96879b
 ###       are considered to belong to the same gene and the SplicingGraphs
 ###       object returned by SplicingGraphs() will be unnamed.
50d0360b
 ###   (b) A list of integer or character vectors, or an IntegerList, or a
 ###       CharacterList object, of length the number of genes to process,
cb383bfe
 ###       and where 'grouping[[i]]' is a vector of valid subscripts in 'x'
c5b71e02
 ###       pointing to all the transcripts of the i-th gene.
cb383bfe
 ###   (c) A factor, character vector, or integer vector, of length 'x'
50d0360b
 ###       with 1 level per gene.
28ea1605
 ###   (d) A named GRangesList object containing transcripts grouped by gene
50d0360b
 ###       i.e. each top-level element in 'grouping' contains the genomic ranges
 ###       of the transcripts for a particular gene. In that case, the grouping
 ###       is inferred from the tx_id (or alternatively tx_name) metadata
 ###       column of 'unlist(grouping)' and all the values in that column must
cb383bfe
 ###       be in 'names(x)'.
 ###       If 'x' was obtained with 'exonsBy(txdb, by="tx")', then the
50d0360b
 ###       GRangesList object used for grouping would typically be obtained with
 ###       'transcriptsBy(txdb, by="gene")'.
 ###   (e) A data.frame or DataFrame with 2 character vector columns: a
 ###       gene_id column (factor, character vector, or integer vector),
cb383bfe
 ###       and a tx_id (or alternatively tx_name) column. In that case, 'x'
50d0360b
 ###       must be named and all the values in the tx_id (or tx_name) column
cb383bfe
 ###       must be in 'names(x)'.
50d0360b
 
 .checkOrMakeUniqueGroupingNames <- function(grouping)
 {
     grouping_names <- names(grouping)
     if (is.null(grouping_names)) {
ddf79f2c
         if (length(grouping) != 0L)
             warning("'grouping' is unnamed. Setting names on it (with ",
                     "'names(grouping) <- seq_along(grouping)') as artificial ",
                     "gene ids.")
50d0360b
         names(grouping) <- seq_along(grouping)
         return(grouping)
     }
     if (anyDuplicated(grouping_names))
         stop("'grouping' has duplicated names")
     if (any(c(NA, "") %in% grouping_names))
         stop("'grouping' names contains NAs or \"\" (empty string)")
     grouping
 }
 
 ### Returns a list or IntegerList or CharacterList. Always *named* with the
 ### gene ids.
cb383bfe
 .normargGrouping <- function(grouping, x)
50d0360b
 {
     ## (b)
     if (is.list(grouping) || is(grouping, "IntegerList")
      || is(grouping, "CharacterList")) {
         return(.checkOrMakeUniqueGroupingNames(grouping))
     }
     ## (c)
     if (is.factor(grouping) || is.character(grouping) || is.integer(grouping)) {
cb383bfe
         if (length(grouping) != length(x))
50d0360b
             stop("when 'grouping' is a factor, character vector, or integer ",
cb383bfe
                  "vector, it must have the same length as 'x'")
         return(split(seq_along(x), grouping))
50d0360b
     }
cb383bfe
     x_names <- names(x)
50d0360b
     ## (d)
     if (is(grouping, "GRangesList")) {
cb383bfe
         if (is.null(x_names))
             stop("when 'grouping' is a GRangesList, 'x' must be named ",
50d0360b
                  "with transcript ids (tx_id) or transcript names (tx_name)")
         grouping <- .checkOrMakeUniqueGroupingNames(grouping)
         mcols <- mcols(grouping@unlistData)
         mcolnames <- colnames(mcols)
         for (colname in c("tx_id", "tx_name")) {
             idx <- which(mcolnames == colname)
             if (length(idx) == 0L)
                 next
             if (length(idx) >= 2L)
                 stop("'unlist(grouping)' has more than 1 ",
                      colname, " metadata column")
cb383bfe
             m <- match(mcols[[idx]], x_names)
50d0360b
             if (any(is.na(m)))
                 next
fc60d3aa
             return(relist(m, PartitioningByEnd(grouping)))
50d0360b
         }
         stop("'unlist(grouping)' has no tx_id or tx_name column, ",
cb383bfe
              "or they contain values that are not in 'names(x)'")
50d0360b
     }
     ## (e)
     if (is.data.frame(grouping) || is(grouping, "DataFrame")) {
cb383bfe
         if (is.null(x_names))
             stop("when 'grouping' is a data.frame or a DataFrame, 'x' ",
50d0360b
                  "must be named with transcript ids (tx_id) or transcript ",
                  "names (tx_name)")
         grouping_colnames <- colnames(grouping)
         idx <- which(grouping_colnames == "gene_id")
         if (length(idx) != 1L)
             stop("when 'grouping' is a data.frame or a DataFrame, it must ",
                  "have exactly 1 gene_id column")
         gene_id <- grouping[[idx]]
         if (!is.factor(gene_id) && !is.character(gene_id)
          && !is.integer(gene_id))
             stop("'grouping$gene_id' must be a factor, character vector, ",
                  "or integer vector")
         for (colname in c("tx_id", "tx_name")) {
             idx <- which(grouping_colnames == colname)
             if (length(idx) == 0L)
                 next
             if (length(idx) >= 2L)
                 stop("'grouping' has more than 1 ", colname, " column")
cb383bfe
             m <- match(grouping[[idx]], x_names)
50d0360b
             if (any(is.na(m)))
                 next
             return(split(m, gene_id))
         }
         stop("'grouping' has no tx_id or tx_name column, ",
cb383bfe
              "or they contain values that are not in 'names(x)'")
50d0360b
     }
     stop("invalid 'grouping'")
 
 }
 
 ### TODO: Improve handling of invalid genes i.e. provide more details about
 ### which genes were considered invalid and why.
ddf79f2c
 .make_ex_by_tx_from_GRangesList <- function(x, grouping=NULL,
                                                min.ntx=2, max.ntx=NA,
                                                check.introns=TRUE)
50d0360b
 {
cb383bfe
     if (!is(x, "GRangesList"))
         stop("'x' must be a GRangesList object")
50d0360b
     if (is.null(grouping)) {
9d0598b5
         if (!identical(min.ntx, 2) || !identical(max.ntx, NA))
             stop("the 'min.ntx' and 'max.ntx' args are not supported ",
                  "when 'grouping' is not supplied or NULL")
cb383bfe
         ans <- .setSplicingGraphInfo(x, check.introns=check.introns)
f4c6c266
         names(ans) <- NULL
50d0360b
         return(ans)
     }
9d0598b5
 
     ## Check 'min.ntx'.
     if (!isSingleNumber(min.ntx))
         stop("'min.ntx' must be a single number")
     if (!is.integer(min.ntx))
         min.ntx <- as.integer(min.ntx)
     if (min.ntx < 1L)
         stop("'min.ntx' must be >= 1")
 
     ## Check 'max.ntx'.
     if (!isSingleNumberOrNA(max.ntx))
         stop("'max.ntx' must be a single number or NA")
     if (!is.integer(max.ntx))
         max.ntx <- as.integer(max.ntx)
     if (!is.na(max.ntx) && max.ntx < min.ntx)
         stop("'max.ntx' must be >= 'min.ntx'")
 
cb383bfe
     grouping <- .normargGrouping(grouping, x)
9d0598b5
 
     ## Keep genes with nb of transcripts >= min.ntx and <= max.ntx.
09cffc8a
     grouping_eltNROWS <- elementNROWS(grouping)
     keep <- grouping_eltNROWS >= min.ntx
9d0598b5
     if (!is.na(max.ntx))
09cffc8a
         keep <- keep & grouping_eltNROWS <= max.ntx
9d0598b5
     grouping <- grouping[keep]
ddf79f2c
     if (length(grouping) == 0L) {
         ## We need to return an empty GRangesList but it cannot be simply
         ## made with GRangesList() or it wouldn't have the expected metadata
         ## cols (outer and inner). So we call .setSplicingGraphInfo() on an
         ## empty gene instead.
         ans <- .setSplicingGraphInfo(x[NULL], check.introns=check.introns)
         names(ans) <- character(0)
         return(ans)
     }
9d0598b5
 
     ## Main loop.
50d0360b
     ans <- lapply(seq_along(grouping),
                   function(i) {
                       ii <- grouping[[i]]
cb383bfe
                       gene <- x[ii]
50d0360b
                       gene2 <- try(.setSplicingGraphInfo(gene,
                                        check.introns=check.introns),
                                    silent=TRUE)
                       if (inherits(gene2, "try-error"))
                           return(NULL)
                       gene2
                   })
9d0598b5
 
50d0360b
     invalid_genes_idx <- which(sapply(ans, is.null))
     nb_invalid_genes <- length(invalid_genes_idx)
     if (nb_invalid_genes != 0L) {
         warning(nb_invalid_genes, " invalid ",
                 ifelse(nb_invalid_genes == 1L, "gene was", "genes were"),
                 " dropped")
         grouping <- grouping[-invalid_genes_idx]
         ans <- ans[-invalid_genes_idx]
     }
     ans <- do.call(c, ans)
     grouping_names <- names(grouping)
     if (!is.null(grouping_names)) {
09cffc8a
         ans_names <- rep.int(grouping_names, elementNROWS(grouping))
50d0360b
         names(ans) <- ans_names
     }
     ans
 }
 
cb383bfe
 setGeneric("SplicingGraphs", signature="x",
9d0598b5
     function(x, grouping=NULL, min.ntx=2, max.ntx=NA, check.introns=TRUE)
cb383bfe
         standardGeneric("SplicingGraphs")
 )
 
 setMethod("SplicingGraphs", "GRangesList",
9d0598b5
     function(x, grouping=NULL, min.ntx=2, max.ntx=NA, check.introns=TRUE)
cb383bfe
     {
ddf79f2c
         ans_ex_by_tx <- .make_ex_by_tx_from_GRangesList(x, grouping=grouping,
                             min.ntx=min.ntx, max.ntx=max.ntx,
77742862
                             check.introns=check.introns)
a48698ec
         ans_genes <- GeneModel(ans_ex_by_tx)
ddf79f2c
         mcols(ans_ex_by_tx) <- NULL
008c1d5a
         ans_in_by_tx <- setdiff(range(ans_ex_by_tx), ans_ex_by_tx)
6c1f4100
         common_strand <- commonStrand.GRangesList(ans_in_by_tx)
         ans_in_by_tx <- revElements(ans_in_by_tx, common_strand == "-")
24537e93
         ans_bubbles_cache <- new.env(parent=emptyenv())
         new("SplicingGraphs", genes=ans_genes,
                               in_by_tx=ans_in_by_tx,
                               .bubbles_cache=ans_bubbles_cache)
cb383bfe
     }
 )
 
be6c86ff
 setMethod("SplicingGraphs", "TxDb",
9d0598b5
     function(x, grouping=NULL, min.ntx=2, max.ntx=NA, check.introns=TRUE)
cb383bfe
     {
         if (!is.null(grouping))
             stop("the 'grouping' arg is not supported ",
be6c86ff
                  "when 'x' is a TxDb object")
cb383bfe
         ex_by_tx <- exonsBy(x, by="tx", use.names=TRUE)
         tx_by_gn <- transcriptsBy(x, by="gene")
9d0598b5
         SplicingGraphs(ex_by_tx, tx_by_gn, min.ntx=min.ntx, max.ntx=max.ntx,
                        check.introns=check.introns)
cb383bfe
     }
 )
4f96879b
 
ddf79f2c
 ### Not exported. Only used in the .onLoad hook (see zzz.R file) for fixing
a48698ec
 ### the prototypes of the GeneModel and SplicingGraphs classes.
ddf79f2c
 emptySplicingGraphs <- function()
 {
f66f7195
     ex_by_tx <- GRangesList()
     names(ex_by_tx) <- character(0)
     mcols(ex_by_tx@unlistData) <- DataFrame(exon_id=integer(0),
                                             exon_name=character(0),
                                             exon_rank=integer(0))
     ans <- SplicingGraphs(ex_by_tx, IntegerList())
ddf79f2c
     ## We want to make sure 'ans' is *completely* valid.
     validObject(ans, complete=TRUE)
     ans
 }