50d0360b |
### =========================================================================
|
4f96879b |
### SplicingGraphs objects
|
50d0360b |
### -------------------------------------------------------------------------
|
5712a4d7 |
|
24537e93 |
### With the 2-class design we choose below, the SplicingGraphs class does
### NOT extend the CompressedList class, but has the slot ('genes') that
### extends the CompressedList class. This avoids having the SplicingGraphs
### class inherit a very rich API (Vector + List), which would be a problem
### because many operations (like c() or relist()) would be broken, unless
### we re-implement them for SplicingGraphs objects. But: (a) that's a lot of
### work (the API is huge), and (b) we don't need those operations in the
|
09cffc8a |
### first place. All we need are: length(), names(), [, [[, elementNROWS(),
|
24537e93 |
### and unlist().
|
a48698ec |
### The GeneModel class is an internal class that is not intended to be
### exposed to the user. It's a list-like class where the elements are
### GRangesList objects, each of them representing a gene. It's currently
### defined exactly how the GRangesListList class would be if we decided to
### have one in the GenomicRanges package.
|
69a8543c |
|
a48698ec |
setClass("GeneModel",
|
77742862 |
contains="CompressedList",
|
4f96879b |
representation(
|
77742862 |
unlistData="GRangesList",
elementMetadata="DataFrame"
),
prototype(
elementType="GRangesList"
|
4f96879b |
)
)
|
69a8543c |
setClass("SplicingGraphs",
representation(
|
a48698ec |
genes="GeneModel",
|
24537e93 |
in_by_tx="GRangesList",
.bubbles_cache="environment"
|
69a8543c |
)
)
|
50d0360b |
|
4f96879b |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
a48698ec |
### GeneModel validity.
|
69a8543c |
###
|
4f96879b |
|
a48698ec |
.valid.GeneModel.names <- function(x)
|
77742862 |
{
x_names <- names(x)
if (is.null(x_names)) {
if (length(x) == 1L)
return(NULL)
return("'x' must have names")
}
if (anyDuplicated(x_names))
return("'x' has duplicated names")
NULL
}
|
a48698ec |
.valid.GeneModel.ex_by_tx <- function(x)
|
77742862 |
{
|
f66f7195 |
ex_by_tx <- x@unlistData
if (!is.null(names(ex_by_tx)))
|
77742862 |
return("'x@unlistData' must be unnamed")
|
f66f7195 |
ex_mcols <- mcols(ex_by_tx@unlistData)
valid_exon_mcolnames(colnames(ex_mcols))
|
77742862 |
}
|
4f96879b |
|
a48698ec |
.valid.GeneModel <- function(x)
|
77742862 |
{
|
a48698ec |
c(.valid.GeneModel.names(x),
.valid.GeneModel.ex_by_tx(x))
|
77742862 |
}
|
4f96879b |
|
a48698ec |
setValidity2("GeneModel", .valid.GeneModel)
|
69a8543c |
|
ddf79f2c |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
a48698ec |
### GeneModel API.
|
ddf79f2c |
###
|
937fc96c |
### From the CompressedList API:
|
a48698ec |
### GeneModel objects inherit the CompressedList API i.e. anything that works
### on a CompressedList object works on a GeneModel object. But the only
### things we're using/supporting in the SplicingGraphs package are: length(),
|
09cffc8a |
### names(), [, [[, elementNROWS(), and unlist().
|
937fc96c |
### From the GRanges API:
|
66886f49 |
### We only need seqnames(), strand(), and seqinfo() from the GRanges API.
|
937fc96c |
|
a48698ec |
setMethod("seqnames", "GeneModel",
|
937fc96c |
function(x)
{
x_unlisted <- unlist(x)
unlisted_seqnames <- commonSeqnames.GRangesList(x_unlisted)
relisted_seqnames <- relist(unlisted_seqnames, x)
ans <- commonSeqnames.RleList(relisted_seqnames)
names(ans) <- names(x)
ans
}
)
|
a48698ec |
setMethod("strand", "GeneModel",
|
937fc96c |
function(x)
{
x_unlisted <- unlist(x)
unlisted_strand <- commonStrand.GRangesList(x_unlisted)
relisted_strand <- relist(unlisted_strand, x)
ans <- commonStrand.RleList(relisted_strand)
names(ans) <- names(x)
ans
}
)
|
ddf79f2c |
|
a48698ec |
setMethod("seqinfo", "GeneModel",
|
66886f49 |
function(x) seqinfo(unlist(x, use.names=FALSE))
)
|
a3a942cd |
### The only GeneModel setter supported at the moment.
setReplaceMethod("isCircular", "GeneModel",
function(x, value)
{
isCircular(x@unlistData) <- value
x
}
)
|
ddf79f2c |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
a48698ec |
### GeneModel() constructor.
|
ddf79f2c |
###
### Used only in this file.
###
|
a48698ec |
### 'ex_by_tx' must be a *named* GRangesList object containing exons grouped
### by transcript. The names must be the gene ids, NOT the transcript ids or
### transcript names.
GeneModel <- function(ex_by_tx)
|
ddf79f2c |
{
ex_by_tx_names <- names(ex_by_tx)
if (is.null(ex_by_tx_names)) {
ans_partitioning <- PartitioningByEnd(length(ex_by_tx))
} else {
names(ex_by_tx) <- NULL
ans_end <- end(Rle(ex_by_tx_names))
ans_names <- ex_by_tx_names[ans_end]
ans_partitioning <- PartitioningByEnd(ans_end, names=ans_names)
}
|
a48698ec |
IRanges:::newCompressedList0(getClass("GeneModel"),
|
ddf79f2c |
ex_by_tx, ans_partitioning)
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### SplicingGraphs validity.
###
### A SplicingGraphs object 'x' should be validated with:
###
### validObject(x, complete=TRUE)
###
|
808fe39e |
.valid.SplicingGraphs.in_by_tx <- function(x)
{
x_in_by_tx <- x@in_by_tx
|
ddf79f2c |
x_in_by_tx_names <- names(x_in_by_tx)
if (is.null(x_in_by_tx_names))
return("'x@in_by_tx' must have names")
x_ex_by_tx <- unlist(x@genes)
if (length(x_in_by_tx) != length(x_ex_by_tx))
return("'x@in_by_tx' must have the same length as 'unlist(x@genes)'")
if (!identical(x_in_by_tx_names, names(x_ex_by_tx)))
return("'x@in_by_tx' must have the same names as 'unlist(x@genes)'")
|
09cffc8a |
if (!identical(elementNROWS(x_in_by_tx) + 1L,
elementNROWS(x_ex_by_tx))) {
|
808fe39e |
msg <- c("the shape of 'x@in_by_tx' is not compatible ",
|
ddf79f2c |
"with the shape of 'unlist(x@genes)'")
|
808fe39e |
return(paste0(msg, collapse=""))
}
NULL
}
.valid.SplicingGraphs <- function(x)
{
c(.valid.SplicingGraphs.in_by_tx(x))
}
setValidity2("SplicingGraphs", .valid.SplicingGraphs)
|
69a8543c |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Restricted SplicingGraphs API.
###
|
66886f49 |
### Here we only implement:
### - a few core methods from the List API,
### - seqnames(), strand(), seqinfo() from the GRanges API.
|
dd0bd6a7 |
###
|
69a8543c |
setMethod("length", "SplicingGraphs", function(x) length(x@genes))
setMethod("names", "SplicingGraphs", function(x) names(x@genes))
setMethod("[", "SplicingGraphs",
|
dd0bd6a7 |
function(x, i, j, ..., drop=TRUE)
|
69a8543c |
{
if (!missing(j) || length(list(...)) > 0L)
stop("invalid subsetting")
if (missing(i))
return(x)
|
808fe39e |
if (is.null(names(x)))
stop("subsetting a SplicingGraphs with no names is not supported")
|
ca31837e |
x_genes <- x@genes
|
0c406896 |
seqx <- seq_along(x_genes)
names(seqx) <- names(x_genes)
i <- seqx[i]
|
ca31837e |
ii <- unlist(IRanges(PartitioningByEnd(x_genes))[i], use.names=FALSE)
|
0c406896 |
x@genes <- x_genes[i]
|
ca31837e |
x@in_by_tx <- x@in_by_tx[ii]
|
69a8543c |
x
}
)
setMethod("[[", "SplicingGraphs",
function (x, i, j, ...)
{
if (!missing(j) || length(list(...)) > 0L)
stop("invalid subsetting")
x@genes[[i]]
}
)
|
09cffc8a |
setMethod("elementNROWS", "SplicingGraphs",
function(x) elementNROWS(x@genes)
|
69a8543c |
)
setMethod("unlist", "SplicingGraphs",
function(x, recursive=TRUE, use.names=TRUE)
{
unlist(x@genes, recursive=recursive, use.names=use.names)
}
)
|
4f96879b |
|
937fc96c |
setMethod("seqnames", "SplicingGraphs", function(x) seqnames(x@genes))
setMethod("strand", "SplicingGraphs", function(x) strand(x@genes))
|
66886f49 |
setMethod("seqinfo", "SplicingGraphs", function(x) seqinfo(x@genes))
|
dd0bd6a7 |
|
a3a942cd |
### isCircular<- and txweight<- are the only SplicingGraphs setters supported
### at the moment.
setReplaceMethod("isCircular", "SplicingGraphs",
function(x, value)
{
isCircular(x@genes) <- isCircular(x@in_by_tx) <- value
x
}
)
|
4f96879b |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### "show" method.
###
setMethod("show", "SplicingGraphs",
function(object)
{
|
77742862 |
ngene <- length(object)
ntx <- length(unlist(object, use.names=FALSE))
cat(class(object), " object with ", ngene, " gene(s) ",
|
4f96879b |
"and ", ntx, " transcript(s)\n", sep="")
}
)
|
50d0360b |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
4f96879b |
### SplicingGraphs() constructor
|
50d0360b |
###
### 'exons_start' and 'exons_end' must be 2 integer vectors of the same length
### N representing the start and end positions of N exons that belong to the
### same gene. Returns 2 integer vectors of length N containing the splicing
### site ids assigned to the start and end positions, respectively.
.make_SSids <- function(exons_start, exons_end, on.minus.strand=FALSE)
{
if (!is.numeric(exons_start))
stop("'exons_start' must be an integer vector")
if (!is.integer(exons_start))
exons_start <- as.integer(exons_start)
if (!is.numeric(exons_end))
stop("'exons_end' must be an integer vector")
if (!is.integer(exons_end))
exons_end <- as.integer(exons_end)
N <- length(exons_start)
if (length(exons_end) != N)
stop("'exons_start' and 'exons_end' must have the same length")
## splicing sites
ssites <- data.frame(pos=c(exons_start, exons_end),
type=rep.int(1:2, c(N, N)))
|
ab342602 |
ssites_sm <- S4Vectors:::selfmatchIntegerPairs(ssites$pos, ssites$type)
|
50d0360b |
## unique splicing sites
ussites <- ssites[ssites_sm == seq_along(ssites_sm), , drop=FALSE]
|
ab342602 |
oo <- S4Vectors:::orderIntegerPairs(ussites$pos, ussites$type,
decreasing=on.minus.strand)
|
50d0360b |
ussites <- ussites[oo, , drop=FALSE]
|
ab342602 |
SSid <- S4Vectors:::matchIntegerPairs(ssites$pos, ssites$type,
ussites$pos, ussites$type)
|
50d0360b |
exons_start_SSid <- head(SSid, n=N)
exons_end_SSid <- tail(SSid, n=N)
list(start_SSid=exons_start_SSid, end_SSid=exons_end_SSid)
}
|
cb383bfe |
### 'gene' must be a GRangesList object containing the exons of a *single*
|
28ea1605 |
### gene grouped by transcript. More precisely, each top-level element
|
cb383bfe |
### in 'gene' contains the genomic ranges of the exons for a particular
|
50d0360b |
### transcript of the gene.
|
ddf79f2c |
### Should be able to deal with a GRangesList object of length 0 (i.e. a
### gene with no transcripts).
|
cb383bfe |
.setSplicingGraphInfo <- function(gene, check.introns=TRUE)
|
50d0360b |
{
|
cb383bfe |
if (!is(gene, "GRangesList"))
stop("'gene' must be a GRangesList object")
|
50d0360b |
if (!isTRUEorFALSE(check.introns))
stop("'check.introns' must be TRUE or FALSE")
|
cb383bfe |
exons <- gene@unlistData
|
6c1f4100 |
common_strand <- commonStrand.GRanges(exons, what="exons in the gene")
|
04621cd6 |
on.minus.strand <- common_strand == "-"
if (check.introns && length(exons) != 0L) {
## We check that, within each transcript, exons are ordered from
## 5' to 3' with gaps of at least 1 nucleotide between them.
ranges_by_tx <- ranges(gene)
if (on.minus.strand)
ranges_by_tx <- revElements(ranges_by_tx)
if (!all(isNormal(ranges_by_tx)))
stop("some transcripts in the gene don't have their exons ",
"ordered from 5' to 3' with gaps of at least 1 ",
"nucleotide between them (introns)")
|
50d0360b |
}
## Set splicing site ids.
SSids <- .make_SSids(start(exons), end(exons),
on.minus.strand=on.minus.strand)
exons_mcols <- mcols(exons)
if (any(names(SSids) %in% colnames(exons_mcols))) {
in_1_string <- paste(names(SSids), collapse=", ")
|
cb383bfe |
stop("'unlist(gene)' already has metadata columns: ", in_1_string)
|
50d0360b |
}
mcols(exons) <- cbind(mcols(exons), DataFrame(SSids))
|
cb383bfe |
gene@unlistData <- exons
gene_mcols <- mcols(gene)
|
50d0360b |
|
ddf79f2c |
## Set "tx_id" metadata col.
|
cb383bfe |
if ("tx_id" %in% colnames(gene_mcols))
stop("'gene' already has metadata column tx_id")
tx_id <- names(gene)
|
50d0360b |
if (!is.null(tx_id))
|
cb383bfe |
gene_mcols$tx_id <- tx_id
|
50d0360b |
|
7ff5c4a7 |
## Set "txpath" metadata col.
if ("txpath" %in% colnames(gene_mcols))
stop("'gene' already has metadata column txpath")
|
50d0360b |
if (on.minus.strand) {
|
7ff5c4a7 |
txpath <- rbind(SSids$end_SSid, SSids$start_SSid)
|
50d0360b |
} else {
|
7ff5c4a7 |
txpath <- rbind(SSids$start_SSid, SSids$end_SSid)
|
50d0360b |
}
|
7ff5c4a7 |
txpath_partitioning_end <- end(PartitioningByEnd(gene)) * 2L
txpath_partitioning <- PartitioningByEnd(txpath_partitioning_end)
names(txpath_partitioning) <- tx_id
|
fc60d3aa |
txpath <- relist(as.vector(txpath), txpath_partitioning)
|
7ff5c4a7 |
gene_mcols$txpath <- txpath
|
50d0360b |
|
cb383bfe |
mcols(gene) <- gene_mcols
gene
|
50d0360b |
}
|
cb383bfe |
### 'x' must be a GRangesList object containing the exons of one or more
|
28ea1605 |
### genes grouped by transcript. More precisely, each top-level element
|
cb383bfe |
### in 'x' contains the genomic ranges of the exons for a particular
|
be6c86ff |
### transcript. Typically 'x' will be obtained from a TxDb object
|
50d0360b |
### 'txdb' with 'exonsBy(txdb, by="tx")'.
### 'grouping' is an optional object that represents the grouping by gene of
|
cb383bfe |
### the top-level elements (i.e. transcripts) in 'x'. It can be either:
### (a) Missing (i.e. NULL). In that case, all the transcripts in 'x'
|
4f96879b |
### are considered to belong to the same gene and the SplicingGraphs
### object returned by SplicingGraphs() will be unnamed.
|
50d0360b |
### (b) A list of integer or character vectors, or an IntegerList, or a
### CharacterList object, of length the number of genes to process,
|
cb383bfe |
### and where 'grouping[[i]]' is a vector of valid subscripts in 'x'
|
c5b71e02 |
### pointing to all the transcripts of the i-th gene.
|
cb383bfe |
### (c) A factor, character vector, or integer vector, of length 'x'
|
50d0360b |
### with 1 level per gene.
|
28ea1605 |
### (d) A named GRangesList object containing transcripts grouped by gene
|
50d0360b |
### i.e. each top-level element in 'grouping' contains the genomic ranges
### of the transcripts for a particular gene. In that case, the grouping
### is inferred from the tx_id (or alternatively tx_name) metadata
### column of 'unlist(grouping)' and all the values in that column must
|
cb383bfe |
### be in 'names(x)'.
### If 'x' was obtained with 'exonsBy(txdb, by="tx")', then the
|
50d0360b |
### GRangesList object used for grouping would typically be obtained with
### 'transcriptsBy(txdb, by="gene")'.
### (e) A data.frame or DataFrame with 2 character vector columns: a
### gene_id column (factor, character vector, or integer vector),
|
cb383bfe |
### and a tx_id (or alternatively tx_name) column. In that case, 'x'
|
50d0360b |
### must be named and all the values in the tx_id (or tx_name) column
|
cb383bfe |
### must be in 'names(x)'.
|
50d0360b |
.checkOrMakeUniqueGroupingNames <- function(grouping)
{
grouping_names <- names(grouping)
if (is.null(grouping_names)) {
|
ddf79f2c |
if (length(grouping) != 0L)
warning("'grouping' is unnamed. Setting names on it (with ",
"'names(grouping) <- seq_along(grouping)') as artificial ",
"gene ids.")
|
50d0360b |
names(grouping) <- seq_along(grouping)
return(grouping)
}
if (anyDuplicated(grouping_names))
stop("'grouping' has duplicated names")
if (any(c(NA, "") %in% grouping_names))
stop("'grouping' names contains NAs or \"\" (empty string)")
grouping
}
### Returns a list or IntegerList or CharacterList. Always *named* with the
### gene ids.
|
cb383bfe |
.normargGrouping <- function(grouping, x)
|
50d0360b |
{
## (b)
if (is.list(grouping) || is(grouping, "IntegerList")
|| is(grouping, "CharacterList")) {
return(.checkOrMakeUniqueGroupingNames(grouping))
}
## (c)
if (is.factor(grouping) || is.character(grouping) || is.integer(grouping)) {
|
cb383bfe |
if (length(grouping) != length(x))
|
50d0360b |
stop("when 'grouping' is a factor, character vector, or integer ",
|
cb383bfe |
"vector, it must have the same length as 'x'")
return(split(seq_along(x), grouping))
|
50d0360b |
}
|
cb383bfe |
x_names <- names(x)
|
50d0360b |
## (d)
if (is(grouping, "GRangesList")) {
|
cb383bfe |
if (is.null(x_names))
stop("when 'grouping' is a GRangesList, 'x' must be named ",
|
50d0360b |
"with transcript ids (tx_id) or transcript names (tx_name)")
grouping <- .checkOrMakeUniqueGroupingNames(grouping)
mcols <- mcols(grouping@unlistData)
mcolnames <- colnames(mcols)
for (colname in c("tx_id", "tx_name")) {
idx <- which(mcolnames == colname)
if (length(idx) == 0L)
next
if (length(idx) >= 2L)
stop("'unlist(grouping)' has more than 1 ",
colname, " metadata column")
|
cb383bfe |
m <- match(mcols[[idx]], x_names)
|
50d0360b |
if (any(is.na(m)))
next
|
fc60d3aa |
return(relist(m, PartitioningByEnd(grouping)))
|
50d0360b |
}
stop("'unlist(grouping)' has no tx_id or tx_name column, ",
|
cb383bfe |
"or they contain values that are not in 'names(x)'")
|
50d0360b |
}
## (e)
if (is.data.frame(grouping) || is(grouping, "DataFrame")) {
|
cb383bfe |
if (is.null(x_names))
stop("when 'grouping' is a data.frame or a DataFrame, 'x' ",
|
50d0360b |
"must be named with transcript ids (tx_id) or transcript ",
"names (tx_name)")
grouping_colnames <- colnames(grouping)
idx <- which(grouping_colnames == "gene_id")
if (length(idx) != 1L)
stop("when 'grouping' is a data.frame or a DataFrame, it must ",
"have exactly 1 gene_id column")
gene_id <- grouping[[idx]]
if (!is.factor(gene_id) && !is.character(gene_id)
&& !is.integer(gene_id))
stop("'grouping$gene_id' must be a factor, character vector, ",
"or integer vector")
for (colname in c("tx_id", "tx_name")) {
idx <- which(grouping_colnames == colname)
if (length(idx) == 0L)
next
if (length(idx) >= 2L)
stop("'grouping' has more than 1 ", colname, " column")
|
cb383bfe |
m <- match(grouping[[idx]], x_names)
|
50d0360b |
if (any(is.na(m)))
next
return(split(m, gene_id))
}
stop("'grouping' has no tx_id or tx_name column, ",
|
cb383bfe |
"or they contain values that are not in 'names(x)'")
|
50d0360b |
}
stop("invalid 'grouping'")
}
### TODO: Improve handling of invalid genes i.e. provide more details about
### which genes were considered invalid and why.
|
ddf79f2c |
.make_ex_by_tx_from_GRangesList <- function(x, grouping=NULL,
min.ntx=2, max.ntx=NA,
check.introns=TRUE)
|
50d0360b |
{
|
cb383bfe |
if (!is(x, "GRangesList"))
stop("'x' must be a GRangesList object")
|
50d0360b |
if (is.null(grouping)) {
|
9d0598b5 |
if (!identical(min.ntx, 2) || !identical(max.ntx, NA))
stop("the 'min.ntx' and 'max.ntx' args are not supported ",
"when 'grouping' is not supplied or NULL")
|
cb383bfe |
ans <- .setSplicingGraphInfo(x, check.introns=check.introns)
|
f4c6c266 |
names(ans) <- NULL
|
50d0360b |
return(ans)
}
|
9d0598b5 |
## Check 'min.ntx'.
if (!isSingleNumber(min.ntx))
stop("'min.ntx' must be a single number")
if (!is.integer(min.ntx))
min.ntx <- as.integer(min.ntx)
if (min.ntx < 1L)
stop("'min.ntx' must be >= 1")
## Check 'max.ntx'.
if (!isSingleNumberOrNA(max.ntx))
stop("'max.ntx' must be a single number or NA")
if (!is.integer(max.ntx))
max.ntx <- as.integer(max.ntx)
if (!is.na(max.ntx) && max.ntx < min.ntx)
stop("'max.ntx' must be >= 'min.ntx'")
|
cb383bfe |
grouping <- .normargGrouping(grouping, x)
|
9d0598b5 |
## Keep genes with nb of transcripts >= min.ntx and <= max.ntx.
|
09cffc8a |
grouping_eltNROWS <- elementNROWS(grouping)
keep <- grouping_eltNROWS >= min.ntx
|
9d0598b5 |
if (!is.na(max.ntx))
|
09cffc8a |
keep <- keep & grouping_eltNROWS <= max.ntx
|
9d0598b5 |
grouping <- grouping[keep]
|
ddf79f2c |
if (length(grouping) == 0L) {
## We need to return an empty GRangesList but it cannot be simply
## made with GRangesList() or it wouldn't have the expected metadata
## cols (outer and inner). So we call .setSplicingGraphInfo() on an
## empty gene instead.
ans <- .setSplicingGraphInfo(x[NULL], check.introns=check.introns)
names(ans) <- character(0)
return(ans)
}
|
9d0598b5 |
## Main loop.
|
50d0360b |
ans <- lapply(seq_along(grouping),
function(i) {
ii <- grouping[[i]]
|
cb383bfe |
gene <- x[ii]
|
50d0360b |
gene2 <- try(.setSplicingGraphInfo(gene,
check.introns=check.introns),
silent=TRUE)
if (inherits(gene2, "try-error"))
return(NULL)
gene2
})
|
9d0598b5 |
|
50d0360b |
invalid_genes_idx <- which(sapply(ans, is.null))
nb_invalid_genes <- length(invalid_genes_idx)
if (nb_invalid_genes != 0L) {
warning(nb_invalid_genes, " invalid ",
ifelse(nb_invalid_genes == 1L, "gene was", "genes were"),
" dropped")
grouping <- grouping[-invalid_genes_idx]
ans <- ans[-invalid_genes_idx]
}
ans <- do.call(c, ans)
grouping_names <- names(grouping)
if (!is.null(grouping_names)) {
|
09cffc8a |
ans_names <- rep.int(grouping_names, elementNROWS(grouping))
|
50d0360b |
names(ans) <- ans_names
}
ans
}
|
cb383bfe |
setGeneric("SplicingGraphs", signature="x",
|
9d0598b5 |
function(x, grouping=NULL, min.ntx=2, max.ntx=NA, check.introns=TRUE)
|
cb383bfe |
standardGeneric("SplicingGraphs")
)
setMethod("SplicingGraphs", "GRangesList",
|
9d0598b5 |
function(x, grouping=NULL, min.ntx=2, max.ntx=NA, check.introns=TRUE)
|
cb383bfe |
{
|
ddf79f2c |
ans_ex_by_tx <- .make_ex_by_tx_from_GRangesList(x, grouping=grouping,
min.ntx=min.ntx, max.ntx=max.ntx,
|
77742862 |
check.introns=check.introns)
|
a48698ec |
ans_genes <- GeneModel(ans_ex_by_tx)
|
ddf79f2c |
mcols(ans_ex_by_tx) <- NULL
|
008c1d5a |
ans_in_by_tx <- setdiff(range(ans_ex_by_tx), ans_ex_by_tx)
|
6c1f4100 |
common_strand <- commonStrand.GRangesList(ans_in_by_tx)
ans_in_by_tx <- revElements(ans_in_by_tx, common_strand == "-")
|
24537e93 |
ans_bubbles_cache <- new.env(parent=emptyenv())
new("SplicingGraphs", genes=ans_genes,
in_by_tx=ans_in_by_tx,
.bubbles_cache=ans_bubbles_cache)
|
cb383bfe |
}
)
|
be6c86ff |
setMethod("SplicingGraphs", "TxDb",
|
9d0598b5 |
function(x, grouping=NULL, min.ntx=2, max.ntx=NA, check.introns=TRUE)
|
cb383bfe |
{
if (!is.null(grouping))
stop("the 'grouping' arg is not supported ",
|
be6c86ff |
"when 'x' is a TxDb object")
|
cb383bfe |
ex_by_tx <- exonsBy(x, by="tx", use.names=TRUE)
tx_by_gn <- transcriptsBy(x, by="gene")
|
9d0598b5 |
SplicingGraphs(ex_by_tx, tx_by_gn, min.ntx=min.ntx, max.ntx=max.ntx,
check.introns=check.introns)
|
cb383bfe |
}
)
|
4f96879b |
|
ddf79f2c |
### Not exported. Only used in the .onLoad hook (see zzz.R file) for fixing
|
a48698ec |
### the prototypes of the GeneModel and SplicingGraphs classes.
|
ddf79f2c |
emptySplicingGraphs <- function()
{
|
f66f7195 |
ex_by_tx <- GRangesList()
names(ex_by_tx) <- character(0)
mcols(ex_by_tx@unlistData) <- DataFrame(exon_id=integer(0),
exon_name=character(0),
exon_rank=integer(0))
ans <- SplicingGraphs(ex_by_tx, IntegerList())
|
ddf79f2c |
## We want to make sure 'ans' is *completely* valid.
validObject(ans, complete=TRUE)
ans
}
|