Bioconductor Code: SplicingGraphs

R/sgedgesByGene-methods.R

6c1f4100	### =========================================================================
942a5908	### "sgedgesByGene" (and related) methods
6c1f4100	### -------------------------------------------------------------------------
9cd225cb	### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### intronsByTranscript() ###
66886f49	setMethod("intronsByTranscript", "SplicingGraphs", function(x) x@in_by_tx)
c6222c7f
9cd225cb	### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### sgedgesByTranscript() ### setGeneric("sgedgesByTranscript", signature="x",
1dab2c6e	function(x, with.exon.mcols=FALSE, with.hits.mcols=FALSE)
9cd225cb	standardGeneric("sgedgesByTranscript")
6c1f4100	) setMethod("sgedgesByTranscript", "SplicingGraphs",
1dab2c6e	function(x, with.exon.mcols=FALSE, with.hits.mcols=FALSE)
6c1f4100	{
1dab2c6e	if (!isTRUEorFALSE(with.exon.mcols)) stop("'with.exon.mcols' must be TRUE or FALSE") if (!isTRUEorFALSE(with.hits.mcols)) stop("'with.hits.mcols' must be TRUE or FALSE")
9cd225cb
6c1f4100	ex_by_tx <- unlist(x) ex_partitioning <- PartitioningByEnd(ex_by_tx) gene_ids <- names(ex_partitioning) in_by_tx <- intronsByTranscript(x) in_partitioning <- PartitioningByEnd(in_by_tx) stopifnot(identical(gene_ids, names(in_partitioning))) ## Compute 'ans_partitioning'. nex_by_tx <- width(ex_partitioning) nin_by_tx <- width(in_partitioning) stopifnot(identical(nin_by_tx + 1L, nex_by_tx)) ans_partitioning <- PartitioningByEnd(end(ex_partitioning) + end(in_partitioning), names=gene_ids)
c6222c7f	## Add missing metadata cols to 'in_unlistData'.
6c1f4100	ex_unlistData <- ex_by_tx@unlistData ex_unlistData_len <- length(ex_unlistData)
7341eb9a	ex_unlistData_mcols <- mcols(ex_unlistData)
43e1ae98	check_exon_mcolnames(colnames(ex_unlistData_mcols))
9cd225cb
6c1f4100	in_unlistData <- in_by_tx@unlistData in_unlistData_len <- length(in_unlistData)
7341eb9a	in_unlistData_mcols <- mcols(in_unlistData)
6c1f4100	in_missing_mcols <- DataFrame(exon_id=NA_integer_, exon_name=NA_character_, exon_rank=NA_integer_, start_SSid=NA_integer_, end_SSid=NA_integer_) idx <- rep.int(1L, in_unlistData_len) in_missing_mcols <- in_missing_mcols[idx, , drop=FALSE]
7341eb9a	in_unlistData_mcols <- cbind(in_missing_mcols, in_unlistData_mcols)
6c1f4100
7341eb9a	## Make "from" and "to" metadata cols.
c6222c7f	from <- as.character(ex_unlistData_mcols$start_SSid) to <- as.character(ex_unlistData_mcols$end_SSid) idx <- which(strand(ex_unlistData) == "-") tmp <- from[idx] from[idx] <- to[idx] to[idx] <- tmp
7341eb9a	ex_prepend_mcols <- DataFrame(from=from, to=to)
c6222c7f	from <- to <- rep.int(NA_character_, in_unlistData_len)
7341eb9a	in_prepend_mcols <- DataFrame(from=from, to=to) ## Make "ex_or_in" and "tx_id" metadata cols. ex_or_in <- rep.int(factor("ex", levels=EX_OR_IN_LEVELS), ex_unlistData_len) ex_prepend_mcols$ex_or_in <- ex_or_in ex_or_in <- rep.int(factor("in", levels=EX_OR_IN_LEVELS), in_unlistData_len) in_prepend_mcols$ex_or_in <- ex_or_in
c6222c7f	tx_id <- mcols(ex_by_tx)[ , "tx_id"]
7341eb9a	ex_prepend_mcols$tx_id <- rep.int(tx_id, nex_by_tx) in_prepend_mcols$tx_id <- rep.int(tx_id, nin_by_tx) mcols(ex_unlistData) <- cbind(ex_prepend_mcols, ex_unlistData_mcols) mcols(in_unlistData) <- cbind(in_prepend_mcols, in_unlistData_mcols)
c6222c7f	## Compute 'ans_unlistData'. We need to reorder 'c(ex_unlistData,
7341eb9a	## in_unlistData)' to bring introns between their flanking exons.
c6222c7f	ans_unlistData <- c(ex_unlistData, in_unlistData)
6c1f4100	seq0 <- seq_along(ex_partitioning) roidx <- integer(ex_unlistData_len + in_unlistData_len) seq1 <- seq_len(ex_unlistData_len) idx1 <- seq1 * 2L - rep.int(seq0, nex_by_tx) roidx[idx1] <- seq1 seq2 <- seq_len(in_unlistData_len) idx2 <- seq2 * 2L + rep.int(seq0, nin_by_tx) - 1L roidx[idx2] <- seq2 + ex_unlistData_len ans_unlistData <- ans_unlistData[roidx]
c6222c7f	## Fill gaps in "from" and "to" metadata cols. ans_unlistData_mcols <- mcols(ans_unlistData) from <- ans_unlistData_mcols$from to <- ans_unlistData_mcols$to introns_idx <- which(is.na(from)) # same as 'which(is.na(to))' from[introns_idx] <- to[introns_idx - 1L] to[introns_idx] <- from[introns_idx + 1L]
7341eb9a	ans_unlistData_mcols$from <- from ans_unlistData_mcols$to <- to
c6222c7f
7341eb9a	## Sanity check: exons must be flanking introns.
c6222c7f	ans_unlistData_start <- start(ans_unlistData) ans_unlistData_end <- end(ans_unlistData) ans_unlistData_strand <- strand(ans_unlistData) plus_idx <- which(ans_unlistData_strand == "+") plus_introns_idx <- intersect(introns_idx, plus_idx) stopifnot(identical(ans_unlistData_start[plus_introns_idx] - 1L, ans_unlistData_end[plus_introns_idx - 1L])) stopifnot(identical(ans_unlistData_end[plus_introns_idx] + 1L, ans_unlistData_start[plus_introns_idx + 1L])) minus_idx <- which(ans_unlistData_strand == "-") minus_introns_idx <- intersect(introns_idx, minus_idx) stopifnot(identical(ans_unlistData_start[minus_introns_idx] - 1L, ans_unlistData_end[minus_introns_idx + 1L])) stopifnot(identical(ans_unlistData_end[minus_introns_idx] + 1L, ans_unlistData_start[minus_introns_idx - 1L]))
e3f9cd93	## Insert "sgedge_id" metadata col after first 2 metadata cols ## ("from" and "to"). sgedge_id <- make_global_sgedge_id( rep.int(gene_ids, width(ans_partitioning)), from, to) ans_unlistData_mcols <- c(ans_unlistData_mcols[1:2], DataFrame(sgedge_id=sgedge_id), ans_unlistData_mcols[-(1:2)])
43e1ae98	check_all_edge_mcolnames(colnames(ans_unlistData_mcols))
9cd225cb	## Drop unwanted columns.
43e1ae98	mcol_idx <- get_index_of_group_of_mcols(
9cd225cb	colnames(ans_unlistData_mcols),
1dab2c6e	with.exon.mcols, with.hits.mcols)
9cd225cb	if (length(mcol_idx) != 0L) ans_unlistData_mcols <- ans_unlistData_mcols[ , -mcol_idx, drop=FALSE] mcols(ans_unlistData) <- ans_unlistData_mcols ## Relist 'ans_unlistData' and return. ans <- relist(ans_unlistData, ans_partitioning) ans } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### sgedgesByGene() ### setGeneric("sgedgesByGene", signature="x",
942a5908	function(x, with.exon.mcols=FALSE, with.hits.mcols=FALSE, keep.dup.edges=FALSE)
9cd225cb	standardGeneric("sgedgesByGene") ) setMethod("sgedgesByGene", "SplicingGraphs",
942a5908	function(x, with.exon.mcols=FALSE, with.hits.mcols=FALSE, keep.dup.edges=FALSE)
9cd225cb	{
942a5908	if (!isTRUEorFALSE(keep.dup.edges)) stop("'keep.dup.edges' must be TRUE or FALSE")
1dab2c6e	edges_by_tx <- sgedgesByTranscript(x, with.exon.mcols=with.exon.mcols, with.hits.mcols=with.hits.mcols)
9cd225cb	edges0 <- unlist(edges_by_tx) edges0_mcols <- mcols(edges0) edges0_mcolnames <- colnames(edges0_mcols) sgedge_id <- edges0_mcols[ , "sgedge_id"] sm <- match(sgedge_id, sgedge_id) ## Sanity checks. stopifnot(all(edges0 == edges0[sm]))
43e1ae98	invar_mcol_idx <- get_index_of_invariant_edge_mcols(edges0_mcolnames)
9cd225cb	stopifnot(identical(
942a5908	edges0_mcols[ , invar_mcol_idx, drop=FALSE], edges0_mcols[sm , invar_mcol_idx, drop=FALSE]))
9cd225cb	## Compute 'ans_partitioning'.
942a5908	ans_unlistData <- edges0 if (!keep.dup.edges) { keep_idx <- which(sm == seq_along(sm)) ans_unlistData <- ans_unlistData[keep_idx] }
9cd225cb	ans_grouping <- Rle(names(ans_unlistData)) ans_eltlens <- runLength(ans_grouping) ans_partitioning <- PartitioningByEnd(cumsum(ans_eltlens), names=runValue(ans_grouping)) ## Compute 'ans_unlistData'. names(ans_unlistData) <- NULL
942a5908	if (!keep.dup.edges) { ans_unlistData_mcols <- mcols(ans_unlistData) var_mcol_idx <- seq_along(edges0_mcolnames)[- invar_mcol_idx] f <- factor(sgedge_id, levels=sgedge_id[keep_idx]) for (i in var_mcol_idx) { old_col <- edges0_mcols[ , i]
43e1ae98	new_col <- unname(unique(unlistAndSplit(old_col, f)))
942a5908	ans_unlistData_mcols[ , i] <- new_col } mcols(ans_unlistData) <- ans_unlistData_mcols
9cd225cb	}
7341eb9a
6c1f4100	## Relist 'ans_unlistData' and return. ans <- relist(ans_unlistData, ans_partitioning) ans } )