Bioconductor Code: motifTestR

Browse code

Added Poisson & NB capabilities when simulating sequences

Stevie Ped authored on 10/03/2025 13:19:19
Showing 1 changed files

@@ -12,6 +12,8 @@ simSeq(
                                        prob = rep(0.25, 4),
                                        shape1 = 1,
                                        shape2 = 1,
                                     +  rate = NULL,
                                     +  theta = NULL,
                                        as = "DNAStringSet",
                                        ...
+                                     )
@@ -25,10 +27,19 @@ simSeq(
                                      \item{nt}{Nucleotides to include}
                                     -\item{prob}{Sampling probablities for each nucleotide}
                                     +\item{prob}{Sampling probabilities for each nucleotide}
                                      \item{shape1, shape2}{Passed to \link[VGAM]{rbetabinom.ab}}
                                     +\item{rate}{The expected rate of motifs per sequence. Equivalent to
                                     +\eqn{ \lambda } in \link[stats]{rpois}. If set to NULL, all sequences will
                                     +be simulated with a single motif; otherwise a Poisson distribution will be used}
+                                    +
                                     +\item{theta}{Overdispersion parameter passed to \link[MASS]{rnegbin}.
                                     +If set to NULL the rate parameter will be passed to \link[stats]{rpois}.
                                     +However if this value is set, the rate and theta parameters are passed to
                                     +\link[MASS]{rnegbin} to simulate overdispersed counts}
+                                    +
                                      \item{as}{ObjectClass to return objects as. Defaults to DNAStringSet, but
                                      other viable options may include 'character', 'CharacterList' or any
                                      other class from which a character vector may be coerced.}
@@ -52,9 +63,18 @@ If a PWM/PFM is supplied, the shape parameters are first passed to
                                      \link[VGAM]{rbetabinom.ab} to determine the random positions the motif will
                                      be placed, with the default parameters representing a discrete uniform
                                      distribution.
                                     -Once positions for the TFBM have been selected, nucleotides will be randomly
                                     -sampled using the probabilities provided in the PWM and these motifs will be
                                     -placed at the randomly sample positions
+                                    +
                                     +The sequences to have a motif inserted will be selected, along with the
                                     +number of motifs, using the rate and theta parameters.
                                     +If both are NULL, every sequence will have a single motif inserted.
                                     +If the rate is > 0 and theta is NULL, sequences will be selected to have
                                     +motifs inserted using a Poisson distribution.
                                     +If theta is also provided, sequences will be selected to contain motifs
                                     +using a negative binomial distribution.
+                                    +
                                     +Once positions and sequences for the TFBM have been selected, nucleotides
                                     +will be randomly sampled using the probabilities provided in the PWM and
                                     +these motifs will be placed at the randomly sampled positions.
+                                     }
                                      \examples{
                                      ## Randomly generate 10x50nt sequences without any TFBMs present

Browse code

Added ComplexUpset to Suggests

Stevie Ped authored on 08/03/2025 03:35:00
Showing 1 changed files

man/simSeq.Rd

History View file @ d2b25a7

@@ -66,7 +66,9 @@ sim_seq <- simSeq(10, width = 20, pfm = ex_pfm$ESR1)
                                      sim_seq
                                      ## The position of the motif within each sequence is included in the mcols
                                      mcols(sim_seq)
+                                    +
                                      ## Use this to extract the random motifs from the random sequences
                                     +library(IRanges)
                                      i <- mcols(sim_seq)$pos + cumsum(width(sim_seq)) - width(sim_seq)
                                      Views(unlist(sim_seq), start = i, width = 10)

Browse code

Added simSeq

Stevie Ped authored on 12/01/2025 08:01:23
Showing 1 changed files

man/simSeq.Rd

History View file @ b1c54df

                                     new file mode 100644
@@ -0,0 +1,74 @@
                                     +% Generated by roxygen2: do not edit by hand
                                     +% Please edit documentation in R/simSeq.R
                                     +\name{simSeq}
                                     +\alias{simSeq}
                                     +\title{Simulate sequences using optional TFBMs}
                                     +\usage{
                                     +simSeq(
                                     +  n,
                                     +  width,
                                     +  pfm = NULL,
                                     +  nt = c("A", "C", "G", "T"),
                                     +  prob = rep(0.25, 4),
                                     +  shape1 = 1,
                                     +  shape2 = 1,
                                     +  as = "DNAStringSet",
                                     +  ...
                                     +)
                                     +}
                                     +\arguments{
                                     +\item{n}{The number of sequences to simulate}
+                                    +
                                     +\item{width}{Width of sequences to simulate}
+                                    +
                                     +\item{pfm}{Probability Weight/Frequency Matrix}
+                                    +
                                     +\item{nt}{Nucleotides to include}
+                                    +
                                     +\item{prob}{Sampling probablities for each nucleotide}
+                                    +
                                     +\item{shape1, shape2}{Passed to \link[VGAM]{rbetabinom.ab}}
+                                    +
                                     +\item{as}{ObjectClass to return objects as. Defaults to DNAStringSet, but
                                     +other viable options may include 'character', 'CharacterList' or any
                                     +other class from which a character vector may be coerced.}
+                                    +
                                     +\item{...}{Not used}
                                     +}
                                     +\value{
                                     +By default a DNAStringSet will be returned.
                                     +If possible, the position of any randomly sampled motifs will be included
                                     +in the mcols element of the returned object.
                                     +}
                                     +\description{
                                     +Simulate a set of fixed-width sequences using optional TFBMs
                                     +}
                                     +\details{
                                     +Using the nucleotides and probabilities provided, a set of sequences can be
                                     +simulated. By default, this will effectively be a set of 'background'
                                     +sequences, with letters effectively chosen at random.
+                                    +
                                     +If a PWM/PFM is supplied, the shape parameters are first passed to
                                     +\link[VGAM]{rbetabinom.ab} to determine the random positions the motif will
                                     +be placed, with the default parameters representing a discrete uniform
                                     +distribution.
                                     +Once positions for the TFBM have been selected, nucleotides will be randomly
                                     +sampled using the probabilities provided in the PWM and these motifs will be
                                     +placed at the randomly sample positions
                                     +}
                                     +\examples{
                                     +## Randomly generate 10x50nt sequences without any TFBMs present
                                     +simSeq(10, 50)
+                                    +
                                     +## Now place a motif at random positions
                                     +data('ex_pfm')
                                     +sim_seq <- simSeq(10, width = 20, pfm = ex_pfm$ESR1)
                                     +sim_seq
                                     +## The position of the motif within each sequence is included in the mcols
                                     +mcols(sim_seq)
                                     +## Use this to extract the random motifs from the random sequences
                                     +i <- mcols(sim_seq)$pos + cumsum(width(sim_seq)) - width(sim_seq)
                                     +Views(unlist(sim_seq), start = i, width = 10)
+                                    +
+                                    +
                                     +}