% Bioconductor Code: motifTestR

% Raw Blame Patch Log History
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/testClusterEnrich.R
\name{testClusterEnrich}
\alias{testClusterEnrich}
\title{Test enrichment across a cluster of motifs using a background set of sequences}
\usage{
testClusterEnrich(
  cl,
  stringset,
  bg,
  var = "iteration",
  model = c("quasipoisson", "hypergeometric", "poisson", "iteration"),
  sort_by = c("p", "none"),
  mc.cores = 1,
  prior.count = 1,
  seed = 100,
  ...
)
}
\arguments{
\item{cl}{A list of Position Weight Matrices or universalmotifs, with each
element representing a cluster of related matrices}

\item{stringset}{An XStringSet with equal sequence widths}

\item{bg}{An XStringSet with the same sequence widths as the test XStringSet}

\item{var}{A column in the mcols element of bg, usually denoting an iteration
number}

\item{model}{The model used for analysis}

\item{sort_by}{Column to sort results by}

\item{mc.cores}{Passed to \link[parallel]{mclapply}}

\item{prior.count}{Added to all counts to better manage zero counts in
background sequences. For analysis under QuasiPoisson models, prior counts
are added as Poisson noise, using this value as the expected count}

\item{seed}{Used for reproducibility when adding Poisson noise}

\item{...}{Passed to \link{getPwmMatches} or \link{countPwmMatches}}
}
\value{
See \link{testMotifEnrich}
}
\description{
Test for enrichment of any motif within a cluster across a set of sequences,
using a background set to derive a null hypothesis
}
\details{
This extends the analytic methods offered by \link{testMotifEnrich} using
PWMs grouped into a set of clusters.
As with all cluster-level approaches, overlapping hits from multiple PWMs
are counted as a single hit. This ensures that duplicated matches are not
double-counted, and that each individual position within the sequences is
counted only once.
}
\examples{
## Load the example peaks & the sequences
data("ar_er_peaks")
data("ar_er_seq")
sq <- seqinfo(ar_er_peaks)
## Now sample a set of size-matched ranges 10 times larger than the test set.
## In real-world analyses, this set should be at least 1000x larger, with
## features matched to your requirements. This example masks regions with
## known N content, including centromeres & telomeres
data("hg19_mask")
set.seed(305)
bg_ranges <- makeRMRanges(
  ar_er_peaks, GRanges(sq)[1], exclude = hg19_mask, n_iter = 10
)

## Convert ranges to DNAStringSets
library(BSgenome.Hsapiens.UCSC.hg19)
genome <- BSgenome.Hsapiens.UCSC.hg19
bg_seq <- getSeq(genome, bg_ranges)

## Test for enrichment of clustered motifs
data("ex_pfm")
cl <- list(A = ex_pfm[1], B = ex_pfm[2:3])
testClusterEnrich(cl, ar_er_seq, bg_seq, model = "poisson")


}
\seealso{
\code{\link[=makeRMRanges]{makeRMRanges()}}, \code{\link[=getClusterMatches]{getClusterMatches()}}, \code{\link[=countClusterMatches]{countClusterMatches()}}, \code{\link[=testMotifEnrich]{testMotifEnrich()}}
}