% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/testClusterEnrich.R
\name{testClusterEnrich}
\alias{testClusterEnrich}
\title{Test enrichment across a cluster of motifs using a background set of sequences}
\usage{
testClusterEnrich(
  cl,
  stringset,
  bg,
  var = "iteration",
  model = c("quasipoisson", "hypergeometric", "poisson", "iteration"),
  sort_by = c("p", "none"),
  mc.cores = 1,
  prior.count = 1,
  seed = 100,
  ...
)
}
\arguments{
\item{cl}{A list of Position Weight Matrices, universalmotifs, with each
element representing clusters of related matrices}

\item{stringset}{An XStringSet with equal sequence widths}

\item{bg}{An XStringSet with the same sequence widths as the test XStringSet}

\item{var}{A column in the mcols element of bg, usually denoting an iteration
number}

\item{model}{The model used for analysis}

\item{sort_by}{Column to sort results by}

\item{mc.cores}{Passed to \link[parallel]{mclapply}}

\item{prior.count}{Added to all counts to better manage zero counts in
background sequences. For analysis under QuasiPoisson models, prior counts
are added as Poisson noise using this value as expected counts}

\item{seed}{Used for reproducibility when adding Poisson noise}

\item{...}{Passed to \link{getPwmMatches} or \link{countPwmMatches}}
}
\value{
See \link{testMotifEnrich}
}
\description{
Test for enrichment of any motif within a cluster across a set of sequences
using a background set to derive a NULL hypothesis
}
\details{
This extends the analytic methods offered by \link{testMotifEnrich} using
PWMs grouped into a set of clusters.
As with all cluster-level approaches, overlapping hits from multiple PWMs
are counted as a single hit, ensuring that duplicated matches are not
double-counted, and that only individual positions within the sequences are
counted.
}
\examples{
## Load the example peaks & the sequences
data("ar_er_peaks")
data("ar_er_seq")
sq <- seqinfo(ar_er_peaks)
## Now sample size-matched ranges 10 times larger. In real-world analyses,
## this set should be sampled as at least 1000x larger, ensuring features
## are matched to your requirements. This example masks regions with known N
## content, including centromeres & telomeres
data("hg19_mask")
set.seed(305)
bg_ranges <- makeRMRanges(
  ar_er_peaks, GRanges(sq)[1], exclude = hg19_mask, n_iter = 10
)

## Convert ranges to DNAStringSets
library(BSgenome.Hsapiens.UCSC.hg19)
genome <- BSgenome.Hsapiens.UCSC.hg19
bg_seq <- getSeq(genome, bg_ranges)

## Test for enrichment of clustered motifs
data("ex_pfm")
cl <- list(A = ex_pfm[1], B = ex_pfm[2:3])
testClusterEnrich(cl, ar_er_seq, bg_seq, model = "poisson")

}
\seealso{
\code{\link[=makeRMRanges]{makeRMRanges()}}, \code{\link[=getClusterMatches]{getClusterMatches()}}, \code{\link[=countClusterMatches]{countClusterMatches()}}, \code{\link[=testMotifEnrich]{testMotifEnrich()}}
}