be7d4113 |
#'Provides Coverage by the CpG density. CpG Density is defined as the number
#'of CpGs observed in certain base pair long region.
|
05333069 |
#'@param bs bsseq object
|
ba93b446 |
#'@param organism scientific name of the organism of interest,
|
1e49b4be |
#'e.g. Mmusculus or Hsapiens
|
662e4406 |
#'@param windowLength Length of the window to calculate the density
|
6a4a0d3d |
#'@param small Indicator for a small dataset, cpg density is calculated more
#'memory efficiently for large dataset but for small dataset a different quicker
#'method is used
|
be7d4113 |
#'Default value for window length is 1000 basepairs.
|
05333069 |
#'@return Data frame with sample name and coverage in repeat masker regions
#'@examples
|
8061babb |
#'library(BSgenome.Hsapiens.NCBI.GRCh38)
|
be7d4113 |
#'directory <- system.file("extdata/bismark_data",package='scmeth')
#'bs <- HDF5Array::loadHDF5SummarizedExperiment(directory)
|
6a4a0d3d |
#'cpgDensity(bs,Hsapiens,1000,small=TRUE)
|
cb356578 |
#'@import BSgenome
|
efa5aeca |
#'@importFrom bsseq getCoverage
|
12ffa4ac |
#'@importFrom Biostrings DNAString
#'@importFrom Biostrings vmatchPattern
|
05333069 |
#'@export
|
f260e769 |
|
6a4a0d3d |
cpgDensity <- function(bs,organism,windowLength=1000,small=FALSE){
|
05333069 |
|
be7d4113 |
#GenomeInfoDb::seqlevelsStyle(gr) <- GenomeInfoDb::seqlevelsStyle(organism)[1]
|
12ffa4ac |
cov <- bsseq::getCoverage(bs)
gr <- GenomicRanges::granges(bs)
cpg <- Biostrings::DNAString("CG")
|
6a4a0d3d |
if (!small){
cpg_gr <- Biostrings::vmatchPattern(cpg, organism)
cpg_gr <- GenomeInfoDb::keepStandardChromosomes(cpg_gr, pruning.mode= "coarse")
r_cpg_gr <- GenomicRanges::resize(cpg_gr,width=(windowLength/2),fix='center')
cpgd <- GenomicRanges::countOverlaps(gr,r_cpg_gr)
}else{
gr_resized <- GenomicRanges::resize(gr, width=(500/2),fix='center')
v <- Biostrings::Views(organism, gr_resized)
dnFrequency <- Biostrings::dinucleotideFrequency(v)
cpgd <- dnFrequency[,'CG']
}
|
12ffa4ac |
#cpgd <- Repitools::cpgDensityCalc(gr, organism, window = windowLength)
|
2351755b |
|
be7d4113 |
maxcpgd <- max(cpgd)
cpgdCov <- sapply(seq_len(ncol(cov)), function(i) {
|
e8f2b9b4 |
cv = as.vector(cov[,i])
|
be7d4113 |
cpgdCell <- cpgd[cv>0 ]
|
e8f2b9b4 |
tab <- table(cpgdCell)
x <- rep(0, maxcpgd)
x[as.numeric(names(tab))] <- tab
x
|
2351755b |
})
|
be7d4113 |
rownames(cpgdCov) <- seq_len(maxcpgd)
|
7a46bec0 |
return(cpgdCov)
|
05333069 |
}
|