R/cpgDensity.R
be7d4113
 #' Coverage by CpG density
 #'
 #' Counts, for every sample, how many covered loci fall into each CpG
 #' density class. CpG density is defined as the number of CpGs observed in a
 #' window of \code{windowLength} base pairs around a locus, as computed by
 #' \code{Repitools::cpgDensityCalc}.
 #'
 #'@param bs bsseq object
 #'@param organism scientific name of the organism of interest,
 #'e.g. Mmusculus or Hsapiens; passed on to \code{Repitools::cpgDensityCalc}
 #'@param windowLength Length of the window to calculate the density.
 #'Default value for window length is 1000 basepairs.
 #'@return Numeric matrix with one row per CpG density value (row names run
 #'from 1 to the largest density found) and one column per sample, in the
 #'column order of \code{bs}. Each entry is the number of loci with that
 #'density that have non-zero coverage in the sample.
 #'@examples
 #'library(BSgenome.Hsapiens.NCBI.GRCh38)
 #'directory <- system.file("extdata/bismark_data",package='scmeth')
 #'bs <- HDF5Array::loadHDF5SummarizedExperiment(directory)
 #'cpgDensity(bs,Hsapiens,1000)
 #'@import BSgenome
 #'@importFrom bsseq getCoverage
 #'@export

 cpgDensity <- function(bs, organism, windowLength = 1000) {

     cov <- bsseq::getCoverage(bs)
     gr <- GenomicRanges::granges(bs)
     # NOTE(review): the seqlevels style of `bs` presumably has to match the
     # one used by `organism`; the harmonisation below is disabled - confirm.
     #GenomeInfoDb::seqlevelsStyle(gr) <- GenomeInfoDb::seqlevelsStyle(organism)[1]
     cpgd <- Repitools::cpgDensityCalc(gr, organism, window = windowLength)

     # Largest density class to report. Including 0 in max() keeps this
     # well-defined (0 classes) when there are no loci or only NA densities.
     maxcpgd <- as.integer(max(0, cpgd, na.rm = TRUE))

     perSample <- vapply(seq_len(ncol(cov)), function(i) {
         # which() drops NA coverage values instead of propagating them
         covered <- which(as.vector(cov[, i]) > 0)
         # tabulate() counts the values 1..maxcpgd and ignores zeros and NAs,
         # so a density of 0 can no longer shift the counts of other classes
         as.numeric(tabulate(cpgd[covered], nbins = maxcpgd))
     }, numeric(maxcpgd))

     # vapply() simplifies to a plain vector when there is a single density
     # class; force the classes x samples matrix shape in every case.
     cpgdCov <- matrix(perSample, nrow = maxcpgd, ncol = ncol(cov))
     rownames(cpgdCov) <- seq_len(maxcpgd)
     cpgdCov
 }