Bioconductor Code: RAIDS

Browse code

Update doc and format processPileupChrBin() function

adeschen authored on 11/04/2025 17:28:56
Showing 2 changed files

R/tools_internal.R index 8978ccc..ca2f9df 100644
man/processPileupChrBin.Rd index 3bc8f89..3a87ea6 100644

History View file @ 8cc8b65

@@ -478,42 +478,34 @@ extractNucleotide <- function(nucleotide, count, curNucleo) {
+                     }
                     -#' @title Read a VCF file with the genotypes use for the ancestry call
                     +#' @title Extract SNV information from pileup file for a selected chromosome
                      #'
                     -#' @description The function reads VCF file and
                     -#' returns a data frame
                     +#' @description The function reads a pileup file and
                     +#' returns a \code{data.frame}
                      #' containing the information about the read counts for the SNVs present in
                     -#' the file.
                     -#'
                     -#' @param chr a \code{character} string representing the name, including
                     -#' the path, of a VCF file containing the SNV read counts.
                     -#' The VCF must contain those genotype fields: GT, AD, DP.
                     +#' the selected chromosome.
                      #'
                     +#' @param chr a \code{character} string representing the name of the
                     +#' chromosome to keep
                      #'
                     -#' @param resPileup result from pileup
                     +#' @param resPileup a \code{data.frame} as generated by the \code{pileup}
                     +#' function from the \code{Rsamtools} package
                      #'
                     -#' @param varDf a \code{data.frame} representing the position to keep
                     +#' @param varDf a \code{data.frame} representing the positions to keep
                      #'
                      #' @param verbose a \code{logical} indicating if messages should be printed
                     -#' to show how the different steps in the function. Default: \code{FALSE}.
                      #'
                      #' @return a \code{data.frame} containing at least:
                      #' \describe{
                     -#' \item{seqnames}{ a  \code{character} representing the name of
                     -#' the chromosome}
                     -#' \item{pos}{ a \code{numeric} representing the position on the
                     -#' chromosome}
                     +#' \item{seqnames}{ a \code{character} representing the name of the chromosome}
                     +#' \item{pos}{ a \code{numeric} representing the position on the chromosome}
                      #' \item{REF}{ a \code{character} string representing the reference nucleotide}
                      #' \item{ALT}{ a \code{character} string representing the alternative
                      #' nucleotide}
                     -#' \item{A}{ a \code{numeric} representing the count for
                     -#' the A nucleotide}
                     -#' \item{C}{ a \code{numeric} representing the count for
                     -#' the C nucleotide}
                     -#' \item{G}{ a \code{numeric} representing the count for
                     -#' the G nucleotide}
                     -#' \item{T}{ a \code{numeric} representing the count for
                     -#' the T nucleotide}
                     +#' \item{A}{ a \code{numeric} representing the count for the A nucleotide}
                     +#' \item{C}{ a \code{numeric} representing the count for the C nucleotide}
                     +#' \item{G}{ a \code{numeric} representing the count for the G nucleotide}
                     +#' \item{T}{ a \code{numeric} representing the count for the T nucleotide}
                      #' \item{count}{ a \code{numeric} representing the total count}
                      #' }
                      #'
@@ -528,16 +520,20 @@ extractNucleotide <- function(nucleotide, count, curNucleo) {
                      #' @importFrom rlang .data
                      #' @encoding UTF-8
                      #' @keywords internal
                     -processPileupChrBin <- function(chr,
                     -                            resPileup, varDf,
                     -                            verbose=FALSE) {
                     +processPileupChrBin <- function(chr, resPileup, varDf, verbose) {
                          resCur <- NULL
                     -    if(chr %in% names(varDf)){
+                    +
                     +    ## Assign FALSE to verbose by default
                     +    if (is.null(verbose)) {
                     +        verbose <- FALSE
                     +    }
+                    +
                     +    if (chr %in% names(varDf)) {
                              keep <- which(resPileup$seqnames == chr)
                     -        if(length(keep) > 0){
                     +        if (length(keep) > 0) {
                                  # restrict the resPileup to the chromosome chr
                                  snpO <- resPileup[keep,]
                                  rm(keep)
@@ -546,57 +542,55 @@ processPileupChrBin <- function(chr,
                                  vcfCur <- varDf[[chr]][varDf[[chr]]$start >= min(snpO$pos) &
                                                           varDf[[chr]]$start <= max(snpO$pos),]
                     -            if(nrow(vcfCur) > 0){
                     +            if (nrow(vcfCur) > 0) {
                                      # Get the positions to keep in resPileup (snpO)
                     -                tmpTime <- system.time({z <- cbind( c(vcfCur$start,
                     -                                                snpO$pos,
                     -                                                vcfCur$start),
                     -                                            c(rep(-1, nrow(vcfCur)),
                     -                                                rep(0, nrow(snpO)),
                     -                                                rep(1, nrow(vcfCur))),
                     -                                            c(seq_len(nrow(vcfCur)),
                     -                                                seq_len(nrow(snpO)),
                     -                                                seq_len(nrow(vcfCur)))
                     -                                            )
                     -                                z <- z[order(z[,1]),]
                     -                                listKeep <- which(cumsum(z[,2]) < 0 & z[,2]==0)})
                     -                if(verbose) {message("processPileupChrBin selected pos user ",
                     -                                round(tmpTime[1],3),
                     -                                " system ", round(tmpTime[2],3),
                     -                                " elapsed ", round(tmpTime[3],3))}
+                    -
                     -                # print("Match ")
                     -                # summarize by possition with the for base
                     +                tmpTime <- system.time( {z <- cbind( c(vcfCur$start,
                     +                                                snpO$pos, vcfCur$start),
                     +                    c(rep(-1, nrow(vcfCur)), rep(0, nrow(snpO)),
                     +                        rep(1, nrow(vcfCur))),
                     +                    c(seq_len(nrow(vcfCur)), seq_len(nrow(snpO)),
                     +                        seq_len(nrow(vcfCur))))
                     +                    z <- z[order(z[,1]),]
                     +                    listKeep <- which(cumsum(z[,2]) < 0 & z[,2]==0)} )
+                    +
                     +                if (verbose) {
                     +                    message("processPileupChrBin selected pos user ",
                     +                        round(tmpTime[1],3), " system ", round(tmpTime[2],3),
                     +                        " elapsed ", round(tmpTime[3],3)) }
+                    +
                     +                # summarize by position with the four bases
                                      tmpTime <- system.time(resCur <- as.data.frame(snpO[z[listKeep, 3],] %>%
                     -                                                              group_by(.data$pos) %>%
                     -                                                              summarize(seqnames=.data$seqnames[1],
                     -                                                                        A=extractNucleotide(.data$nucleotide,.data$count, "A"),
                     -                                                                        C=extractNucleotide(.data$nucleotide,.data$count, "C"),
                     -                                                                        G=extractNucleotide(.data$nucleotide,.data$count, "G"),
                     -                                                                        T=extractNucleotide(.data$nucleotide,.data$count, "T"),
                     -                                                                        count=sum(.data$count))))
                     -                if(verbose) {message("processPileupChrBin extracted nucleotides user ",
                     -                                     round(tmpTime[1],3),
                     -                                     " system ", round(tmpTime[2],3),
                     -                                     " elapsed ", round(tmpTime[3],3))}
                     -                # print("Second Z")
                     +                    group_by(.data$pos) %>%
                     +                        summarize(seqnames=.data$seqnames[1],
                     +                        A=extractNucleotide(.data$nucleotide,.data$count, "A"),
                     +                        C=extractNucleotide(.data$nucleotide,.data$count, "C"),
                     +                        G=extractNucleotide(.data$nucleotide,.data$count, "G"),
                     +                        T=extractNucleotide(.data$nucleotide,.data$count, "T"),
                     +                        count=sum(.data$count))))
+                    +
                     +                if (verbose) {
                     +                    message("processPileupChrBin extracted nucleotides user ",
                     +                        round(tmpTime[1],3), " system ", round(tmpTime[2],3),
                     +                        " elapsed ", round(tmpTime[3],3)) }
+                    +
                                      # Add the reference allele and the alternative allele
                                      tmpTime <- system.time({z <- cbind(c(resCur$pos, vcfCur$start, resCur$pos),
                     -                                    c(rep(-1,nrow(resCur)),rep(0, nrow(vcfCur)),rep(1, nrow(resCur))),
                     -                                    c(seq_len(nrow(resCur)), seq_len(nrow(vcfCur)), seq_len(nrow(resCur))))
                     -                                z <- z[order(z[,1]),]
                     -                                listKeep <- which(cumsum(z[,2]) < 0 & z[,2]==0)
                     -                                resCur$REF <- rep("N", nrow(resCur))
                     -                                resCur$ALT <- rep("N", nrow(resCur))
                     -                                resCur$REF[z[listKeep-1,3]] <- vcfCur[z[listKeep,3], "REF"]
                     -                                resCur$ALT[z[listKeep-1,3]] <- vcfCur[z[listKeep,3], "ALT"]
                     -                                resCur <- resCur[,c("seqnames", "pos", "REF", "ALT", "A", "C", "G", "T", "count")]})
+                    -
                     -                if(verbose) {message("processPileupChrBin add ref and alt allele user ",
                     -                                     round(tmpTime[1],3),
                     -                                     " system ", round(tmpTime[2],3),
                     -                                     " elapsed ", round(tmpTime[3],3))}
                     +                    c(rep(-1, nrow(resCur)), rep(0, nrow(vcfCur)), rep(1, nrow(resCur))),
                     +                    c(seq_len(nrow(resCur)), seq_len(nrow(vcfCur)), seq_len(nrow(resCur))))
                     +                    z <- z[order(z[,1]),]
                     +                    listKeep <- which(cumsum(z[,2]) < 0 & z[,2]==0)
                     +                    resCur$REF <- rep("N", nrow(resCur))
                     +                    resCur$ALT <- rep("N", nrow(resCur))
                     +                    resCur$REF[z[listKeep-1,3]] <- vcfCur[z[listKeep,3], "REF"]
                     +                    resCur$ALT[z[listKeep-1,3]] <- vcfCur[z[listKeep,3], "ALT"]
                     +                    resCur <- resCur[,c("seqnames", "pos", "REF", "ALT", "A",
                     +                                            "C", "G", "T", "count")]} )
+                    +
                     +                if(verbose) {
                     +                    message("processPileupChrBin add ref and alt allele user ",
                     +                        round(tmpTime[1],3), " system ", round(tmpTime[2],3),
                     +                        " elapsed ", round(tmpTime[3],3)) }
+                                 }
+                             }
+                         }

man/processPileupChrBin.Rd

History View file @ 8cc8b65

@@ -5,7 +5,7 @@
                      \alias{processPileupChrBin}
                      \title{Read a VCF file with the genotypes use for the ancestry call}
                      \usage{
                     -processPileupChrBin(chr, resPileup, varDf, verbose = FALSE)
                     +processPileupChrBin(chr, resPileup, varDf, verbose)
+                     }
                      \arguments{
                      \item{chr}{a \code{character} string representing the name, including
@@ -14,37 +14,30 @@ The VCF must contain those genotype fields: GT, AD, DP.}
                      \item{resPileup}{result from pileup}
                     -\item{varDf}{a \code{data.frame} representing the position to keep}
                     +\item{varDf}{a \code{data.frame} representing the positions to keep}
                     -\item{verbose}{a \code{logical} indicating if messages should be printed
                     -to show how the different steps in the function. Default: \code{FALSE}.}
                     +\item{verbose}{a \code{logical} indicating if messages should be printed}
+                     }
                      \value{
                      a \code{data.frame} containing at least:
                      \describe{
                     -\item{seqnames}{ a  \code{character} representing the name of
                     -the chromosome}
                     -\item{pos}{ a \code{numeric} representing the position on the
                     -chromosome}
                     +\item{seqnames}{ a \code{character} representing the name of the chromosome}
                     +\item{pos}{ a \code{numeric} representing the position on the chromosome}
                      \item{REF}{ a \code{character} string representing the reference nucleotide}
                      \item{ALT}{ a \code{character} string representing the alternative
                      nucleotide}
                     -\item{A}{ a \code{numeric} representing the count for
                     -the A nucleotide}
                     -\item{C}{ a \code{numeric} representing the count for
                     -the C nucleotide}
                     -\item{G}{ a \code{numeric} representing the count for
                     -the G nucleotide}
                     -\item{T}{ a \code{numeric} representing the count for
                     -the T nucleotide}
                     +\item{A}{ a \code{numeric} representing the count for the A nucleotide}
                     +\item{C}{ a \code{numeric} representing the count for the C nucleotide}
                     +\item{G}{ a \code{numeric} representing the count for the G nucleotide}
                     +\item{T}{ a \code{numeric} representing the count for the T nucleotide}
                      \item{count}{ a \code{numeric} representing the total count}
+                     }
+                     }
                      \description{
                      The function reads VCF file and
                     -returns a data frame
                     +returns a \code{data.frame}
                      containing the information about the read counts for the SNVs present in
                     -the file.
                     +the VCF.
+                     }
                      \examples{