#' The Parameters for BCL2FastQparams object.
#'
#' Parameter class and accessors for use with basecallQC
#'
#' @aliases BCL2FastQparams BCL2FastQparams-BCL2FastQparams
#'
#' @rdname BCL2FastQparams
#' @docType class
#' @export
#'
setClass(
  "BCL2FastQparams",
  slots = c(
    RunDir = "character",
    OutDir = "character",
    RunParameters = "list"
  )
)

#' The basecallQC object and constructor.
#'
#' Object and method to handle Illumina basecalling/demultiplexing
#' inputs and output files.
#' Provides sample sheet cleanup, basecall command and
#' summary QC statistics for basecalling/demultiplexing.
#'
#' @aliases basecallQC basecallQC-basecallQC
#'
#' @rdname basecallQC
#' @docType class
#' @export
setClass(
  "basecallQC",
  slots = c(
    BCL2FastQparams = "BCL2FastQparams",
    RunMetadata = "data.frame",
    cleanedSampleSheet = "data.frame",
    BCLCommand = "character",
    baseMasks = "data.frame",
    baseCallMetrics = "list",
    demultiplexMetrics = "list",
    fqQCmetrics = "list"
  )
)

#' Set Parameters for BCL2FastQparameters object.
#'
#' Parameter class and accessors
#'
#' @aliases BCL2FastQparams BCL2FastQparams-BCL2FastQparams
#'
#' @rdname BCL2FastQparams
#' @docType methods
#' @param runXML file path to runParameters.xml, if not specified
#' looks in top level of run directory.
#' @param config file path to config.ini, if not specified
#' looks in top level of run directory.
#' @param runDir file path to run directory.
#' @param outDir file path to out directory.
#' @param verbose TRUE or FALSE. Messages on or off. Warnings/errors persist
#' @details The BCL2FastQparams object contains slots RunDir, OutDir and RunParameters
#' \itemize{
#' \item{"RunDir"}{ Character string specifying the top level Run directory}
#' \item{"OutDir"}{ Character string specifying the output directory}
#' \item{"RunParameters"}{ A list containing the information from runParameters.xml and config.ini (See vignette for more details).
#' }
#' }
#' @return A BCL2FastQparams object (See details).
#' @examples
#' fileLocations <- system.file("extdata",package="basecallQC")
#' runXML <- dir(fileLocations,pattern="runParameters.xml",full.names=TRUE)
#' config <- dir(fileLocations,pattern="config.ini",full.names=TRUE)
#' BCL2FastQparams(runXML,config,runDir=getwd(),verbose=FALSE)
#' @export
BCL2FastQparams <- function(runXML=NULL,config=NULL,runDir=NULL,outDir=NULL,verbose=TRUE){
  # Fall back to the working directory only when no run directory was given.
  # The notice lives inside this branch so it is not printed when the caller
  # did supply runDir.
  if (is.null(runDir)) {
    runDir <- getwd()
    if (verbose) message("No runDir specified, run directory set to working directory")
  }

  # Default location of runParameters.xml is the top level of the run
  # directory. The resolved path must be stored in runXML itself, because
  # that is the value handed to runParams() below.
  if (is.null(runXML)) {
    if (verbose) message("No location for runParameters.xml specified")
    runXML <- file.path(runDir, "runParameters.xml")
    if (!file.exists(runXML)) stop("No runParameters.xml found in run directory")
  }

  # Same default-location rule for config.ini.
  if (is.null(config)) {
    if (verbose) message("No location for config.ini specified")
    config <- file.path(runDir, "config.ini")
    if (!file.exists(config)) stop("No config.ini found in run directory")
  }

  runParameters <- runParams(runXML, config)

  # Without an explicit output directory, derive one from the Barcode entry
  # of the parsed run parameters. Both notices respect `verbose`.
  if (is.null(outDir)) {
    if (verbose) message("No location for outDir specified")
    outDir <- file.path(runDir, runParameters$runParams$Barcode)
    if (verbose) message("outDir set to ", outDir)
  }

  new(
    "BCL2FastQparams",
    RunDir = runDir,
    OutDir = outDir,
    RunParameters = runParameters
  )
}

#' The basecallQC object and constructor.
#'
#'
#' @name basecallQC
#' @rdname basecallQC
#' @param bcl2fastqparams A BCL2FastQparams object as created by BCL2FastQparams() constructor.
#' @param RunMetaData Any run metadata to attach (data.frame)
#' @param sampleSheet A sample sheet for Illumina basecalling using bcl2Fastq (See vignette for more details).
#' @param doFQMetric TRUE or FALSE. Perform ShortRead FastQ quality assessment
#' using ShortRead's qa and report function
#' @return basecallQC a basecallQC object (See details for more information)
#' @details The basecallQC object contains slots BCL2FastQparams,
#' cleanedSampleSheet, baseMasks, BCLCommand, baseCallMetrics, demultiplexMetrics and fqQCmetrics.
#' \itemize{
#' \item{"BCL2FastQparams"}{ A BCL2FastQparams object}
#' \item{"cleanedSampleSheet"}{ A data.frame containing the cleaned sample sheet for
#' Illumina basecalling using bcl2Fastq versions >= 2.1.7
#' }
#' \item{"baseMasks"}{ A data.frame containing basecall masks per lane for use with bcl2Fastq versions >= 2.1.7. Basemasks in data.frame for reads and indexes as well as the total basemasks for each lane.
#' }
#' \item{"BCLCommand"}{ A character string containing the command to be used for basecalling using bcl2Fastq (versions >= 2.1.7).
#' }
#' \item{"baseCallMetrics"}{ A list containing the full basecalling metrics from ConversionStats.xml. Contains an unsummarised data.frame and basecalling metrics summarised to Sample, Lane, Sample by lane, and Sample by Lane and Tile
#' }
#' \item{"demultiplexMetrics"}{ A list containing the full demultiplexing metrics from DemultiplexingStats.xml. Contains an unsummarised data.frame and demultiplexing metrics filtered to per Sample metrics
#' }
#' \item{"fqQCmetrics"}{ A list containing a data.frame of read counts and links to ShortRead QA reports and a ShortRead QA object containing quality information for generated fastQs.
#' }
#' }
#' @examples
#' fileLocations <- system.file("extdata",package="basecallQC")
#' runXML <- dir(fileLocations,pattern="runParameters.xml",full.names=TRUE)
#' config <- dir(fileLocations,pattern="config.ini",full.names=TRUE)
#' sampleSheet <- dir(fileLocations,pattern="*\\.csv",full.names=TRUE)
#' outDir <- file.path(fileLocations,"Runs/161105_D00467_0205_AC9L0AANXX/C9L0AANXX/")
#' bcl2fastqparams <- BCL2FastQparams(runXML,config,runDir=getwd(),outDir,verbose=FALSE)
#' bclQC <- basecallQC(bcl2fastqparams,RunMetaData=NULL,sampleSheet)
#' @export
basecallQC <- function(bcl2fastqparams,RunMetaData=NULL,sampleSheet=NULL,doFQMetric=FALSE){
  # Sample sheet clean-up, per-lane base masks and the bcl2fastq command.
  cleanedSampleSheet <- validateBCLSheet(sampleSheet, bcl2fastqparams)
  baseMasks <- createBasemasks(cleanedSampleSheet, bcl2fastqparams)
  toSubmit <- createBCLcommand(bcl2fastqparams, cleanedSampleSheet, baseMasks)

  # Metrics read from the Stats XML files under the output directory.
  basecallmetrics <- baseCallMetrics(bcl2fastqparams)
  demultiplexmetrics <- demultiplexMetrics(bcl2fastqparams)

  # `pattern` is a regular expression, not a glob: anchor on the literal
  # ".fastq.gz" suffix so names such as "x.fastq.gz.md5" are not picked up.
  fastqs <- dir(bcl2fastqparams@OutDir, pattern = "\\.fastq\\.gz$", full.names = TRUE)
  if (isTRUE(doFQMetric) && length(fastqs) > 0) {
    fqQCmetrics <- qcShortRead(fastqs)
  } else {
    fqQCmetrics <- list(FQQC_Table = NULL, ShortReadQC = NULL)
  }

  # The RunMetadata slot is a data.frame; NULL (the default) maps to an empty
  # data.frame, anything else is coerced so the supplied metadata is attached.
  runMetadata <- if (is.null(RunMetaData)) data.frame() else as.data.frame(RunMetaData)

  new(
    "basecallQC",
    BCL2FastQparams = bcl2fastqparams,
    RunMetadata = runMetadata,
    cleanedSampleSheet = cleanedSampleSheet,
    baseMasks = baseMasks,
    BCLCommand = toSubmit,
    baseCallMetrics = basecallmetrics,
    demultiplexMetrics = demultiplexmetrics,
    fqQCmetrics = fqQCmetrics
  )
}

# Internal helper: bundle the results of runParameters(runXML) and
# configParams(config) into one list with elements `runParams` and
# `configParams`.
runParams <- function(runXML=NULL,config=NULL){
  list(
    runParams = runParameters(runXML),
    configParams = configParams(config)
  )
}

#' Gather basecalling metrics from a Run (using Run's ConversionStats.xml file).
#'
#' @name baseCallMetrics
#' @rdname baseCallMetrics
#' @param bcl2fastqparams A BCL2FastQparams object as created by BCL2FastQparams() constructor.
#' @return A list of length two containing the full basecalling metrics from a Run (using Run's ConversionStats.xml file).
#' Contains an unsummarised data.frame and basecalling metrics summarised to Sample, Lane, Sample by lane, and Sample by Lane and Tile.
#' @examples
#' fileLocations <- system.file("extdata",package="basecallQC")
#' runXML <- dir(fileLocations,pattern="runParameters.xml",full.names=TRUE)
#' config <- dir(fileLocations,pattern="config.ini",full.names=TRUE)
#' sampleSheet <- dir(fileLocations,pattern="*\\.csv",full.names=TRUE)
#' outDir <- file.path(fileLocations,"Runs/161105_D00467_0205_AC9L0AANXX/C9L0AANXX/")
#' bcl2fastqparams <- BCL2FastQparams(runXML,config,runDir=getwd(),outDir,verbose=FALSE)
#' convMetrics <- baseCallMetrics(bcl2fastqparams)
#' @export
baseCallMetrics <- function(bcl2fastqparams){
  statsFile <- file.path(bcl2fastqparams@OutDir, "Stats", "ConversionStats.xml")

  # Both elements stay NULL when the stats file is absent.
  processed <- NULL
  summarised <- NULL
  if (file.exists(statsFile)) {
    processed <- processConvStats(statsFile)
    summarised <- summariseConvStats(processed)
  }

  list(
    convStatsProcessed = processed,
    summarisedConvStats = summarised
  )
}

#' Gather demultiplexing metrics from a Run (using Run's DemultiplexingStats.xml file).
#'
#' @name demultiplexMetrics
#' @rdname demultiplexMetrics
#' @param bcl2fastqparams A BCL2FastQparams object as created by BCL2FastQparams() constructor.
#' @return A list of length two containing the full demultiplexing metrics from a Run (using Run's DemultiplexingStats.xml file).
#' Contains an unsummarised data.frame and demultiplexing metrics filtered to per Sample metrics
#' @examples
#' fileLocations <- system.file("extdata",package="basecallQC")
#' runXML <- dir(fileLocations,pattern="runParameters.xml",full.names=TRUE)
#' config <- dir(fileLocations,pattern="config.ini",full.names=TRUE)
#' sampleSheet <- dir(fileLocations,pattern="*\\.csv",full.names=TRUE)
#' outDir <- file.path(fileLocations,"Runs/161105_D00467_0205_AC9L0AANXX/C9L0AANXX/")
#' bcl2fastqparams <- BCL2FastQparams(runXML,config,runDir=getwd(),outDir,verbose=FALSE)
#' demuxMetrics <- demultiplexMetrics(bcl2fastqparams)
#' @export
demultiplexMetrics <- function(bcl2fastqparams){
  statsFile <- file.path(bcl2fastqparams@OutDir, "Stats", "DemultiplexingStats.xml")

  # Both elements stay NULL when the stats file is absent.
  processed <- NULL
  summarised <- NULL
  if (file.exists(statsFile)) {
    processed <- processDemultiplex(statsFile)
    summarised <- summariseDemuxStats(processed)
  }

  list(
    demuxStatsProcessed = processed,
    summarisedDemuxStats = summarised
  )
}