Browse code

Fixes and updates

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/basecallQC@130196 bc3139a8-67e5-0310-9ffc-ced21a209358

Tom Carroll authored on 08/06/2017 16:16:28
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,487 @@
1
+readPattern <- function(bcl2fastqparams){
2
+  c(readlengths(bcl2fastqparams)[1],indexlengths(bcl2fastqparams)[1],
3
+    indexlengths(bcl2fastqparams)[2],readlengths(bcl2fastqparams)[2])
4
+}
5
+readBCLStatFile <- function(read.filename){
6
+  read.filename <- file(read.filename, "rb")
7
+  clusterNumber <- readBin(read.filename,"int",size=4,n = 1)
8
+  ACI <- readBin(read.filename,"double",size=8,n = 1)
9
+  AIAOverAIAA <- readBin(read.filename,"double",size=8,n = 1)
10
+  AICOverAICA <- readBin(read.filename,"double",size=8,n = 1)
11
+  AIGOverAIGA <- readBin(read.filename,"double",size=8,n = 1)
12
+  AITOverAITA <- readBin(read.filename,"double",size=8,n = 1)
13
+  AIAOverA <- readBin(read.filename,"double",size=8,n = 1)
14
+  AICOverC <- readBin(read.filename,"double",size=8,n = 1)
15
+  AIGOverG <- readBin(read.filename,"double",size=8,n = 1)
16
+  AITOverT <- readBin(read.filename,"double",size=8,n = 1)
17
+  NCA <- readBin(read.filename,"int",size=4,n = 1)
18
+  NCC <- readBin(read.filename,"int",size=4,n = 1)
19
+  NCG <- readBin(read.filename,"int",size=4,n = 1)
20
+  NCT <- readBin(read.filename,"int",size=4,n = 1)
21
+  NCX <- readBin(read.filename,"int",size=4,n = 1)
22
+  NCIA <- readBin(read.filename,"int",size=4,n = 1)
23
+  NCIC <- readBin(read.filename,"int",size=4,n = 1)
24
+  NCIG <- readBin(read.filename,"int",size=4,n = 1)
25
+  NCIT <- readBin(read.filename,"int",size=4,n = 1)
26
+  
27
+  
28
+  bclClusterTileStats <- c(clusterNumber,ACI,AIAOverAIAA,AICOverAICA,AIGOverAIGA,
29
+                           AITOverAITA,AIAOverA,AICOverC,AIGOverG,
30
+                           AITOverT,NCA,NCC,NCG,NCT,NCX,NCIA,NCIC,NCIG,NCIT)
31
+  close(read.filename)
32
+  return(bclClusterTileStats)
33
+}
34
+readBCLStatFiles <- function(read.filenames){
35
+  bclStatList <- lapply(read.filenames,readBCLStatFile)
36
+  bclStatMat <- do.call(rbind,bclStatList)
37
+}
38
+readExtractionMetrics <- function(extractionMetricsBin){
39
+  bytesOfFile <- file.info(extractionMetricsBin)$size
40
+  read.filename <- file(extractionMetricsBin, "rb")
41
+  bytesRead <- 0
42
+  k <- 1
43
+  versionNumber <- readBin(read.filename,"int",size=1,n = 1)
44
+  recordSize <- readBin(read.filename,"int",size=1,n = 1,signed = FALSE)
45
+  bytesRead <- bytesRead+1+1
46
+  extractionMetricsOut <- list()
47
+  while(bytesRead < bytesOfFile){
48
+    laneNumber <- readBin(read.filename,"int",size=2,n = 1)
49
+    tileNumber <- readBin(read.filename,"int",size=2,n = 1)
50
+    cycleNumber <- readBin(read.filename,"int",size=2,n = 1)
51
+    focusForChannelA <- readBin(read.filename,"double",size=4,n = 1)
52
+    focusForChannelC <- readBin(read.filename,"double",size=4,n = 1)
53
+    focusForChannelG <- readBin(read.filename,"double",size=4,n = 1)
54
+    focusForChannelT <- readBin(read.filename,"double",size=4,n = 1)
55
+    maxIntensityForChannelA <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
56
+    maxIntensityForChannelC <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
57
+    maxIntensityForChannelG <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
58
+    maxIntensityForChannelT <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
59
+    timeStamp <- readBin(read.filename,"int",size=8,n = 1)
60
+    bytesRead <- bytesRead+(2*3)+(4*4)+(2*4)+8
61
+    extractionMetricsOut[[k]] <- c(
62
+      laneNumber,tileNumber,cycleNumber,
63
+      focusForChannelA,focusForChannelC,
64
+      focusForChannelG,focusForChannelT,
65
+      maxIntensityForChannelA,maxIntensityForChannelC,
66
+      maxIntensityForChannelG,maxIntensityForChannelT,
67
+      timeStamp
68
+    )
69
+    k <- k+1
70
+  }
71
+  extractionMetricsOutMat <- do.call(rbind,extractionMetricsOut)
72
+  colnames(extractionMetricsOutMat) <-  c(
73
+    "laneNumber","tileNumber","cycleNumber",
74
+    "focusForChannelA","focusForChannelC",
75
+    "focusForChannelG","focusForChannelT",
76
+    "maxIntensityForChannelA","maxIntensityForChannelC",
77
+    "maxIntensityForChannelG","maxIntensityForChannelT",
78
+    "timeStamp"
79
+  )
80
+  close(read.filename)
81
+  extractionMetricsOutMat <- tbl_df(extractionMetricsOutMat)
82
+}
83
+readIndexMetrics <- function(indexMetricsBin){
84
+  bytesOfFile <- file.info(indexMetricsBin)$size
85
+  indexMetricsBin <- file(indexMetricsBin, "rb")
86
+  
87
+  bytesRead <- 0
88
+  versionNumber <- readBin(indexMetricsBin,"int",size=1,n = 1)
89
+  bytesRead <- bytesRead+1
90
+  k <- 1
91
+  IndexMetricsOut <- list()
92
+  while(bytesRead < bytesOfFile){
93
+    laneNumber <- readBin(indexMetricsBin,"int",size=2,n = 1)
94
+    tileNumber <- readBin(indexMetricsBin,"int",size=2,n = 1)
95
+    readNumber <- readBin(indexMetricsBin,"int",size=2,n = 1)
96
+    indexNameLength <- readBin(indexMetricsBin,"int",size=2,n = 1)
97
+    indexName <- rawToChar(readBin(indexMetricsBin,"raw",n=indexNameLength))
98
+    identifiedAsIndex <- readBin(indexMetricsBin,"int",size=4,n = 1)
99
+    sampleNameLength <- readBin(indexMetricsBin,"int",size=2,n = 1)
100
+    sampleName <- rawToChar(readBin(indexMetricsBin,"raw",n=sampleNameLength))
101
+    projectNameLength <- readBin(indexMetricsBin,"int",size=2,n = 1)
102
+    projectName <- rawToChar(readBin(indexMetricsBin,"raw",n=projectNameLength))
103
+    bytesRead <- bytesRead + 2+2+2+2+indexNameLength+4+2+sampleNameLength+2+projectNameLength
104
+    IndexMetricsOut[[k]] <- c(
105
+      laneNumber,tileNumber,readNumber,indexName,identifiedAsIndex,sampleName, projectName
106
+    )
107
+    k  <- k+1
108
+  }
109
+  close(indexMetricsBin)
110
+  IndexMetricsFrame <- do.call(rbind,IndexMetricsOut)
111
+  colnames(IndexMetricsFrame) <- c(
112
+    "laneNumber","tileNumber","readNumber","indexName","identifiedAsIndex","sampleName","projectName"
113
+  )
114
+  tbl_df(IndexMetricsFrame)
115
+}
116
+readQMetrics <- function(qMetricsBin){
117
+  bytesOfFile <- file.info(qMetricsBin)$size
118
+  read.filename <- file(qMetricsBin, "rb")
119
+  bytesRead <- 0
120
+  versionNumber <- readBin(read.filename,"int",size=1,n = 1)
121
+  recordSize <- readBin(read.filename,"int",size=1,n = 1,signed = FALSE)
122
+  hasBins <- readBin(read.filename,"logical",size=1,n = 1)
123
+  numberOfBins <- readBin(read.filename,"int",size=1,n = 1,signed = FALSE)
124
+  lowEndsOfBins <- readBin(read.filename,"int",size=1,n = numberOfBins,signed = FALSE)
125
+  highEndsOfBins <- readBin(read.filename,"int",size=1,n = numberOfBins,signed = FALSE)
126
+  valueOfBins <- readBin(read.filename,"int",size=1,n = numberOfBins,signed = FALSE)
127
+  bytesRead <- bytesRead+1+1+1+1+numberOfBins*3
128
+  k <- 1
129
+  qMetricsOut <- list()
130
+  while(bytesRead < bytesOfFile){
131
+    Lane <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
132
+    Tile <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
133
+    Cycle <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
134
+    qhist <- readBin(read.filename,"integer",size=4,n = 50)
135
+    qMetricsOut[[k]] <-
136
+      c(Lane,Tile,Cycle,qhist)
137
+    bytesRead <- bytesRead+2+2+2+4*50
138
+    k <- k+1
139
+  }
140
+  
141
+  close(read.filename)
142
+  qMetricsFrame <- do.call(rbind,qMetricsOut)
143
+  colnames(qMetricsFrame) <- c("Lane","Tile","Cycle",paste0("Q",1:50))
144
+  tbl_df(qMetricsFrame)
145
+}
146
+readCorrectedIntMetrics <- function(correctedIntMetricsBin){
147
+  bytesOfFile <- file.info(correctedIntMetricsBin)$size
148
+  read.filename <- file(correctedIntMetricsBin, "rb")
149
+  bytesRead <- 0
150
+  k <- 1
151
+  versionNumber <- readBin(read.filename,"int",size=1,n = 1)
152
+  recordSize <- readBin(read.filename,"int",size=1,n = 1,signed = FALSE)
153
+  bytesRead <- bytesRead+1+1
154
+  correctedIntMetricsOut <- list()
155
+  while(bytesRead < bytesOfFile){
156
+    laneNumber <- readBin(read.filename,"int",size=2,n = 1)
157
+    tileNumber <- readBin(read.filename,"int",size=2,n = 1)
158
+    cycleNumber <- readBin(read.filename,"int",size=2,n = 1)
159
+    averageCycleIntensity <- readBin(read.filename,"int",size=2,n = 1)
160
+    averageCorrectedIntensityForChannelA <- readBin(read.filename,"int",size=2,n = 1)
161
+    averageCorrectedIntensityForChannelC <- readBin(read.filename,"int",size=2,n = 1)
162
+    averageCorrectedIntensityForChannelG <- readBin(read.filename,"int",size=2,n = 1)
163
+    averageCorrectedIntensityForChannelT <- readBin(read.filename,"int",size=2,n = 1)
164
+    averageCorrectedIntForCalledClustersForBaseA <- readBin(read.filename,"int",size=2,n = 1)
165
+    averageCorrectedIntForCalledClustersForBaseC <- readBin(read.filename,"int",size=2,n = 1)
166
+    averageCorrectedIntForCalledClustersForBaseG <- readBin(read.filename,"int",size=2,n = 1)
167
+    averageCorrectedIntForCalledClustersForBaseT <- readBin(read.filename,"int",size=2,n = 1)
168
+    numberOfBaseCallsForNoCall  <- readBin(read.filename,"int",size=4,n = 1)
169
+    numberOfBaseCallsForBaseA  <- readBin(read.filename,"int",size=4,n = 1)
170
+    numberOfBaseCallsForBaseC  <- readBin(read.filename,"int",size=4,n = 1)
171
+    numberOfBaseCallsForBaseG  <- readBin(read.filename,"int",size=4,n = 1)
172
+    numberOfBaseCallsForBaseT  <- readBin(read.filename,"int",size=4,n = 1)
173
+    signalToNoiseRatio   <- readBin(read.filename,"double",size=4,n = 1)
174
+    bytesRead <- bytesRead+(2*3)+(2*9)+(4*5)+4
175
+    correctedIntMetricsOut[[k]] <- c(laneNumber,tileNumber,cycleNumber,averageCycleIntensity,
176
+                                     averageCorrectedIntensityForChannelA,averageCorrectedIntensityForChannelC,
177
+                                     averageCorrectedIntensityForChannelG,averageCorrectedIntensityForChannelT,
178
+                                     averageCorrectedIntForCalledClustersForBaseA,averageCorrectedIntForCalledClustersForBaseC,
179
+                                     averageCorrectedIntForCalledClustersForBaseG,averageCorrectedIntForCalledClustersForBaseT,
180
+                                     numberOfBaseCallsForNoCall,
181
+                                     numberOfBaseCallsForBaseA,numberOfBaseCallsForBaseC,
182
+                                     numberOfBaseCallsForBaseG,numberOfBaseCallsForBaseT)
183
+    k <- k+1
184
+  }
185
+  close(read.filename)
186
+  correctedIntMetricsMat <- do.call(rbind,correctedIntMetricsOut)
187
+}
188
+readImageMetrics <- function(imageMetricsBin){
189
+  bytesOfFile <- file.info(imageMetricsBin)$size
190
+  read.filename <- file(imageMetricsBin, "rb")
191
+  bytesRead <- 0
192
+  k <- 1
193
+  versionNumber <- readBin(read.filename,"int",size=1,n = 1)
194
+  recordSize <- readBin(read.filename,"int",size=1,n = 1,signed = FALSE)
195
+  bytesRead <- bytesRead+1+1
196
+  imageMetricsOut <- list()
197
+  while(bytesRead < bytesOfFile){
198
+    laneNumber <- readBin(read.filename,"int",size=2,n = 1)
199
+    tileNumber <- readBin(read.filename,"int",size=2,n = 1)
200
+    cycleNumber <- readBin(read.filename,"int",size=2,n = 1)
201
+    channelNumber <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
202
+    minimumContrast <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
203
+    maximumContrast <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
204
+    bytesRead <- bytesRead+(2*3)+(2*3)
205
+    imageMetricsOut[[k]] <- c(laneNumber,tileNumber,cycleNumber,
206
+                              channelNumber,minimumContrast,maximumContrast)
207
+    k <- k+1
208
+  }
209
+  close(read.filename)
210
+  imageMetricsMat <- do.call(rbind,imageMetricsOut)
211
+}
212
+readTileMetrics <- function(tileMetricsBin){
213
+  bytesOfFile <- file.info(tileMetricsBin)$size
214
+  read.filename <- file(tileMetricsBin, "rb")
215
+  bytesRead <- 0
216
+  k <- 1
217
+  versionNumber <- readBin(read.filename,"int",size=1,n = 1)
218
+  recordSize <- readBin(read.filename,"int",size=1,n = 1,signed = FALSE)
219
+  bytesRead <- bytesRead+1+1
220
+  tileMetricsOut <- list()
221
+  while(bytesRead < bytesOfFile){
222
+    laneNumber <- readBin(read.filename,"int",size=2,n = 1)
223
+    tileNumber <- readBin(read.filename,"int",size=2,n = 1)
224
+    codeTile <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
225
+    valueTile <- readBin(read.filename,"double",size=4,n = 1)
226
+    bytesRead <- bytesRead+(2*2)+2+4
227
+    tileMetricsOut[[k]] <- c(laneNumber,tileNumber,
228
+                             codeTile,valueTile)
229
+    k <- k+1
230
+  }
231
+  close(read.filename)
232
+  tileMetricsMat <- do.call(rbind,tileMetricsOut)
233
+  colnames(tileMetricsMat) <-  c(
234
+    "laneNumber","tileNumber",
235
+    "code","value"
236
+  )
237
+  tbl_df(tileMetricsMat) %>%
238
+    mutate(code = str_c("c",code)) %>%
239
+    group_by(laneNumber,tileNumber,code) %>%
240
+    summarise_all(max) %>%
241
+    ungroup() %>%
242
+    mutate(Lane=laneNumber,Tile=tileNumber) %>% 
243
+    mutate(code=stringr::str_replace_all(code,"c100","ClusterDensity")) %>% 
244
+    mutate(code=stringr::str_replace_all(code,"c101","ClusterDensityPF")) %>% 
245
+    mutate(code=stringr::str_replace_all(code,"c102","NumberOfClusters")) %>%        
246
+    mutate(code=stringr::str_replace_all(code,"c103","NumberOfClustersPF")) %>%                       
247
+    mutate(code=stringr::str_replace_all(code,"c200",
248
+                                         paste0("PhasingFor",
249
+                                                names(readPattern(bcl2fastqparams))[readPattern(bcl2fastqparams) != 0][1]))) %>% 
250
+    mutate(code=stringr::str_replace_all(code,"c201",
251
+                                         paste0("PrePhasingFor",
252
+                                                names(readPattern(bcl2fastqparams))[readPattern(bcl2fastqparams) != 0][1]))) %>%    
253
+    mutate(code=stringr::str_replace_all(code,"c202",
254
+                                         paste0("PhasingFor",
255
+                                                names(readPattern(bcl2fastqparams))[readPattern(bcl2fastqparams) != 0][2]))) %>% 
256
+    mutate(code=stringr::str_replace_all(code,"c203",
257
+                                         paste0("PrePhasingFor",
258
+                                                names(readPattern(bcl2fastqparams))[readPattern(bcl2fastqparams) != 0][2]))) %>%
259
+    mutate(code=stringr::str_replace_all(code,"c204",
260
+                                         paste0("PhasingFor",
261
+                                                names(readPattern(bcl2fastqparams))[readPattern(bcl2fastqparams) != 0][3]))) %>% 
262
+    mutate(code=stringr::str_replace_all(code,"c205",
263
+                                         paste0("PrePhasingFor",
264
+                                                names(readPattern(bcl2fastqparams))[readPattern(bcl2fastqparams) != 0][3]))) %>% 
265
+    mutate(code=stringr::str_replace_all(code,"c206",
266
+                                         paste0("PhasingFor",
267
+                                                names(readPattern(bcl2fastqparams))[readPattern(bcl2fastqparams) != 0][4]))) %>% 
268
+    mutate(code=stringr::str_replace_all(code,"c207",
269
+                                         paste0("PrePhasingFor",
270
+                                                names(readPattern(bcl2fastqparams))[readPattern(bcl2fastqparams) != 0][4]))) %>%
271
+    filter(code!="PrePhasingForNA" & code!="PhasingForNA" ) %>% 
272
+    spread(code,value) %>%
273
+    dplyr::select(-laneNumber,-tileNumber,
274
+                          -starts_with("c",ignore.case = FALSE))
275
+}
276
+readPhasingMetrics <- function(phasingMetricsBin){
277
+  bytesOfFile <- file.info(phasingMetricsBin)$size
278
+  read.filename <- file(phasingMetricsBin, "rb")
279
+  bytesRead <- 0
280
+  k <- 1
281
+  versionNumber <- readBin(read.filename,"int",size=1,n = 1)
282
+  recordSize <- readBin(read.filename,"int",size=1,n = 1,signed = FALSE)
283
+  bytesRead <- bytesRead+1+1
284
+  phasingMetricsOut <- list()
285
+  while(bytesRead < bytesOfFile){
286
+    laneNumber <- readBin(read.filename,"int",size=2,n = 1)
287
+    tileNumber <- readBin(read.filename,"int",size=2,n = 1)
288
+    cycleNumber <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
289
+    phasingWeight  <- readBin(read.filename,"double",size=4,n = 1)
290
+    prephasingWeight  <- readBin(read.filename,"double",size=4,n = 1)
291
+    bytesRead <- bytesRead+(2*3)+(4*2)
292
+    phasingMetricsOut[[k]] <- c(laneNumber,tileNumber,cycleNumber,
293
+                                phasingWeight,prephasingWeight)
294
+    k <- k+1
295
+  }
296
+  close(read.filename)
297
+  phasingMetricsMat <- do.call(rbind,phasingMetricsOut)
298
+}
299
+readErrorMetrics <- function(errorMetricsBin){
300
+  bytesOfFile <- file.info(errorMetricsBin)$size
301
+  read.filename <- file(errorMetricsBin, "rb")
302
+  bytesRead <- 0
303
+  k <- 1
304
+  versionNumber <- readBin(read.filename,"int",size=1,n = 1)
305
+  recordSize <- readBin(read.filename,"int",size=1,n = 1,signed = FALSE)
306
+  bytesRead <- bytesRead+1+1
307
+  tileMetricsOut <- list()
308
+  while(bytesRead < bytesOfFile){
309
+    laneNumber <- readBin(read.filename,"int",size=2,n = 1)
310
+    tileNumber <- readBin(read.filename,"int",size=2,n = 1)
311
+    cycleNumber <- readBin(read.filename,"int",size=2,n = 1,signed = FALSE)
312
+    errorRate <- readBin(read.filename,"double",size=4,n = 1)
313
+    numberOfPerfectReads <- readBin(read.filename,"int",size=4,n = 1)
314
+    numberOfReadsWithOneError <- readBin(read.filename,"int",size=4,n = 1)
315
+    numberOfReadsWithTwoError <- readBin(read.filename,"int",size=4,n = 1)
316
+    numberOfReadsWithThreeError <- readBin(read.filename,"int",size=4,n = 1)
317
+    numberOfReadsWithFourError <- readBin(read.filename,"int",size=4,n = 1)
318
+    bytesRead <- bytesRead+(2*3)+(4*6)
319
+    tileMetricsOut[[k]] <- c(laneNumber,tileNumber,cycleNumber,
320
+                             errorRate,numberOfPerfectReads,
321
+                             numberOfReadsWithOneError,numberOfReadsWithTwoError,
322
+                             numberOfReadsWithThreeError,numberOfReadsWithFourError)
323
+    k <- k+1
324
+  }
325
+  close(read.filename)
326
+  tileMetricsMat <- do.call(rbind,tileMetricsOut)
327
+}
328
+readInterOpsMetrics <- function(bcl2fastqparams,verbose=TRUE,
329
+                                interopsToParse=c("TileMetricsOut","QMetricsOut")){
330
+  interOpsMetrics <- list()
331
+  if(any(interopsToParse %in% "TileMetricsOut")){
332
+    if(verbose){message("Parsing InterOps Tile binary file..",appendLF = FALSE)}
333
+    tileMetricsFile <- file.path(bcl2fastqparams@RunDir,"InterOp","TileMetricsOut.bin")
334
+    if(file.exists(tileMetricsFile)){
335
+      interOpsMetrics[["tileMet"]] <- readTileMetrics(tileMetricsFile)
336
+    }
337
+    if(verbose){message("done",appendLF = TRUE)}
338
+  }
339
+  if(any(interopsToParse %in% "QMetricsOut")){
340
+    if(verbose){message("Parsing InterOps QMetrics binary file..",appendLF = FALSE)}
341
+    qMetricsFile <- file.path(bcl2fastqparams@RunDir,"InterOp","QMetricsOut.bin")
342
+    if(file.exists(qMetricsFile)){
343
+      interOpsMetrics[["qMet"]] <- readQMetrics(qMetricsFile)
344
+    }
345
+    if(verbose){message("done",appendLF = TRUE)}
346
+  }
347
+  return(interOpsMetrics)
348
+}
349
+
350
+
351
+#' Function to parse InterOps files and generate summary reports
352
+#'
353
+#' Parses the InterOps binary files produced by Illumina's Real Time Analysis sofware and
354
+#' used by Illumina's SAV sofware.
355
+#' InterOp binary files contain information on phasing/prephsing, yield,read numbers
356
+#' and basecalling quality score distributions per cycle.
357
+#' This interOpsReport functions parses and summarises the InterOps files, TileMetrics.bin and QMetrics.bin, and the 
358
+#' Stats directory XML files, ConversionStats.xml and DemultiplexingStats.xml. 
359
+#'
360
+#'
361
+#' @docType methods
362
+#' @name interOpsReport
363
+#' @rdname interOpsReport
364
+#'
365
+#' @author Thomas Carroll.
366
+#' @param bcl2fastqparams A BCL2FastQparams object.
367
+#' @param verbose TRUE or FALSE . TRUE reports progress through file parsing.
368
+#' @return A named list of length 3 containing machine and run information, 
369
+#' basecalling quality information and demultiplexing information.
370
+#' @details The interOpsReport function returns a list of machine and run 
371
+#' information, basecalling quality information and demultiplexing information.
372
+#' The three named elements are descibed below.  
373
+#' \itemize{
374
+#' \item{"machineReport"}{ A data.frame containing information machine and software parameters
375
+#' }
376
+#' \item{"sequencingReport"}{ A data.frame of mean cluster density, percentage clusters passing filter, phasing 
377
+#' and prephasing percentages, number of reads total/passing filter and percent of reads with mean quality score > Q30
378
+#' grouped by lane and read
379
+#' }
380
+#' \item{"demuxReport"}{ A data.frame of demultiplexing results containing yield, number of reads, percentage of reads
381
+#' with quality scores greater than >Q30 and the percent of total reads per lane.
382
+#' Results are summarised per lane for samples, underdetermined indexes and all indexes (identifed and unidentified).
383
+#' }
384
+#' }
385
+#' 
386
+#' @import stringr XML RColorBrewer methods raster BiocStyle
387
+#' @examples
388
+#'
389
+
390
+#' fileLocations <- system.file("extdata",package="basecallQC")
391
+#' runXML <- dir(fileLocations,pattern="runParameters.xml",full.names=TRUE)
392
+#' config <- dir(fileLocations,pattern="config.ini",full.names=TRUE)
393
+#' bcl2fastqparams <- BCL2FastQparams(runXML,config,runDir=getwd(),verbose=FALSE)
394
+#' 
395
+#' # myRes_BCAGJ8ANXX <- interOpsReport(bcl2fastqparams,verbose=TRUE) 
396
+#' @export
397
+interOpsReport <- function(bcl2fastqparams,verbose=TRUE){
398
+  if(verbose){message("Parsing XML files..",appendLF = FALSE)}
399
+  basecallmetrics <- baseCallMetrics(bcl2fastqparams)
400
+  demultiplexmetrics <- demultiplexMetrics(bcl2fastqparams)
401
+  dmx <- demultiplexmetrics$demuxStatsProcessed
402
+  conv <- basecallmetrics$convStatsProcessed
403
+  if(verbose){message("done",appendLF = TRUE)}
404
+  
405
+  dmxSum <- dmx %>% 
406
+    tbl_df %>% 
407
+    dplyr::select(Sample,Barcode,Lane) %>%
408
+    distinct(Sample,Barcode,Lane) %>%
409
+    mutate(Lane=factor(as.numeric(str_replace_all(Lane,"Lane",""))))
410
+  
411
+  convSum <- conv %>%
412
+    tbl_df %>%
413
+    group_by(Sample,Lane,Filter) %>%
414
+    summarise(YieldSum=sum(as.numeric(Yield)),
415
+              Yield30Sum=sum(as.numeric(Yield30)),
416
+              Reads=sum(ClusterCount)) %>%
417
+    filter(Filter=="Pf") %>%
418
+    group_by(Lane) %>%
419
+    mutate(PercentageOfGreaterQ30_Bases_PF=(Yield30Sum/YieldSum)*100,
420
+           PercentOfLane=(Reads/sum(Reads[Sample != "all"]))*100) %>%
421
+    dplyr::select(-Filter,-Yield30Sum)
422
+  
423
+  dmxReport <- full_join(convSum,tbl_df(dmxSum),by = c("Sample", "Lane"))
424
+  
425
+  if(verbose){message("Parsing machine information..",appendLF = FALSE)}
426
+  
427
+  runPReport <- bcl2fastqparams@RunParameters$runParams %>% 
428
+    dplyr::select(ComputerName,RunID,ExperimentName,
429
+                  Flowcell,
430
+                  ChemistryVersion,RunMode,
431
+                  ApplicationName,ApplicationVersion,
432
+                  RTAVersion
433
+    ) %>%
434
+    mutate(basecallQC_Version=as.character(packageVersion("basecallQC"))) %>% 
435
+    t
436
+  
437
+  if(verbose){message("done",appendLF = TRUE)}
438
+  if(verbose){message("Parsing InterOps binary files..",appendLF = FALSE)}
439
+  interOpsMetrics <- readInterOpsMetrics(bcl2fastqparams,verbose = FALSE)
440
+  tileMet <- interOpsMetrics$tileMet  %>%
441
+    group_by(Lane) %>%
442
+    summarise(meanClusterDensity=mean(ClusterDensity),
443
+              sdClusterDensity=sd(ClusterDensity),
444
+              meanPhasingForRead1=mean(PhasingForRead1,na.rm=TRUE),
445
+              meanPrePhasingForRead1=mean(PrePhasingForRead1,na.rm=TRUE),
446
+              NumberOfReads=sum(NumberOfClusters),
447
+              NumberOfReadsPF=sum(NumberOfClustersPF),
448
+              percent_PF_Clusters=mean(NumberOfClustersPF/NumberOfClusters)*100,
449
+              percent_PF_ClustersSD=sd(NumberOfClustersPF/NumberOfClusters)*100,
450
+              meanPhasingForRead2 = if(exists('PhasingForRead2', where = .)) mean(PhasingForRead2,na.rm=TRUE) else 0,
451
+              meanPrePhasingForRead2 = if(exists('PrePhasingForRead2', where = .)) mean(PrePhasingForRead2,na.rm=TRUE) else 0
452
+    ) %>% 
453
+    mutate(meanClusterDensity=stringr::str_c(signif(meanClusterDensity/1000,4),"+/-",signif(sdClusterDensity/1000,4)),
454
+           percent_PF_Clusters=stringr::str_c(signif(percent_PF_Clusters,3),"+/-",signif(percent_PF_ClustersSD,3)),
455
+           Phasing_PrephasingRead1=stringr::str_c(signif(meanPhasingForRead1*100,3)," / ",signif(meanPrePhasingForRead1*100,3))) %>%
456
+           {if(exists('meanPhasingForRead2', where = .)) mutate(.,Phasing_PrephasingRead2=stringr::str_c(signif(meanPhasingForRead2*100,3)," / ",signif(meanPrePhasingForRead2*100,3))) else .} %>%  
457
+    dplyr::select(Lane,meanClusterDensity,percent_PF_Clusters,
458
+                          starts_with("Phasing_Prephasing"),
459
+                          NumberOfReads,NumberOfReadsPF)
460
+  
461
+  
462
+  qMet <- interOpsMetrics$qMet %>% 
463
+    group_by(Lane,Cycle) %>% 
464
+    dplyr::select(Lane,Cycle,starts_with("Q")) %>% 
465
+    summarise_all(sum) %>% 
466
+    ungroup() %>% 
467
+    mutate(q30=rowSums(.[32:52])/rowSums(.[3:52])) %>% 
468
+    mutate(Read = cut(Cycle,
469
+                      breaks=unique(cumsum(c(0,
470
+                                             readPattern(bcl2fastqparams)                             
471
+                      ))),
472
+                      labels= names(readPattern(bcl2fastqparams))[readPattern(bcl2fastqparams) !=0]
473
+    )) %>% 
474
+    group_by(Lane,Read) %>% 
475
+    summarise_all(mean) %>% 
476
+    dplyr::select(Lane,Read,q30) %>% 
477
+    mutate(q30=signif(q30*100,3)) %>%
478
+    mutate(Read=paste0("Q30-",Read)) %>% 
479
+    spread(Read,q30)
480
+  
481
+  fullSummaryDual <- full_join(tileMet,qMet,by = "Lane")
482
+  if(verbose){message("done",appendLF = TRUE)}
483
+  toReport <- list(machineReport=as.data.frame(runPReport),
484
+                   sequencingReport=as.data.frame(fullSummaryDual),
485
+                   demuxReport=as.data.frame(dmxReport))
486
+  return(toReport)
487
+}