Browse code

addXCMSPeaks using modern interface from xcms

Ricca authored on 26/10/2021 13:08:30
Showing 52 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: flagme
2
-Version: 1.33.5
2
+Version: 1.48.1
3 3
 Date: 2015/04/06
4 4
 Title: Analysis of Metabolomics GC/MS Data
5 5
 Author: Mark Robinson <[email protected]>, Riccardo Romoli <[email protected]>
... ...
@@ -12,7 +12,7 @@ License: LGPL (>= 2)
12 12
 Collate: 0classes.R clusterAlignment.R init.R multipleAlignment.R
13 13
         peaksAlignment.R progressiveAlignment.R betweenAlignment.R dp.R
14 14
         metrics.R parse.R peaksDataset.R gatherInfo.R plotFragments.R
15
-        rmaFitUnit.R addXCMSPeaks.R retFatMatrix.R exportSpectra.R
15
+        rmaFitUnit.R addXCMSPeaks.R retFatMatrix.R exportSpectra.R importSpectra.R
16 16
 biocViews: DifferentialExpression, MassSpectrometry
17
-RoxygenNote: 6.1.1
17
+RoxygenNote: 7.1.2
18 18
 Encoding: UTF-8
... ...
@@ -7,11 +7,15 @@ export(betweenAlignment)
7 7
 export(calcTimeDiffs)
8 8
 export(clusterAlignment)
9 9
 export(corPrt)
10
+export(distToLib)
10 11
 export(dp)
11 12
 export(dynRT)
12 13
 export(exportSpectra)
13 14
 export(gatherInfo)
15
+export(headToTailPlot)
16
+export(importSpec)
14 17
 export(imputePeaks)
18
+export(matchSpec)
15 19
 export(multipleAlignment)
16 20
 export(ndpRT)
17 21
 export(normDotProduct)
... ...
@@ -36,6 +40,7 @@ importClassesFrom(SparseM,matrix.csc)
36 40
 importFrom(CAMERA,annotate)
37 41
 importFrom(CAMERA,getpspectra)
38 42
 importFrom(MASS,rlm)
43
+importFrom(SparseM,as.matrix)
39 44
 importFrom(SparseM,as.matrix.csc)
40 45
 importFrom(gplots,colorpanel)
41 46
 importFrom(graphics,axis)
... ...
@@ -1,184 +1,287 @@
1 1
 #' Add xcms/CAMERA peak detection results
2
-#' 
2
+#'
3 3
 #' Reads the raw data using xcms, group each extracted ion according to their
4 4
 #' retention time using CAMERA and attaches them to an already created
5 5
 #' \code{peaksDataset} object
6
-#' 
6
+#'
7 7
 #' Repeated calls to xcmsSet and annotate to perform peak-picking and
8 8
 #' deconvolution. The peak detection results are added to the original
9 9
 #' \code{peaksDataset} object. Two peak detection alorithms are available:
10 10
 #' continuous wavelet transform (peakPicking=c('cwt')) and the matched filter
11 11
 #' approach (peakPicking=c('mF')) described by Smith et al (2006). For further
12 12
 #' information consult the xcms package manual.
13
-#' 
14
-#' @param files character vector of same length as \code{object@rawdata} (user
15
-#' ensures the order matches)
16
-#' @param object a \code{peaksDataset} object.
17
-#' @param peakPicking Methods to use for peak detection. See details.
18
-#' @param perfwhm percentage of full width half maximum. See
19
-#' CAMERA::groupFWHM() for more details
20
-#' @param quick logical. See CAMERA::annotate() for more details
21
-#' @param ... arguments passed on to \code{xcmsSet} and \code{annotate}
13
+#' @title addXCMSPeaks
14
+#' @param files list of chromatogram files
15
+#' @param object a \code{peakDataset} object
16
+#' @param settings list. It conteins the settings for the peak-picking
17
+#' @param rtrange vector; retention time range
18
+#' @param mzrange vector, mz range
19
+#' @param perfwhm etermines the maximal retentiontime difference of features in
20
+#'     one pseudospectrum.
21
+#' @param minintens minimum ion intensity to be included into a pseudospectra
22
+#' @param minfeat minimum number of ion to be created a pseudospectra
23
+#' @param multipleMatchedFilter logical Try to remove redundant peaks, in 
24
+#' this case where there are any peaks within an absolute m/z value of 0.2 and 
25
+#' within 3 s for any one sample in the xcmsSet (the largest peak is kept)
26
+#' @param multipleMatchedFilterParam list. It conteins the settings for the
27
+#' peak-picking. mz_abs represent the the mz range; rt_abs represent thert range
28
+#' @importFrom xcms xcmsRaw xcmsSet
29
+#' @importFrom CAMERA annotate getpspectra
30
+#' @importFrom stats aggregate
31
+#' @export addXCMSPeaks
22 32
 #' @return \code{peaksDataset} object
23 33
 #' @author Riccardo Romoli \email{riccardo.romoli@@unifi.it}
24 34
 #' @seealso \code{\link{peaksDataset}} \code{\link{findPeaks.matchedFilter}}
25 35
 #' \code{\link{findPeaks.centWave}} \code{\link{xcmsRaw-class}}
26 36
 #' @keywords manip
27 37
 #' @examples
28
-#' 
29
-#' # need access to CDF (raw data)
30
-#' require(gcspikelite)
31
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
32
-#' 
33
-#' # full paths to file names
34
-#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
35
-#' 
36
-#' # create a 'peaksDataset' object and add XCMS peaks to it
37
-#' pd <- peaksDataset(cdfFiles[1], mz=seq(50,550), rtrange=c(7.5,8.5))
38
-#' pd <- addXCMSPeaks(cdfFiles[1], pd, peakPicking=c('mF'),
39
-#'                    snthresh=3, fwhm=4, step=1, steps=2, mzdiff=0.5)
38
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
39
+#'                     sep = "/"),"CDF", full = TRUE)
40
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
41
+#' ## create settings object
42
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
43
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
44
+#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
45
+#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
46
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
47
+#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
48
+#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
49
+#' data
40 50
 #'
41 51
 #' @importFrom xcms xcmsRaw xcmsSet
42 52
 #' @importFrom CAMERA annotate getpspectra
43 53
 #' @importFrom stats aggregate
44 54
 #' @export addXCMSPeaks
45
-addXCMSPeaks <- function (files, object, peakPicking = c("cwt", "mF"), perfwhm = 0.75, quick = TRUE, ...)
46
-{
55
+addXCMSPeaks <- function(files, object, settings, rtrange = NULL,
56
+                           mzrange = NULL, perfwhm = 0.75, minintens = 100,
57
+                           minfeat = 6, multipleMatchedFilter = FALSE, 
58
+                           multipleMatchedFilterParam = list(
59
+                               fwhm = c(5, 10, 20), mz_abs = 0.1, rt_abs = 3)
60
+                               ) {
61
+    ## Rmpi tends to give many warnings that are not relevant to end
62
+    ## users: this is an attempt to suppress this output
63
+    owarn <- options("warn")
64
+    on.exit(options(warn = owarn$warn))
47 65
     options(warn = -1)
48
-    cdfFiles <- as.character(files)
49
-    if (length(cdfFiles) != length(object@rawdata)) 
50
-        stop("Number of files must be the same as the number of runs (and must match).")
51
-    xs <- lapply(cdfFiles, function(x, y) {
52
-        f <- which(cdfFiles %in% x)
53
-        xr <- xcmsRaw(x)
54
-        rtrange <- c(min(object@rawrt[[f]]), max(object@rawrt[[f]])) * 
55
-            60
56
-        scanRange <- c(max(1, which(xr@scantime > rtrange[1])[1], 
57
-            na.rm = TRUE), min(length(xr@scantime), which(xr@scantime > 
58
-            rtrange[2])[1] - 1, na.rm = TRUE))
59
-        if (peakPicking == "cwt") {
60
-            s <- xcmsSet(x, method = "centWave", prefilter = c(5, 
61
-                100), scanrange = scanRange, integrate = 1, mzdiff = -0.001, 
62
-                fitgauss = TRUE, ...)
66
+
67
+    ## if an rtrange is given, we first find out which scans correspond
68
+    ## to this and then use the scanRange argument of xcmsSet
69
+    if (!is.null(rtrange)) {
70
+        if (length(rtrange) != 2) {
71
+            stop("Improper rtrange given!")
63 72
         }
64
-        if (peakPicking == "mF") {
65
-            s <- xcmsSet(x, method = "matchedFilter", scanrange = scanRange, 
66
-                max = 500, ...)
67
-        }
68
-        idx <- which(s@peaks[, "mz"] > min(object@mz) & s@peaks[, 
69
-            "mz"] < max(object@mz))
70
-        s@peaks <- s@peaks[idx, ]
71
-        if(quick == TRUE)
72
-        {
73
-            a <- annotate(s, perfwhm = perfwhm, quick = quick)
74
-        }
75
-        if(quick == FALSE)
76
-        {
77
-            a <- annotate(s, perfwhm = perfwhm, cor_eic_th = 0.8,
78
-                          pval = 0.05, graphMethod = "hcs",
79
-                          calcIso = FALSE, calcCiS = TRUE,
80
-                          calcCaS = FALSE)
73
+
74
+        rtrange <- rtrange * 60  ## convert from minutes to seconds
75
+        xr <- xcms::xcmsRaw(files[1])
76
+        scanRange <- c(max(1, which(xr@scantime > rtrange[1])[1], na.rm = TRUE),
77
+                       min(length(xr@scantime),
78
+                           which(xr@scantime > rtrange[2])[1] - 1, na.rm = TRUE)
79
+                           )
80
+        allSettings <- c(list(files = files, scanrange = scanRange), settings)
81
+    }
82
+    else {
83
+        allSettings <- c(list(files = files), settings)
84
+    }
85
+
86
+  ## peak-picking
87
+  cat("Start peakPicking \n")
88
+    row <- MSnbase::readMSData(files, centroided. = TRUE, mode = "onDisk",
89
+        msLevel. = 1)
90
+    if (class(settings)[1] == "MatchedFilterParam") {
91
+        if (multipleMatchedFilter == TRUE) {
92
+            settings@fwhm <- multipleMatchedFilterParam$fwhm[1]
93
+            set1a <- xcms::findChromPeaks(row, param = settings )
94
+            settings@fwhm <- multipleMatchedFilterParam$fwhm[2]
95
+            set1b <- xcms::findChromPeaks(row, param = settings )
96
+            settings@fwhm <- multipleMatchedFilterParam$fwhm[3]
97
+            set1c <- xcms::findChromPeaks(row, param = settings )
98
+            set1 <- set1c
99
+            xcms::chromPeaks(set1) <- rbind(
100
+                xcms::chromPeaks(set1a), xcms::chromPeaks(set1b), 
101
+                xcms::chromPeaks(set1c)
102
+            )
103
+            xcms::chromPeaks(set1) <- xcms::chromPeaks(set1)[
104
+                order(xcms::chromPeaks(set1)[, "sample"], decreasing = FALSE), ]
105
+                s <- de_Duper(set1,
106
+                             mz_abs = multipleMatchedFilterParam$mz_abs,
107
+                             rt_abs = multipleMatchedFilterParam$rt_abs)
81 108
         }
82
-        return(a)
83
-    }, y = peakPicking)
84
-    if (peakPicking == "cwt") {
85
-        area <- c("intb")
86 109
     }
87
-    if (peakPicking == "mF") {
88
-        area <- c("intf")
110
+    xset <- xcms::findChromPeaks(row, param = settings)
111
+  cat("peakPicking Done \n")
112
+    pdp <- xcms::PeakDensityParam(sampleGroups = seq(along = files), bw = 20,
113
+        minFraction = 0.5)
114
+    xset <- xcms::groupChromPeaks(xset, pdp)
115
+    xset <- xcms::fillChromPeaks(xset, param = xcms::ChromPeakAreaParam())
116
+    xset <- as(xset, "xcmsSet")
117
+    if (!is.null(mzrange)) {
118
+        idx <-  (xset@peaks[, "mz"] > mzrange[1]) &
119
+                    (xset@peaks[, "mz"] < mzrange[2]
120
+                )
121
+        xset@peaks <- xset@peaks[idx, ]
122
+    }
123
+    ## deconvolution; list of all the xset
124
+    ap <- split(xset, factor(xcms::sampnames(xset),
125
+        levels = xcms::sampnames(xset)))
126
+    apd <-  lapply(ap, function(x) {
127
+        xa <- CAMERA::xsAnnotate(x,  sample = 1)
128
+        xa <- CAMERA::groupFWHM(xa, perfwhm = perfwhm)
89 129
     }
90
-    data <- lapply(seq(along = cdfFiles), function(x) {
91
-        filt <- sapply(xs[[x]]@pspectra, function(r) {
92
-            length(r)
93
-        })
94
-        spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >= 
95
-            6)]
96
-        mzrange <- object@mz
97
-        abu <- data.frame(matrix(0, nrow = length(mzrange), ncol = length(spec.idx)))
98
-        rownames(abu) <- mzrange
99
-        colnames(abu) <- spec.idx
100
-        mz <- data.frame(mz = mzrange)
101
-        abu <- sapply(spec.idx, function(z) {
102
-            spec <- getpspectra(xs[[x]], z)[, c("mz", area)]
103
-            spec[, "mz"] <- round(spec[, "mz"])
104
-            if (max(table(spec[, 1])) > 1) {
105
-                spec.noDouble <- cbind(aggregate(spec[, 2], list(spec[, 
106
-                  1]), FUN = sum))
107
-                colnames(spec.noDouble) <- c("mz", area)
108
-                spec <- spec.noDouble
130
+                    )
131
+    ## filter pseudo spectra
132
+    intensity <- "maxo"
133
+    res <- lapply(apd, function(x) {
134
+        allpks <- x@groupInfo
135
+        minI <- minintens# * max(allpks[, intensity])
136
+        tooSmall <- which(allpks[, intensity] < minI)
137
+        pspectra <- lapply(x@pspectra, function(x) {
138
+            x[!x %in% tooSmall]})
139
+        npeaks <- sapply(pspectra, length)
140
+        pspectra <- pspectra[npeaks >= minfeat]
141
+        ## list of unique pspec, double masses removed
142
+        listpspec <- lapply(pspectra, function(x) {
143
+            aa <- cbind(mz = round(allpks[x, "mz"], digits = 0),
144
+                        allpks[x, c(intensity, "rt", "rtmin", "rtmax")]
145
+                        )
146
+            double <- duplicated(aa[, 1])
147
+            bb <- cbind(aggregate(aa[, 2], list(aa[, 1]),
148
+                FUN = sum), aa[!double, 3:5])
149
+            setNames(bb, c(colnames(aa)))
150
+        }
151
+                            )
152
+        ## get mzrange from data
153
+        mz.max <- max(sapply(listpspec, function(x) {
154
+            max(x[, "mz"])}))
155
+        mz.min <- min(sapply(listpspec, function(x) {
156
+            min(x[, "mz"])}))
157
+        mz.range <- data.frame(mz = c(mz.min:mz.max))
158
+        ## merge pspec with mzrange
159
+        listpspec.merged <- lapply(listpspec, function(x) {
160
+            merge(x, mz.range, by = "mz", all = TRUE)
109 161
             }
110
-            else {
111
-                spec
162
+            )
112 163
             }
113
-            abu$z <- merge(spec, mz, by = "mz", all = TRUE)[, 
114
-                area]
115
-        })
116
-        colnames(abu) <- spec.idx
117
-        abu[is.na(abu)] <- c(0)
118
-        return(abu)
119
-    })
120
-    apex.rt <- lapply(seq(along = cdfFiles), function(x) {
121
-        filt <- sapply(xs[[x]]@pspectra, function(r) {
122
-            length(r)
123
-        })
124
-        spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >= 
125
-            6)]
126
-        apex.rt <- sapply(spec.idx, function(z) {
127
-            spec.rt <- getpspectra(xs[[x]], z)[, c("rt")]
128
-            rt <- round(mean(spec.rt)/60, digits = 3)
129
-        })
130
-        return(apex.rt)
131
-    })
132
-    spectra.ind <- lapply(seq(along = cdfFiles), function(x) {
133
-        filt <- sapply(xs[[x]]@pspectra, function(r) {
134
-            length(r)
135
-        })
136
-        spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >= 
137
-            6)]
138
-    })
139
-    ind.start <- lapply(seq(along = cdfFiles), function(x) {
140
-        filt <- sapply(xs[[x]]@pspectra, function(r) {
141
-            length(r)
142
-        })
143
-        spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >= 
144
-            6)]
145
-        rt.start <- sapply(spec.idx, function(z) {
146
-            spec.rt <- getpspectra(xs[[x]], z)[, c("rtmin")]
147
-            rt <- round(mean(spec.rt), digits = 3)
148
-        })
149
-        return(rt.start)
150
-    })
151
-    ind.stop <- lapply(seq(along = cdfFiles), function(x) {
152
-        filt <- sapply(xs[[x]]@pspectra, function(r) {
153
-            length(r)
154
-        })
155
-        spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >= 
156
-            6)]
157
-        rt.stop <- sapply(spec.idx, function(z) {
158
-            spec.rt <- getpspectra(xs[[x]], z)[, c("rtmax")]
159
-            rt <- round(mean(spec.rt), digits = 3)
160
-        })
161
-        return(rt.stop)
162
-    })
163
-    object@files
164
-    object@mz
164
+            )
165
+    ## merge again with a common mz range among all sampless
166
+    max.mz <- max(sapply(res, function(x) {
167
+        max(sapply(x, "[[", "mz"))}))
168
+    min.mz <- min(sapply(res, function(x) {
169
+        min(sapply(x, "[[", "mz"))}))
170
+    mz.range.all <- data.frame(mz = c(min.mz:max.mz))
171
+    res.mz.mrg <- lapply(res, function(x) {
172
+        lapply(x, function(y) {
173
+            merge(y, mz.range.all, by = "mz", all = TRUE)
174
+            }
175
+            )
176
+            }
177
+            )
178
+    ## prepare the S4 slots
179
+    spec.ind <- lapply(res.mz.mrg, function(x) {
180
+        1:length(x)
181
+        }
182
+        )
183
+    apex.rt <- lapply(res.mz.mrg, function(x) {
184
+        rt <- lapply(x, "[[",  "rt")
185
+        round(sapply(rt, function(x) {
186
+            mean(x, na.rm = TRUE) / 60
187
+            }
188
+            ), digits = 3)
189
+            }
190
+            )
191
+    start.rt <- lapply(res.mz.mrg, function(x) {
192
+        rt <- lapply(x, "[[",  "rtmin")
193
+        round(sapply(rt, function(x) {
194
+            mean(x, na.rm = TRUE) / 60
195
+            }
196
+            ), digits = 3)
197
+            }
198
+            )
199
+    stop.rt <- lapply(res.mz.mrg, function(x) {
200
+        rt <- lapply(x, "[[",  "rtmax")
201
+        round(sapply(rt, function(x) {
202
+            mean(x, na.rm = TRUE) / 60
203
+            }
204
+            ), digits = 3)
205
+            }
206
+            )
207
+    data <- lapply(res.mz.mrg, function(x) {
208
+        a <- lapply(x, "[[",  intensity)
209
+        aa <- do.call(cbind, a)
210
+        colnames(aa) <- c(1:ncol(aa))
211
+        aa[is.na(aa)] <- c(0)
212
+        return(aa)
213
+        }
214
+        )
215
+
165 216
     for (i in 1:length(files)) {
166 217
         ord <- order(apex.rt[[i]])
167 218
         data[[i]] <- data[[i]][, ord]
168 219
         apex.rt[[i]] <- apex.rt[[i]][ord]
169
-        spectra.ind[[i]] <- spectra.ind[[i]][ord]
170
-        ind.start[[i]] <- ind.start[[i]][ord]
171
-        ind.stop[[i]] <- ind.stop[[i]][ord]
220
+        spec.ind[[i]] <- spec.ind[[i]][ord]
221
+        start.rt[[i]] <- start.rt[[i]][ord]
222
+        stop.rt[[i]] <- stop.rt[[i]][ord]
172 223
     }
173
-    options(warn = 0)
174
-    nm <- lapply(files, function(u) {
175
-        sp <- strsplit(u, split = "/")[[1]]
176
-        sp[length(sp)]
177
-    })
178
-    nm <- sub(".CDF$", "", nm)
179
-    names(data) <- names(apex.rt) <- names(spectra.ind) <- names(ind.start) <- names(ind.stop) <- nm
180
-    new("peaksDataset", files = object@files, peaksdata = data, 
181
-        peaksrt = apex.rt, peaksind = spectra.ind, peaksind.start = ind.start, 
182
-        peaksind.end = ind.stop, rawdata = object@rawdata, rawrt = object@rawrt, 
183
-        mz = object@mz)
224
+    new("peaksDataset",
225
+        files = files,
226
+        peaksdata = data,
227
+        peaksrt = apex.rt,
228
+        peaksind = spec.ind,
229
+        peaksind.start = start.rt,
230
+        peaksind.end = stop.rt,
231
+        rawdata = object@rawdata,
232
+        rawrt = object@rawrt,
233
+        mz = c(min.mz:max.mz)
234
+        )
184 235
 }
236
+
237
+
238
+##' Duplicate peak removal function
239
+##'
240
+##' Remove redundant peaks, in this case where there are any peaks within an
241
+##' absolute m/z value of 0.2 and within 3 s for any one sample in the xcmsSet
242
+##' (the largest peak is kept)
243
+##' @title deDuper
244
+##' @param object xcms object
245
+##' @param mz_abs mz range
246
+##' @param rt_abs rt range
247
+##' @return an object of xcms class
248
+##' @author r
249
+de_Duper <- function(object, mz_abs = 0.1, rt_abs = 2) {
250
+    mzdiff <- 0
251
+    peaks_mat <- xcms::chromPeaks(object)
252
+    mz_min <- peaks_mat[, "mz"] - mz_abs
253
+    mz_max <- peaks_mat[, "mz"] + mz_abs
254
+    rt_min <- peaks_mat[, "rt"] - rt_abs
255
+    rt_max <- peaks_mat[, "rt"] + rt_abs
256
+
257
+    peaks_mat_out <- NULL
258
+
259
+    samples <- unique(peaks_mat[, "sample"])
260
+
261
+    cat("\n", "Duplicate peak removal; % complete: ")
262
+    percplus <- -1
263
+
264
+    for (i in 1:length(samples)) {
265
+        perc <- round(i / length(samples) * 100)
266
+        if (perc %% 10 == 0 && perc != percplus) {
267
+            cat(perc, " ")
268
+        }
269
+        percplus <- perc
270
+
271
+        peaks_mat_i <- peaks_mat[which(peaks_mat[, "sample"] == samples[i]), ,
272
+            drop = FALSE]
273
+        mz_min_i <- mz_min[which(peaks_mat[, "sample"] == samples[i])]
274
+        mz_max_i <- mz_max[which(peaks_mat[, "sample"] == samples[i])]
275
+        rt_min_i <- rt_min[which(peaks_mat[, "sample"] == samples[i])]
276
+        rt_max_i <- rt_max[which(peaks_mat[, "sample"] == samples[i])]
277
+        uorder_i <- order(peaks_mat_i[, "into"], decreasing = TRUE)
278
+        uindex_i <- xcms::rectUnique(cbind(mzmin = mz_min_i, mzmax = mz_max_i,
279
+                                            rtmin = rt_min_i, rtmax = rt_max_i),
280
+                                      uorder_i, mzdiff)
281
+        peaks_mat_i <- peaks_mat_i[uindex_i, , drop = FALSE]
282
+        peaks_mat_out <- rbind(peaks_mat_out, peaks_mat_i)
283
+    }
284
+    cat("\n")
285
+    xcms::chromPeaks(object) <- peaks_mat_out
286
+    return(object)
287
+}
185 288
\ No newline at end of file
... ...
@@ -176,10 +176,11 @@ betweenAlignment <- function(pD, cAList, pAList, impList, filterMin = 1,
176 176
 #' @export
177 177
 #' @noRd
178 178
 setMethod("show","betweenAlignment",
179
-          function(object){
179
+          function(object) {
180 180
               cat("An object of class \"", class(object), "\"\n", sep = "")
181 181
               cat(length(object@mergedPeaksDataset@peaksrt), "groups:",
182
-                  sapply(object@mergedPeaksDataset@peaksrt, length), "merged peaks\n"
182
+                  sapply(object@mergedPeaksDataset@peaksrt, length),
183
+                  "merged peaks\n"
183 184
                   )
184 185
           }
185 186
           )
... ...
@@ -66,13 +66,13 @@ setMethod("decompress","clusterAlignment",
66 66
 #' @export clusterAlignment
67 67
 clusterAlignment <- function(pD, runs = 1:length(pD@rawdata),
68 68
                              timedf = NULL, usePeaks = TRUE, 
69
-                             verbose = TRUE, ...){
69
+                             verbose = TRUE, ...) {
70 70
     n <- length(runs)
71
-    if(usePeaks)
71
+    if (usePeaks)
72 72
         nr <- length(pD@peaksdata)
73 73
     else
74 74
         nr <- length(pD@rawdata)
75
-    alignments <- vector("list", n*(n-1)/2)
75
+    alignments <- vector("list", n * (n - 1) / 2)
76 76
     aligned <- matrix(-1, nr, nr)
77 77
     colnames(aligned) <- names(pD@rawdata)
78 78
     rownames(aligned) <- names(pD@rawdata)
... ...
@@ -80,48 +80,43 @@ clusterAlignment <- function(pD, runs = 1:length(pD@rawdata),
80 80
     colnames(dist) <- names(pD@rawdata)[runs]
81 81
     rownames(dist) <- names(pD@rawdata)[runs]
82 82
     count <- 0
83
-    for(i in 1:(n-1))
84
-    {
83
+    for (i in 1:( n - 1)) {
85 84
         run.i <- runs[i]
86
-        for(j in (i+1):n)
87
-        {
85
+        for (j in (i + 1):n) {
88 86
             run.j <- runs[j]
89
-            count <- count+1
90
-            if(verbose)
91
-            {
87
+            count <- count + 1
88
+            if(verbose) {
92 89
                 cat("[clusterAlignment] Aligning",
93 90
                     names(pD@rawdata)[run.i], "to",
94 91
                     names(pD@rawdata)[run.j], "\n")
95 92
             }
96
-            if(usePeaks)
97
-            {
98
-              alignments[[count]] <- 
99
-                peaksAlignment(pD@peaksdata[[run.i]], 
100
-                               pD@peaksdata[[run.j]], 
101
-                               pD@peaksrt[[run.i]], 
102
-                               pD@peaksrt[[run.j]], 
103
-                               usePeaks = usePeaks, 
104
-                               timedf = timedf[[count]], 
93
+            if (usePeaks) {
94
+              alignments[[count]] <-
95
+                peaksAlignment(pD@peaksdata[[run.i]],
96
+                               pD@peaksdata[[run.j]],
97
+                               pD@peaksrt[[run.i]],
98
+                               pD@peaksrt[[run.j]],
99
+                               usePeaks = usePeaks,
100
+                               timedf = timedf[[count]],
105 101
                                verbose = verbose, ...)
106
-            }
107
-            else
108
-            {
102
+            } else {
109 103
                 alignments[[count]] <-
110 104
                     peaksAlignment(pD@rawdata[[run.i]],
111 105
                                    pD@rawdata[[run.j]],
112 106
                                    pD@rawrt[[run.i]],
113 107
                                    pD@rawrt[[run.j]],
114
-                                   usePeaks=usePeaks, timedf=NULL,
115
-                                   verbose=verbose, ...)
108
+                                   usePeaks = usePeaks, timedf = NULL,
109
+                                   verbose = verbose, ...)
116 110
             }
117
-            aligned[runs[i],runs[j]] <- aligned[runs[j],runs[i]] <- count
118
-            dist[j,i] <- dist[i,j] <- alignments[[count]]@dist
119
-	}
120
-    }
111
+            aligned[runs[i], runs[j]] <- aligned[runs[j], runs[i]] <- count
112
+            dist[j, i] <- dist[i, j] <- alignments[[count]]@dist
113
+          }
114
+  }
121 115
     merge <- hclust(as.dist(dist), method = "average")$merge
122 116
     merge.copy <- merge
123
-    for(i in 1:length(runs))
124
-        {merge[which(merge.copy == (-i))] <- (-runs[i])}
117
+    for (i in 1:length(runs)) {
118
+      merge[which(merge.copy == (-i))] <- (-runs[i])
119
+      }
125 120
     new("clusterAlignment", runs = runs, aligned = aligned,
126 121
         gap = alignments[[1]]@gap, D = alignments[[1]]@D, dist = dist,
127 122
         alignments = alignments, merge = merge)
... ...
@@ -170,38 +165,36 @@ function(object) {
170 165
 ##' @keywords classes
171 166
 ##' @examples
172 167
 ##' 
173
-##' require(gcspikelite)
174
-##' 
175
-##' ## paths and files
176
-##' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
177
-##' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
178
-##' eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
179
-##' 
180
-##' ## read data
181
-##' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5))
182
-##' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
183
-##'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5)
184
-##' ca <- clusterAlignment(pd, metric = 1, D = 50, type = 1, gap = 0.5)
168
+#' require(gcspikelite)
169
+#' 
170
+#' # paths and files
171
+#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
172
+#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
173
+#' eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
174
+#' 
175
+#' # read data, peak detection results
176
+#' pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5))
177
+#' pd <- addAMDISPeaks(pd, eluFiles[1:2])
178
+#' 
179
+#' ca <- clusterAlignment(pd, gap=0.5, D=0.05, df=30, metric=1, type=1)
185 180
 ##' plotClustAlignment(ca, run = 1)
186 181
 ##' plotClustAlignment(ca, run = 2)
187
-##' plotClustAlignment(ca, run = 3) 
182
+##' plotClustAlignment(ca, run = 3)
188 183
 ##' 
189 184
 ##' @importFrom graphics plot
190 185
 ##' @export
191 186
 setMethod("plotClustAlignment", "clusterAlignment",
192
-          function(object, alignment = 1, ...)
193
-          {
187
+          function(object, alignment = 1, ...) {
194 188
               rn <- rownames(object@aligned)
195
-              for(i in alignment){
196
-                  ind <- which(object@aligned == i, arr.ind = TRUE)[2,]
189
+              for (i in alignment) {
190
+                  ind <- which(object@aligned == i, arr.ind = TRUE)[2, ]
197 191
                   plot(## object@alignments[[i]],
198 192
                       object@alignments[[i]]@v$match,
199 193
                       main = paste("D=", object@D, " gap=", object@gap,
200 194
                                    sep = ""),
201
-                      xlab = paste("Peaks ",rn[ind[1]], sep = " - "),
202
-                      ylab = paste("Peaks ",rn[ind[2]], sep = " - "),
195
+                      xlab = paste("Peaks ", rn[ind[1]], sep = " - "),
196
+                      ylab = paste("Peaks ", rn[ind[2]], sep = " - "),
203 197
                       ...)
204 198
               }
205 199
           }
206 200
           )
207
-
... ...
@@ -43,36 +43,29 @@
43 43
 #'
44 44
 #' @useDynLib flagme
45 45
 #' @export dp
46
-dp<-function(M,gap=.5,big=10000000000,verbose=FALSE) {
46
+dp <- function(M, gap = .5, big= 10000000000, verbose = FALSE) {
47 47
 
48 48
   # setup score matrix
49
-  bigr<-c(0,big*(1:ncol(M)))
50
-  bigc<-c(big*(1:nrow(M)))
51
-  D<-rbind(bigr,cbind(bigc,M)); #D[1,1]<-0
52
-  #bigr<-c(0,gap*(1:ncol(M)))
53
-  #bigc<-c(gap*(1:nrow(M)))
54
-  #D<-rbind(bigr,cbind(bigc,M)); #D[1,1]<-0
49
+  bigr <- c(0, big * (1:ncol(M)))
50
+  bigc <- c(big * (1:nrow(M)))
51
+  D <- rbind(bigr, cbind(bigc, M)); # D[1, 1] <-0
52
+  # bigr <- c(0, gap * (1:ncol(M)))
53
+  # bigc <- c(gap * (1:nrow(M)))
54
+  # D <- rbind(bigr, cbind(bigc, M)); # D[1, 1] <-0
55 55
   
56 56
   # setup traceback
57
-  phi<-matrix(0,nrow=nrow(D),ncol=ncol(D))
58
-  phi[1,]<-2; phi[,1]<-1; phi[1,1]<-3
59
-  
60
-  match<-matrix(-1,nrow=nrow(M)+ncol(M),ncol=2) # matrix to store matches
57
+  phi <- matrix(0, nrow = nrow(D), ncol = ncol(D))
58
+  phi[1, ] <- 2; phi[, 1] <- 1; phi[1, 1] <- 3
59
+  match <- matrix(-1, nrow = nrow(M) + ncol(M), ncol = 2) # matrix to store matches
61 60
 
62
-  out<-.C("dp", D=as.double(D), M=as.double(M), gap=as.double(gap), phi=as.integer(phi), 
63
-                nr=as.integer(nrow(M)), ncol=as.integer(ncol(M)),
64
-          match=as.integer(match), nmatch=as.integer(0),
65
-          PACKAGE="flagme"
66
-          ) 
61
+  out <- .C("dp", D = as.double(D), M = as.double(M), gap = as.double(gap), 
62
+    phi = as.integer(phi), nr = as.integer(nrow(M)), ncol = as.integer(ncol(M)),
63
+    match = as.integer(match), nmatch = as.integer(0), PACKAGE = "flagme")
67 64
   #list(D=matrix(out$D,ncol=ncol(D)),phi=matrix(out$phi,ncol=ncol(D)),match=1+matrix(out$match,ncol=2)[out$nmatch:1,])
68
-  list(match=1+matrix(out$match,ncol=2)[out$nmatch:1,])
65
+  list(match = 1 + matrix(out$match, ncol = 2)[out$nmatch:1, ])
69 66
 }
70 67
 
71 68
 
72
-
73
-
74
-
75
-
76 69
 #' dynRT
77 70
 #' 
78 71
 #' Dynamic Retention Time Based Alignment algorithm, given a similarity matrix
... ...
@@ -87,77 +80,72 @@ dp<-function(M,gap=.5,big=10000000000,verbose=FALSE) {
87 80
 #' @examples
88 81
 #' 
89 82
 #' require(gcspikelite)
90
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
91
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
92
-#' ## read data, peak detection results
93
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550),
94
-#'     rtrange=c(7.5,10.5))
95
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd,
96
-#'     peakPicking=c('mF'),snthresh=3, fwhm=10,  step=0.1, steps=2,
97
-#'     mzdiff=0.5, sleep=0)
83
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
84
+#'                     sep = "/"),"CDF", full = TRUE)
85
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
86
+#' ## create settings object
87
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
88
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
89
+#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
90
+#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
91
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
92
+#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
93
+#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
94
+#' data
98 95
 #' ## review peak picking
99
-#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
96
+#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
100 97
 #' ## similarity
101
-#' r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]], pd@peaksrt[[1]],
102
-#'     pd@peaksrt[[2]], D=50)
98
+#' r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]], data@peaksrt[[1]],
99
+#'     data@peaksrt[[2]], D = 50)
103 100
 #' ## dynamic retention time based alignment algorithm
104
-#' v <- dynRT(S=r)
101
+#' v <- dynRT(S = r)
105 102
 #' 
106 103
 #' @export dynRT
107
-dynRT <- function(S){
108
-    options(warn=-1)
104
+dynRT <- function(S) {
105
+    options(warn = -1)
109 106
     ## S similarity matrix
110 107
     ## move trought S to find the highest score
111
-    S <- round(S, digits=3)
112
-    id <- max(table(S))-1
108
+    S <- round(S, digits = 3)
109
+    id <- max(table(S)) - 1
113 110
     ## filling <- which(table(S) > 200)# weak
114 111
     filling <- which(table(S) > id)
115 112
     filling.names <- as.numeric(names(filling))
116
-    if(filling.names > 1)
117
-    {
113
+    if (filling.names > 1) {
118 114
         filling.names <- 1
119 115
     }
120 116
     trace <- c()
121 117
     ## this boolean workaround is to allow to use both the ndp
122 118
     ## function and the matching function from MR and RR 
123
-    if(filling.names == 1)
124
-    {
125
-        for(i in 1:nrow(S)){
126
-            trace[i] <- which(S[i,] == min(S[i,])) # MR
119
+    if (filling.names == 1) {
120
+        for (i in 1:nrow(S)) {
121
+            trace[i] <- which(S[i, ] == min(S[i, ])) # MR
127 122
         }
128 123
     }
129
-    if(filling.names == 0)
130
-    {
131
-        for(i in 1:nrow(S)){
132
-            trace[i] <- which(S[i,] == max(S[i,])) # RR
124
+    if (filling.names == 0) {
125
+        for (i in 1:nrow(S)) {
126
+            trace[i] <- which(S[i, ] == max(S[i, ])) # RR
133 127
         }
134 128
     }
135
-    trace.mtx <- matrix(NA, nrow=nrow(S), ncol=2)  
136
-    trace.mtx[,1] <- 1:nrow(S)
129
+    trace.mtx <- matrix(NA, nrow = nrow(S), ncol = 2)
130
+    trace.mtx[, 1] <- 1:nrow(S)
137 131
     trace[which(trace == 1)] <- NA
138
-    trace.mtx[,2] <- trace
132
+    trace.mtx[, 2] <- trace
139 133
     ## cercare i composti matchati piu di una volta e renderli univoci
140
-    idx <- which(table(trace.mtx[,2]) > 1) # colonne della matrice S
134
+    idx <- which(table(trace.mtx[, 2]) > 1) # colonne della matrice S
141 135
     names.idx <- as.numeric(names(idx)) # i doppioni
142 136
     position <- c() # i doppi con il match piu alto
143
-    
144
-    if(filling.names == 1)
145
-    {
146
-        for(k in 1:length(names.idx)){
147
-            position[k] <- which(S[,names.idx[k]] == min(S[,names.idx[k]]))
148
-        } 
137
+    if (filling.names == 1) {
138
+        for (k in 1:length(names.idx)) {
139
+            position[k] <- which(S[, names.idx[k]] == min(S[, names.idx[k]]))
140
+        }
149 141
     }
150
-    if(filling.names == 0)
151
-    {
152
-        for(k in 1:length(names.idx)){
153
-            position[k] <- which(S[,names.idx[k]] == max(S[,names.idx[k]]))
142
+    if (filling.names == 0) {
143
+        for (k in 1:length(names.idx)) {
144
+            position[k] <- which(S[, names.idx[k]] == max(S[, names.idx[k]]))
154 145
         }
155 146
     }
156
-    
157 147
     tutti <- which(trace %in% names.idx)
158
-    trace.mtx[,2][tutti[! tutti %in% position]] <- NA
159
-    
160
-    return(list(match=trace.mtx))
161
-    
162
-    options(warn=0)
148
+    trace.mtx[, 2][tutti[! tutti %in% position]] <- NA
149
+    return(list(match = trace.mtx))
150
+    options(warn = 0)
163 151
 }
... ...
@@ -1,9 +1,9 @@
1 1
 #' exportSpectra
2
-#' 
2
+#'
3 3
 #' Write the mass spectum into a .msp file to be used in NIST search.
4
-#' 
4
+#'
5 5
 #' Write the mass spectum into a .msp file to be used in NIST search.
6
-#' 
6
+#'
7 7
 #' @param object an object of class "peaksDataset"
8 8
 #' @param outList an object created using the gatherInfo() function
9 9
 #' @param spectra numeric. The number of the mass spectra to be printed. It
... ...
@@ -13,7 +13,7 @@
13 13
 #' @return a .msp file
14 14
 #' @author riccardo.romoli@@unifi.com
15 15
 #' @export exportSpectra
16
-exportSpectra <- function (object, outList, spectra, normalize = TRUE) 
16
+exportSpectra <- function (object, outList, spectra, normalize = TRUE)
17 17
 {
18 18
     spectra <- as.numeric(spectra)
19 19
     mz <- outList[[spectra]]$mz
... ...
@@ -21,7 +21,7 @@ exportSpectra <- function (object, outList, spectra, normalize = TRUE)
21 21
     if(normalize)
22 22
     {
23 23
         for(i in 1:ncol(abu)){
24
-            if(is.na(sum(abu[, i]))) 
24
+            if(is.na(sum(abu[, i])))
25 25
                 next
26 26
             abu[, i] <- 100 * abu[, i]/abu[which.max(abu[, i]), i]
27 27
         }
... ...
@@ -33,11 +33,10 @@ exportSpectra <- function (object, outList, spectra, normalize = TRUE)
33 33
     idx <- 1
34 34
     spec <- paste(mz, abu[,idx])
35 35
     msp <- rbind(paste("NAME: Variable", spectra),
36
-                 paste("COMMENT:", round(object@peaksrt[[idx]][spectra], digits = 2), "min"), 
37
-                 paste("FORMULA:"), paste("MW:"), paste("CAS:"), paste("SYNONYM:"), 
36
+                 paste("COMMENT:", round(object@peaksrt[[idx]][spectra], digits = 2), "min"),
37
+                 paste("FORMULA:"), paste("MW:"), paste("CAS:"), paste("SYNONYM:"),
38 38
                  paste("Num Peaks:", length(spec)), matrix(unlist(spec), nrow = length(unlist(spec)), ncol = 1))
39 39
 
40 40
     write(msp, file = paste0(spectra,".msp"), sep = "\n")
41
-    
42
-}
43 41
 
42
+}
44 43
new file mode 100644
... ...
@@ -0,0 +1,195 @@
1
+##' Read the mass spectra from an external msp file
2
+##'
3
+##' Read the mass spectra from an external file in msp format. The format is
4
+##' used in NIST search library database.
5
+##' @title importSpec
6
+##' @param file a .msp file from NIST search library database
7
+##' @return list conaining the mass spctra
8
+##' @author [email protected]
9
+##' @export
10
+importSpec <- function(file){
11
+    ## read msp lib
12
+    lib <- scan(file, what = "", sep = "\n", quiet = TRUE)
13
+    ## separate each mass spec
14
+    starts <- which(regexpr("[Nn][Aa][Mm][Ee]:", lib) == 1)
15
+    ends <- c(starts[-1] - 1, length(lib))
16
+    ## loop to extract the mass spec into a list
17
+    list.spec <- lapply(1:length(starts), function(z){
18
+        ## meta data
19
+#        browser()
20
+        comp <- lib[starts[z]:ends[z]]
21
+        numPeaks.idx <- which(regexpr("[Nn][Uu][Mm] [Pp][Ee][Aa][Kk][Ss]:", comp) == 1)
22
+        metaData <- comp[1:numPeaks.idx - 1]
23
+        md <- strsplit(metaData, split = ": ")
24
+        md1 <- sapply(md, "[[", 1)
25
+        md2 <- sapply(md, "[", 2)
26
+        metaData.list <- setNames(as.list(md2), md1)
27
+        ## mass spec
28
+        nlines <- length(comp)
29
+        npeaks <- as.numeric(strsplit(comp[numPeaks.idx], ":")[[1]][2])
30
+        peaks.idx <- (numPeaks.idx + 1):nlines
31
+        pks <- gsub("^ +", "", unlist(strsplit(comp[peaks.idx], ";")))
32
+        ## il separatore potrebbe essere anche (), va trovata una soluzione
33
+        pks.2 <- gsub("^ +", "", unlist(strsplit(comp[peaks.idx], "[()]")))
34
+gsub("\"", "", pks.2)
35
+
36
+        pks <- pks[pks != ""]
37
+        if (length(pks) != npeaks)
38
+        {
39
+            stop("Not the right number of peaks in compound", metaData.list$NAME)
40
+        }
41
+        ## error due to the presence of tab \t instead of a blank space
42
+        if(length(grep(pattern = "\t", pks)) > 0)
43
+        {
44
+            pklst <- strsplit(pks, "\t")
45
+            pklst <- lapply(pklst, function(x) x[x != ""])
46
+        }
47
+        else if(length(grep(pattern = "\\s", pks)) > 0)
48
+        {
49
+            pklst <- strsplit(pks, " ")
50
+            pklst <- lapply(pklst, function(x) x[x != ""])
51
+        }
52
+        else
53
+        {
54
+            cat("Some formatting errors in the msp library \n")
55
+        }
56
+
57
+        mz <- as.numeric(sapply(pklst, "[[", 1))
58
+        mz <- round(mz)
59
+        int <- as.numeric(sapply(pklst, "[[", 2)) # error
60
+        ##
61
+        finaltab <- matrix(c(mz, int), ncol = 2)
62
+
63
+        if (any(table(mz) > 1))
64
+        {
65
+            warning("Duplicate mass in compound ", metaData.list$NAME,
66
+                    " (CAS ", metaData.list$NAME, ")... summing up intensities")
67
+            finaltab <- aggregate(finaltab[,2],
68
+                                  by = list(finaltab[,1]),
69
+                                  FUN = sum)
70
+        }
71
+        colnames(finaltab) <- c("mz", "intensity")
72
+        c(metaData.list, list(spec = finaltab))
73
+    }
74
+    )
75
+    return(list.spec)
76
+}
77
+
78
+
79
+##' Calculate the distance between a reference mass spectrum
80
+##'
81
+##' Calculate the distance between a reference mass spectrum and one from the
82
+##' sample
83
+##' @title matchSpec
84
+##' @param spec1 reference mass spectrum
85
+##' @param outList the return of \code{\link{gatherInfo}}
86
+##' @param whichSpec the entry number of outList
87
+##' @return the distance between the reference mass spectrum and the others
88
+##' @author Riccardo Romoli
89
+##' @export
90
+matchSpec <- function(spec1, outList, whichSpec){
91
+  ## if(whichSpec == 143) browser()
92
+  ## first get the average spec from the gatherList
93
+  averageInt <-
94
+    apply(outList[[whichSpec]]$data, MARGIN = 1, FUN = mean, na.rm = TRUE)
95
+  ## outList[[whichSpec]]$data[,1:49] # si ma perch49?!?
96
+  ## normalize the intensity
97
+  normInt <- sapply(averageInt, function(x){
98
+    i <- which.max(averageInt)
99
+    (100*x)/averageInt[i]
100
+  }
101
+  )
102
+  ## combine mz and normalized intensity
103
+  pspec <- matrix(c(outList[[whichSpec]]$mz, normInt), ncol = 2)
104
+  colnames(pspec) <- c("mz", "intensity")
105
+  libSpec <- spec1$spec
106
+  ## merge the spectra to the same mz range and remove the NA
107
+  mztomerge <- data.frame(mz = min(c(min(libSpec[,"mz"], na.rm = TRUE),
108
+                                     min(pspec[,"mz"], na.rm = TRUE))) :
109
+                            max(c(max(libSpec[,"mz"], na.rm = TRUE),
110
+                                  max(pspec[,"mz"], na.rm = TRUE)))
111
+                          )
112
+  pspec.merged <- merge(pspec, mztomerge, by = "mz", all = TRUE)
113
+  libSpec.merged <- merge(libSpec, mztomerge, by = "mz", all = TRUE)
114
+  pspec.merged[is.na(pspec.merged)] <- 0
115
+  libSpec.merged[is.na(libSpec.merged)] <- 0
116
+  ## calculate the distance among the spectra
117
+  distance <- normDotProduct(as.matrix(pspec.merged[,"intensity"]),
118
+                             as.matrix(libSpec.merged[,"intensity"]))
119
+  return(distance)
120
+}
121
+
122
+
123
+##' The function calculate the distance between each mas spec in the msp file
124
+##' and the aligned mass spec from each sampe
125
+##'
126
+##' Return the distance matrix
127
+##' @title distToLib
128
+##' @param mspLib a .msp file from NIST
129
+##' @param outList an object from gatherInfo()
130
+##' @return the distance matrix between the mass spec and the aligned spec
131
+##' @author Riccardo Romoli
132
+##' @export
133
+distToLib <- function(mspLib, outList ){
134
+  mspDist <- lapply(1:length(mspLib),
135
+                    function(x)
136
+                    {
137
+                      ## trace
138
+                      cat(x, "\n")
139
+                      sapply(1:length(outList),
140
+                             function(y)
141
+                             {
142
+                               ## cat("y is = ", y, "\n") # for debug only
143
+                               matchSpec(mspLib[[x]], outList,
144
+                                         whichSpec = y)
145
+                             }
146
+                             )
147
+                    })
148
+  mtx.dist <- do.call(rbind, mspDist)
149
+  rownames(mtx.dist) <- sapply(mspLib, "[[", 1)
150
+  colnames(mtx.dist) <- sapply(1:length(outList),
151
+                               function(x)
152
+                               {
153
+                                 paste0("outListFold", x)
154
+                               }
155
+                               )
156
+  return(mtx.dist)
157
+}
158
+
159
+
160
+##' The head-to-tail-plot for the mass spectra
161
+##'
162
+##' Head-to-tail-plot to visually compare the mass spectra
163
+##' @title Head to tail plot
164
+##' @param specFromLib the mass spectra obtained from the .msp file
165
+##' @param specFromList the mass spectra obtained from \code{\link{gatherInfo}}
166
+##' @return the plot
167
+##' @author Riccardo Romoli
168
+##' @export
169
+headToTailPlot <- function(specFromLib, specFromList){
170
+    libSpec <- specFromLib$spec
171
+    pspec <- specFromList
172
+    ## get average and normalized intensity of the mass spec
173
+    averageInt <- apply(pspec$data, MARGIN = 1, FUN = mean, na.rm = TRUE)
174
+    ## normalize the intensity
175
+    normInt <- sapply(averageInt, function(x){
176
+        i <- which.max(averageInt)
177
+        (1000*x)/averageInt[i]
178
+    }
179
+    )
180
+    pspec.av <- data.frame(mz = pspec$mz, intensity = normInt)
181
+    ## merge the spectra to the same mz range and remove the NA
182
+    mztomerge <- data.frame(mz = min(c(min(libSpec[,"mz"], na.rm = TRUE),
183
+                                       min(pspec.av[,"mz"], na.rm = TRUE))) :
184
+                              max(c(max(libSpec[,"mz"], na.rm = TRUE),
185
+                                    max(pspec.av[,"mz"], na.rm = TRUE)))
186
+                            )
187
+    pspec.merged <- merge(pspec.av, mztomerge, by = "mz", all = TRUE)
188
+    libSpec.merged <- merge(libSpec, mztomerge, by = "mz", all = TRUE)
189
+    pspec.merged[is.na(pspec.merged)] <- 0
190
+    libSpec.merged[is.na(libSpec.merged)] <- 0
191
+    ## now the plot
192
+    plot(pspec.merged, type = "h", ylim = c(-1000, 1000), main = "Head to Tail Plot")
193
+    points(libSpec.merged[,"mz"], y = -libSpec.merged[,"intensity"]*10, col = 2,
194
+           type = "h")
195
+}
... ...
@@ -118,7 +118,7 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2))
118 118
 #' Retention Time Penalized Normalized Dot Product
119 119
 #' 
120 120
 #' This function calculates the similarity of all pairs of peaks from 2
121
-#' samples, using the spectra similarity and the rretention time differencies
121
+#' samples, using the spectra similarity and the retention time differencies
122 122
 #' 
123 123
 #' Computes the normalized dot product between every pair of peak vectors in
124 124
 #' the retention time window (\code{D})and returns a similarity matrix.
... ...
@@ -136,65 +136,64 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2))
136 136
 #' 
137 137
 #' ## Not Run
138 138
 #' require(gcspikelite)
139
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
140
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
141
-#' 
142
-#'                                         # read data, peak detection results
143
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
144
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
145
-#'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
146
-#'                    sleep=0)
139
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
140
+#'                     sep = "/"),"CDF", full = TRUE)
141
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
142
+#' ## create settings object
143
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
144
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
145
+#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
146
+#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
147
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
148
+#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
149
+#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
150
+#' data
147 151
 #' ## review peak picking
148
-#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
152
+#' plotChrom(data, rtrange = c(7.5, 10.5), runs = c(1:2))
149 153
 #' 
150
-#' r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]],
151
-#'            pd@peaksrt[[1]], pd@peaksrt[[2]], D=50)
154
+#' r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]],
155
+#'            data@peaksrt[[1]], data@peaksrt[[2]], D = 50)
152 156
 #' ## End (Not Run)
153 157
 #' 
154 158
 #' @export ndpRT
155
-ndpRT <- function(s1, s2, t1, t2, D){
156
-    
159
+ndpRT <- function(s1, s2, t1, t2, D) {
157 160
     Normalize <- function(j){
158
-        n <- apply(j, 2, function(k){
161
+        n <- apply(j, 2, function(k) {
159 162
             m <- k[which.max(k)]
160
-            norm <- k/m*100
163
+            norm <- k / m * 100
161 164
         })
162 165
         return(n)
163 166
     }
164
-    
165
-    scoring <- function(s1, s2, t1, t2, D){
166
-        angle <- function(s1, s2){
167
-            theta <- acos(sum(s1*s2) / (sqrt(sum(s1 * s1)) * sqrt(sum(s2 * s2))))
168
-            theta <- 1-theta
169
-            if(theta < 0)
170
-            {
167
+    scoring <- function(s1, s2, t1, t2, D) {
168
+        angle <- function(s1, s2) {
169
+            theta <- acos(
170
+                sum(s1 * s2) / (sqrt(sum(s1 * s1)) * sqrt(sum(s2 * s2)))
171
+                )
172
+            theta <- 1 - theta
173
+            if(theta < 0) {
171 174
                 theta <- 0
172
-            }       
173
-            return(theta) 
175
+            }
176
+            return(theta)
174 177
         }
175
-        
176
-        rtPen <- function(t1, t2, D){
178
+        rtPen <- function(t1, t2, D) {
177 179
             ## D espresso in secondi
178
-            t1 <- t1/60 # trasformo in secondi
179
-            t2 <- t2/60 # trasformo in secondi
180
-            srt <- exp(-(((t1-t2)^2) / D^2)) # da articolo MR, modificato
180
+            t1 <- t1 / 60 # trasformo in secondi
181
+            t2 <- t2 / 60 # trasformo in secondi
182
+            srt <- exp(- (((t1 - t2)^2) / D^2)) # da articolo MR, modificato
181 183
             # era 2*D^2
182 184
             return(srt)
183 185
         }
184
-        
185 186
         score <- angle(s1, s2) * rtPen(t1, t2, D)
186 187
         return(score)
187 188
     }
188
-    
189 189
     s1 <- Normalize(s1)
190 190
     s2 <- Normalize(s2)
191
-    
192
-    res <- matrix(0, nrow=ncol(s1), ncol=ncol(s2))
193
-    for(i in 1:ncol(s1)){
194
-        for(j in 1:ncol(s2)){
195
-            res[i,j] <- scoring(s1[,i], s2[,j], t1[i], t2[j], D=D)
191
+    res <- matrix(0, nrow = ncol(s1), ncol = ncol(s2))
192
+    for (i in 1:ncol(s1)) {
193
+        for (j in 1:ncol(s2)) {
194
+            res[i, j] <- scoring(s1[, i], s2[, j], t1[i], t2[j], D = D)
196 195
         }
197
-    }    
196
+    }
198 197
     return(res)
199 198
 }
200 199
 
... ...
@@ -226,55 +225,58 @@ ndpRT <- function(s1, s2, t1, t2, D){
226 225
 #' 
227 226
 #' ## Not Run
228 227
 #' require(gcspikelite)
229
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
230
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
231
-#' ## read data, peak detection results
232
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
233
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
234
-#'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
235
-#'                    sleep=0)
228
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
229
+#'                     sep = "/"),"CDF", full = TRUE)
230
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
231
+#' ## create settings object
232
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
233
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
234
+#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
235
+#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
236
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
237
+#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
238
+#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
239
+#' data
236 240
 #' ## review peak picking
237
-#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
241
+#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
238 242
 #' 
239
-#' r <- corPrt(pd@peaksdata[[1]], pd@peaksdata[[2]],
240
-#'            pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, penality=0.2)
243
+#' r <- corPrt(data@peaksdata[[1]], data@peaksdata[[2]],
244
+#'            data@peaksrt[[1]], data@peaksrt[[2]], D = 50, penality = 0.2)
241 245
 #' ## End (Not Run)
242 246
 #'
243 247
 #' @importFrom stats complete.cases
244 248
 #' @export corPrt
245
-corPrt <- function(d1, d2, t1, t2, D, penality=0.2){
249
+corPrt <- function(d1, d2, t1, t2, D, penality = 0.2) {
246 250
     D <- as.numeric(D) # time window in second
247 251
     pn <- as.numeric(penality)# penality if out of time window
248
-    pearson <- function(x,y){
252
+    pearson <- function(x,y) {
249 253
         size <- length(x)
250
-        cfun <- .C("pearson", size=as.integer(size), x=as.double(x),
251
-                   y=as.double(y), result=double(1), PACKAGE='flagme')
254
+        cfun <- .C("pearson", size = as.integer(size), x = as.double(x),
255
+                   y = as.double(y), result = double(1), PACKAGE = 'flagme')
252 256
         return(cfun[["result"]])
253 257
     }
254
-    Normalize <- function(j){
255
-        n <- apply(j, 2, function(k){
258
+    Normalize <- function(j) {
259
+        n <- apply(j, 2, function(k) {
256 260
             m <- k[which.max(k)]
257
-            norm <- k/m*100
261
+            norm <- k / m * 100
258 262
         })
259 263
     }
260 264
     Rank <- function(u) {
261
-        if (length(u) == 0L) 
265
+        if (length(u) == 0L)
262 266
             u
263 267
         else if (is.matrix(u)) {
264
-            if (nrow(u) > 1L) 
265
-                apply(u, 2L, rank, na.last="keep")
268
+            if (nrow(u) > 1L)
269
+                apply(u, 2L, rank, na.last = "keep")
266 270
             else row(u)
267 271
         }
268
-        else rank(u, na.last="keep")
272
+        else rank(u, na.last = "keep")
269 273
     }
270
-    #
271 274
         x <- Normalize(d1)
272 275
         y <- Normalize(d2)
273
-    
274 276
     ## method <- c("pearson", "kendall", "spearman")
275 277
     ncx <- ncol(x)
276 278
     ncy <- ncol(y)
277
-    r <- matrix(0, nrow=ncx, ncol=ncy)
279
+    r <- matrix(0, nrow = ncx, ncol = ncy)
278 280
     for (i in seq_len(ncx)) {
279 281
         for (j in seq_len(ncy)) {
280 282
             x2 <- x[, i]
... ...
@@ -283,24 +285,20 @@ corPrt <- function(d1, d2, t1, t2, D, penality=0.2){
283 285
             x2 <- rank(x2[ok])
284 286
             y2 <- rank(y2[ok])
285 287
             ## insert rt penality in seconds
286
-            rtDiff <- t1[i]*60 - t2[j]*60 # retention time in seconds
288
+            rtDiff <- t1[i] * 60 - t2[j] * 60 # retention time in seconds
287 289
             rtDiff <- abs(rtDiff)
288 290
             r[i, j] <- if (any(ok))
289
-                           if(rtDiff <= D)
291
+                           if (rtDiff <= D)
290 292
                                pearson(x2, y2)
291 293
                            else 
292 294
                                pearson(x2, y2) - pn
293 295
                        else 0
294 296
         }
295 297
     }
296
-    
297
-    r <- apply(r, MARGIN=c(1,2), function(x){
298
-        if(x < 0.2)
299
-        {
298
+    r <- apply(r, MARGIN = c(1, 2), function(x) {
299
+        if (x < 0.2) {
300 300
             x <- 0
301
-        }
302
-        else
303
-        {
301
+        } else {
304 302
             x <- x
305 303
         }
306 304
     })
... ...
@@ -138,36 +138,39 @@ setMethod("show","peaksAlignment",
138 138
 #' ## see clusterAlignment, it calls peaksAlignment
139 139
 #' 
140 140
 #' ## Not Run:
141
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
142
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
143
-#' 
144
-#' # read data, peak detection results
145
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
146
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
147
-#'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
148
-#'                    sleep=0)
149
-#' ## review peak picking
150
-#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
141
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
142
+#'                     sep = "/"),"CDF", full = TRUE)
143
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
144
+#' ## create settings object
145
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
146
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
147
+#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
148
+#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
149
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
150
+#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
151
+#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
152
+#' data
153
+#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
151 154
 #' 
152 155
 #' ## align two chromatogram
153
-#' pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
154
-#'                      pd@peaksrt[[1]], pd@peaksrt[[2]], D=50,
155
-#'                      metric=3, compress=FALSE, type=2, penality=0.2)
156
+#' pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
157
+#'                      data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
158
+#'                      metric = 3, compress = FALSE, type = 2, penality = 0.2)
156 159
 #' 
157 160
 #' plotAlignment(pA)
158 161
 #' pA@v$match
159 162
 #' 
160 163
 #' par(mfrow=c(2,1))
161
-#' plot(pd@peaksdata[[1]][,15], type='h', main=paste(pd@peaksrt[[1]][[15]]))
162
-#' plot(pd@peaksdata[[2]][,17], type='h',
163
-#'      main=paste(pd@peaksrt[[2]][[17]]))
164
+#' plot(data@peaksdata[[1]][,15], type = 'h', main = paste(data@peaksrt[[1]][[15]]))
165
+#' plot(data@peaksdata[[2]][,17], type = 'h',
166
+#'      main = paste(data@peaksrt[[2]][[17]]))
164 167
 #' ## End (Not Run)
165 168
 #'
166 169
 #' @export
167
-peaksAlignment <- function(d1, d2, t1, t2, gap=0.5, D=50,
168
-                           timedf=NULL, df=30, verbose=TRUE, 
169
-                           usePeaks=TRUE, compress=TRUE, metric=2,
170
-                           type=2, penality=0.2){
170
+peaksAlignment <- function(d1, d2, t1, t2, gap = 0.5, D = 50,
171
+                           timedf = NULL, df = 30, verbose = TRUE, 
172
+                           usePeaks = TRUE, compress = TRUE, metric = 2,
173
+                           type = 2, penality = 0.2) {
171 174
 
172 175
     ## r <- switch(metric,
173 176
     ##             normDotProduct(d1,d2,t1,t2,D=D,
... ...
@@ -190,48 +193,47 @@ peaksAlignment <- function(d1, d2, t1, t2, gap=0.5, D=50,
190 193
     ##     D <- D/100 
191 194
     ## }
192 195
     r <- switch(metric,
193
-                normDotProduct(d1, d2, t1, t2, D=D,
194
-                               df=df+abs(ncol(d1)-ncol(d2)),
195
-                               timedf=timedf, verbose=verbose),
196
-                ndpRT(s1=d1, s2=d2, t1, t2, D=D),
197
-                corPrt(d1, d2, t1, t2, D=D, penality=penality)
196
+                normDotProduct(d1, d2, t1, t2, D = D,
197
+                               df = df + abs(ncol(d1) - ncol(d2)),
198
+                               timedf = timedf, verbose = verbose),
199
+                ndpRT(s1 = d1, s2 = d2, t1, t2, D = D),
200
+                corPrt(d1, d2, t1, t2, D = D, penality = penality)
198 201
                 )
199 202
     
200 203
     r[is.nan(r)] <- 1 ## remove NaN
201 204
 
202
-    if(type == 1)
203
-    {
205
+    if (type == 1) {
204 206
         if(verbose) 
205 207
             cat("[peaksAlignment] Comparing", ncol(d1), "peaks to", 
206 208
                 ncol(d2), "peaks -- gap=", gap, "D=", D, ', metric=',
207 209
                 metric, ', type=', type, "\n")
208
-        v <- dp(r, gap=gap, verbose=verbose) # dynamic programming
210
+        v <- dp(r, gap = gap, verbose = verbose) # dynamic programming
209 211
     }
210 212
     
211
-    if(type == 2)
212
-    {
213
+    if (type == 2) {
213 214
         if(verbose) 
214 215
             cat("[peaksAlignment] Comparing", ncol(d1), "peaks to", 
215 216
                 ncol(d2), "peaks -- D=", D, "seconds,", 'metric=',
216 217
                 metric, ', type=', type, '\n')
217
-        v <- dynRT(S=r) # RR, modified to be used with normDotProduct()
218
+        v <- dynRT(S = r) # RR, modified to be used with normDotProduct()
218 219
     }
219 220
     
220
-    v$match <- v$match[!is.na(v$match[,2]),] # remove non-matched peaks
221
+    v$match <- v$match[!is.na(v$match[,2]), ] # remove non-matched peaks
221 222
     
222 223
     sim <- 0
223
-    for(i in 1:nrow(v$match)){
224
+    for(i in 1:nrow(v$match)) {
224 225
         sim <- sim + r[v$match[i, 1], v$match[i, 2]]#
225 226
     }
226
-    sim <- sim/nrow(v$match)
227
-    if(verbose) 
228
-        cat("[peaksAlignment] ", nrow(v$match), "matched.  Similarity=", 
229
-            sim, "\n")
230
-    object <- new("peaksAlignment", v=v, r=r, dist=sim, 
231
-                  compressed=FALSE, gap=gap, D=D)
232
-    if(compress){
227
+    sim <- sim / nrow(v$match)
228
+    if(verbose) {
229
+        cat("[peaksAlignment] ", nrow(v$match), "matched.  Similarity=",
230
+        sim, "\n")
231
+        }
232
+    object <- new("peaksAlignment", v = v, r = r, dist = sim, 
233
+                  compressed = FALSE, gap = gap, D = D)
234
+    if(compress) {
233 235
         compress(object)
234
-    }else{
236
+    } else {
235 237
         object
236 238
     }
237 239
 }
... ...
@@ -267,23 +269,24 @@ peaksAlignment <- function(d1, d2, t1, t2, gap=0.5, D=50,
267 269
 #' @examples
268 270
 #'
269 271
 #' require(gcspikelite)
270
-#'
271
-#' ## paths and files
272
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
273
-#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
274
-#' eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
275
-#'
276
-#' ## read data
277
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5))
278
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
279
-#'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5)
280
-#'
272
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
273
+#'                     sep = "/"),"CDF", full = TRUE)
274
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
275
+#' ## create settings object
276
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
277
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
278
+#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
279
+#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
280
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
281
+#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
282
+#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
283
+#' data
281 284
 #' ## image plot
282
-#' plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE)
285
+#' plotChrom(data, rtrange = c(7.5,8.5), plotPeaks = TRUE, plotPeakLabels =TRUE)
283 286
 #'
284 287
 #' ## align two chromatogram
285
-#' pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
286
-#'                      pd@peaksrt[[1]], pd@peaksrt[[2]], D = 50,
288
+#' pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
289
+#'                      data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
287 290
 #'                      compress = FALSE, type = 1, metric = 1,
288 291
 #'                      gap = 0.5)
289 292
 #' plotAlignment(pA)
... ...
@@ -16,53 +16,52 @@
16 16
 #' @author riccardo.romoli@@unifi.it
17 17
 #' @examples
18 18
 #' 
19
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
20
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
21
-#' # read data, peak detection results
22
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
23
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
24
-#'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
25
-#'                    sleep=0)
19
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
20
+#'                     sep = "/"),"CDF", full = TRUE)
21
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
22
+#' ## create settings object
23
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
24
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
25
+#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
26
+#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
27
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
28
+#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
29
+#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
30
+#' data
26 31
 #' ## align two chromatogram
27
-#' pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
28
-#'                      pd@peaksrt[[1]], pd@peaksrt[[2]], D=50,
29
-#'                      metric=3, compress=FALSE, type=2, penality=0.2)
32
+#' pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
33
+#'                      data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
34
+#'                      metric = 3, compress = FALSE, type = 2, penality = 0.2)
30 35
 #' pA@v$match
31 36
 #' ## plot the mass spectra
32 37
 #' par(mfrow=c(2,1))
33
-#' plotFrags(object=pd, sample=1, specID=10)
34
-#' plotFrags(object=pd, sample=2, specID=12)
38
+#' plotFrags(object=data, sample=1, specID=10)
39
+#' plotFrags(object=data, sample=2, specID=12)
35 40
 #' 
36 41
 #' @export plotFrags
37
-plotFrags <- function(object, sample, specID, normalize = TRUE, ...){
42
+plotFrags <- function(object, sample, specID, normalize = TRUE, ...) {
38 43
     sp <- as.numeric(specID)
39
-    ## sample <- object@files[sample]
40
-    ## i <- grep(pattern = sample, object@files)
41
-    if(length(sp) > 0){
42
-        y <- object@peaksdata[[sample]][,sp]        
43
-        if(normalize){
44
-            y <- sapply(y, function(x){
44
+    if (length(sp) > 0) {
45
+        y <- object@peaksdata[[sample]][, sp]
46
+        if (normalize) {
47
+            y <- sapply(y, function(x) {
45 48
                 i <- which.max(y)
46
-                (100*x)/y[i]}
47
-                )}   
48
-        plot(y,
49
-             type = 'h',
50
-             main = paste('Sample', names(object@peaksdata[sample]), 'RT', object@peaksrt[[sample]][sp], 'min'),
51
-             xlab = 'm/z',
49
+                (100 * x) / y[i]}
50
+                )
51
+                }
52
+        plot(y, type = "h", main = paste("Sample",
53
+            names(object@peaksdata[sample]), "RT",
54
+            object@peaksrt[[sample]][sp], "min"), xlab = "m/z",
52 55
              ylab =
53
-                 if(normalize)
54
-                 {
55
-                     'Rel. Abundance'
56
-                 }
57
-                 else
58
-                 {
59
-                     'Abs. Abundance'
56
+                 if (normalize) {
57
+                     "Rel. Abundance"
58
+                 } else {
59
+                     "Abs. Abundance"
60 60
                  }, ...)
61
-    }
62
-    else
63
-    {
64
-        stop(paste('The spectrum is not present in the sample', object@files[sample], '\n'))
65
-    }
61
+        } else {
62
+        stop(paste("The spectrum is not present in the sample",
63
+        object@files[sample], '\n'))
64
+        }
66 65
 }
67 66
 
68 67
 
... ...
@@ -89,41 +88,42 @@ plotFrags <- function(object, sample, specID, normalize = TRUE, ...){
89 88
 #' @keywords gatherInfo() plot()
90 89
 #' @examples
91 90
 #' 
92
-#' ## Rd workflow
93
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/")
94
-#' cdfFiles <- dir(gcmsPath,"CDF", full = TRUE)
95
-#' 
96
-#' # read data, peak detection results
97
-#' pd <- peaksDataset(cdfFiles[1:4], mz = seq(50,550), rtrange = c(7.5,10.5))
98
-#' pd <- addXCMSPeaks(files = cdfFiles[1:4], object = pd, peakPicking = c('mF'),
99
-#'                    snthresh = 2, fwhm = 8,  step = 0.5, steps = 2, mzdiff = 0.5,
100
-#'                    sleep = 0)
91
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
92
+#'                     sep = "/"),"CDF", full = TRUE)
93
+#' data <- peaksDataset(files[1:4], mz = seq(50, 550), rtrange = c(7.5, 8.5))
94
+#' ## create settings object
95
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
96
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
97
+#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
98
+#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
99
+#' data <- addXCMSPeaks(files[1:4], data, settings = mfp, minintens = 100,
100
+#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
101
+#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
102
+#' data
101 103
 #' ## multiple alignment
102
-#' ma <- multipleAlignment(pd, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05, bw.gap = 0.6, 
103
-#'                         bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50,
104
-#'                         verbose = TRUE, metric = 2, type = 2)
104
+#' ma <- multipleAlignment(data, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05, 
105
+#'  bw.gap = 0.6, bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50,
106
+#'  verbose = TRUE, metric = 2, type = 2)
105 107
 #' 
106 108
 #' ## gather apex intensities
107
-#' gip <- gatherInfo(pd, ma)
109
+#' gip <- gatherInfo(data, ma)
108 110
 #' gip[[33]]
109
-#' plotAlignedFrags(object = pd, outList = gip, specID = 33)
111
+#' plotAlignedFrags(object = data, outList = gip, specID = 33)
110 112
 #' 
111 113
 #' @export plotAlignedFrags
112
-plotAlignedFrags <- function(object, outList, specID, fullRange = TRUE, normalize = TRUE, ...)
113
-{
114
-specID <- as.numeric(specID)
115
-mz <- outList[[specID]]$mz
116
-abundance <- outList[[specID]]$data
117
-
118
-if(normalize)
119
-{
120
-    for(i in 1:ncol(abundance))
121
-    {
122
-        if(is.na(sum(abundance[,i])))
123
-            next
124
-        abundance[,i] <- 100 * abundance[,i] / abundance[which.max(abundance[,i]), i]
125
-    }
126
-}   
114
+plotAlignedFrags <- function(object, outList, specID, fullRange = TRUE, 
115
+    normalize = TRUE, ...) {
116
+        specID <- as.numeric(specID)
117
+        mz <- outList[[specID]]$mz
118
+        abundance <- outList[[specID]]$data
119
+        if (normalize) {
120
+            for(i in 1:ncol(abundance)) {
121
+                if(is.na(sum(abundance[,i])))
122
+                next
123
+                abundance[,i] <- 100 * abundance[,i] / 
124
+                    abundance[which.max(abundance[,i]), i]
125
+                }
126
+            }
127 127
 
128 128
 ## set the plot grid
129 129
 specnum <- table(apply(abundance, 2, sum)) # count number of massSpec different from NA
... ...
@@ -1,27 +1,34 @@
1
-##' Compress method for progressiveAlignment
1
+##' Decompress method for progressiveAlignment
2 2
 ##'
3
-##' Compress method for progressiveAlignment
3
+##' Decompress method for progressiveAlignment
4 4
 ##' @title Compress method for progressiveAlignment
5
-##' @param object dummy 
6
-##' @param verbose dummy
5
+##' @param object progressiveAlignment object
6
+##' @param verbose logical
7 7
 ##' @param ... dummy
8 8
 ##' @author MR
9
+##' @importFrom SparseM as.matrix
10
+##' @import methods
11
+##' @importFrom methods setMethod new
9 12
 ##' @keywords internal
10 13
 setMethod("decompress", "progressiveAlignment",
11
-          function(object, verbose=TRUE, ...){
12
-              if(object@merges[[1]]$compressed == FALSE) {
13
-                  if(verbose)
14
-                      cat("[decompress.progressiveAlignment] Already decompressed.\n")
14
+          function(object, verbose = TRUE, ...) {
15
+              if (object@merges[[1]]$compressed == FALSE) {
16
+                  if (verbose)
17
+                      cat("[decompress.progressiveAlignment] 
18
+                      Already decompressed.\n")
15 19
                   return(object)
16 20
               }
17
-              for(i in 1:length(object@merges)) {
18
-                  if(object@merges[[i]]$compressed) {
19
-                      object@merges[[i]]$r <- 1-as.matrix(object@merges[[i]]$r)
21
+              for (i in 1:length(object@merges)) {
22
+                  if (object@merges[[i]]$compressed) {
23
+                      object@merges[[i]]$r <- 
24
+                        1 - as.matrix(object@merges[[i]]$r)
20 25
                       object@merges[[i]]$compressed <- FALSE
21 26
                   }
22 27
               }
23 28
               new("progressiveAlignment", object)
24
-          })
29
+          }
30
+          )
31
+
25 32
 
26 33
 ##' Decompress method for progressiveAlignment
27 34
 ##'
... ...
@@ -30,24 +37,29 @@ setMethod("decompress", "progressiveAlignment",
30 37
 ##' @param object dummy
31 38
 ##' @param verbose dummy
32 39
 ##' @param ... dummy
33
-##' @keywords internal
34 40
 ##' @author MR
35 41
 ##' @importFrom SparseM as.matrix.csc
36
-setMethod("compress","progressiveAlignment",
37
-          function(object,verbose=TRUE,...) {
38
-              if(object@merges[[1]]$compressed) {
39
-                  if(verbose)
40
-                      cat("[compress.progressiveAlignment] Already compressed.\n")
42
+##' @import methods
43
+##' @importFrom methods setMethod new
44
+##' @keywords internal
45
+setMethod("compress", "progressiveAlignment",
46
+          function(object, verbose = TRUE, ...) {
47
+              if (object@merges[[1]]$compressed) {
48
+                  if (verbose)
49
+                      cat("[compress.progressiveAlignment]
50
+                      Already compressed.\n")
41 51
                   return(object)
42 52
               }
43
-              for(i in 1:length(object@merges)) {
44
-                  if(!(object@merges[[i]]$compressed)) {
45
-                      object@merges[[i]]$r <- as.matrix.csc(1-object@merges[[i]]$r)
53
+              for (i in 1:length(object@merges)) {
54
+                  if (!(object@merges[[i]]$compressed)) {
55
+                      object@merges[[i]]$r <-
56
+                        as.matrix.csc(1 - object@merges[[i]]$r)
46 57
                       object@merges[[i]]$compressed <- TRUE
47 58
                   }
48 59
               }
49
-              new("progressiveAlignment",object)
50
-          })
60
+              new("progressiveAlignment", object)
61
+          }
62
+          )
51 63
 
52 64
 
53 65
 ##' Show method for progressiveAlignment object
... ...
@@ -57,19 +69,19 @@ setMethod("compress","progressiveAlignment",
57 69
 ##' @author MR
58 70
 ##' @export
59 71
 ##' @noRd
60
-setMethod("show","progressiveAlignment",
61
-function(object){
62
-   cat("An object of class \"", class(object), "\"\n", sep="")
72
+setMethod("show", "progressiveAlignment", function(object){
73
+   cat("An object of class \"", class(object), "\"\n", sep = "")
63 74
    cat(length(object@merges), "merges\n")
64
-})
75
+}
76
+)
65 77
 
66 78
 
67 79
 
68 80
 #' Data Structure for progressive alignment of many GCMS samples
69
-#' 
81
+#'
70 82
 #' Performs a progressive peak alignment (clustalw style) of multiple GCMS peak
71 83
 #' lists
72
-#' 
84
+#'
73 85
 #' The progressive peak alignment we implemented here for multiple GCMS peak
74 86
 #' lists is analogous to how \code{clustalw} takes a set of pairwise sequence
75 87
 #' alignments and progressively builds a multiple alignment.  More details can
... ...
@@ -97,237 +109,188 @@ function(object){
97 109
 #' of Melbourne.
98 110
 #' @keywords classes
99 111
 #' @examples
100
-#' 
112
+#'
101 113
 #' require(gcspikelite)
102
-#' ## paths and files
103
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
104
-#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
105
-#' eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
106
-#' 
107
-#' ## read data, peak detection results
108
-#' pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5))
109
-#' pd <- addAMDISPeaks(pd, eluFiles[1:2])
110
-#' 
111
-#' ca <- clusterAlignment(pd, gap=.5, D=.05, df=30, metric=1, type=1,
112
-#'                        compress = FALSE)
113
-#' pa <- progressiveAlignment(pd, ca, gap=.6, D=.1, df=30, type=1, compress = FALSE)
114
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
115
+#'                     sep = "/"),"CDF", full = TRUE)
116
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
117
+#' ## create settings object
118
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
119
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
120
+#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
121
+#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
122
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
123
+#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
124
+#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
125
+#' data
126
+#' ca <- clusterAlignment(data, gap = 0.5, D = 0.05, df = 30, metric = 1,
127
+#'   type = 1, compress = FALSE)
128
+#' pa <- progressiveAlignment(data, ca, gap = 0.6, D = 0.1, df = 30,
129
+#'  type = 1, compress = FALSE)
114 130
 #'
115 131
 #' @export
116 132
 progressiveAlignment <- function(pD, cA, D = 50, gap = 0.5, verbose = TRUE,
117 133
                                  usePeaks = TRUE, df = 30, compress = FALSE,
118
-                                 type=2)
119
-{
120
-    ## options(error = recover)
134
+                                 type = 2) {
121 135
     m <- cA@merge
122 136
     merges <- vector("list", nrow(m))
123
-    if(usePeaks)
124
-    {
125
-        pd <- pD@peaksdata
126
-    }
127
-    else
128
-    {
129
-        pd <- pD@rawdata
137
+    if (usePeaks) {
138
+      pd <- pD@peaksdata
139
+    } else {
140
+      pd <- pD@rawdata
130 141
     }
131 142
     pD <- NULL #?
132 143
     cA <- decompress(cA, verbose = verbose)
133
-    
134
-    for(i in 1:nrow(m))
135
-    {
136
-        if(verbose)
137
-        {
138
-            cat("[progressiveAlignment] Doing merge", m[i,], "\n")
139
-        }
140
-        
141
-        ## left  
142
-        if(m[i,1] < 0)
143
-        {
144
-            left.runs <- (-m[i,1])
145
-            left.ind <- matrix(1:ncol(pd[[left.runs]]), ncol = 1)
146
-        }
147
-        else
148
-        {
149
-            left.runs <- merges[[m[i,1]]]$runs
150
-            left.ind <- merges[[m[i,1]]]$ind
151
-        }
152
-  
153
-        ## right
154
-        if(m[i,2] < 0)
155
-        {
156
-            right.runs <- abs(m[i,2])
157
-            right.ind <- matrix(1:ncol(pd[[right.runs]]), ncol = 1) ## error
158
-            ## subscript out of bound
144
+
145
+    for (i in 1:nrow(m)) {
146
+      if (verbose) {
147
+            cat("[progressiveAlignment] Doing merge", m[i, ], "\n")
159 148
         }
160
-        else
161
-        {
162
-            right.ind <- merges[[m[i,2]]]$ind
163
-            right.runs <- merges[[m[i,2]]]$runs
149
+      ## left
150
+      if (m[i, 1] < 0) {
151
+        left.runs <- (-m[i, 1])
152
+        left.ind <- matrix(1:ncol(pd[[left.runs]]), ncol = 1)
153
+        } else {
154
+          left.runs <- merges[[m[i, 1]]]$runs
155
+          left.ind <- merges[[m[i, 1]]]$ind
164 156
         }
165
-	
166
-	  if(verbose)
167
-      {
157
+      ## right
158
+      if (m[i, 2] < 0) {
159
+        right.runs <- abs(m[i, 2])
160
+        # subscript out of bound
161
+        right.ind <- matrix(1:ncol(pd[[right.runs]]), ncol = 1)
162
+        } else {
163
+          right.ind <- merges[[m[i, 2]]]$ind
164
+          right.runs <- merges[[m[i, 2]]]$runs
165
+          }
166
+      if (verbose) {
168 167
         cat("[progressiveAlignment] left.runs:", left.runs, ", ")
169 168
         cat("right.runs:", right.runs, "\n")
170 169
       }
171
-	
172
-	  if(length(right.runs) == 1 & length(left.runs) == 1)
173
-      {
174
-        al <- cA@alignments[[cA@aligned[left.runs,right.runs]]]
175
-        v <- al@v
176
-        sim <- al@r
177
-        mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match)
178
-        new.ind <- mi
179
-      }
180
-    else
181
-      {
170
+      if (length(right.runs) == 1 & length(left.runs) == 1) {
171
+      al <- cA@alignments[[cA@aligned[left.runs, right.runs]]]
172
+      v <- al@v
173
+      sim <- al@r
174
+      mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match)
175
+      new.ind <- mi
176
+      } else {
182 177
         sim <- matrix(0, nrow = nrow(left.ind), ncol = nrow(right.ind))
183 178
         count <- matrix(0, nrow = nrow(left.ind), ncol = nrow(right.ind))
184
-        if(verbose)
185
-          {
186
-            cat("[progressiveAlignment] (dot=50) going to", nrow(sim), ":")
179
+        if (verbose) {
180
+          cat("[progressiveAlignment] (dot=50) going to", nrow(sim), ":")
187 181
           }
188
-        for(r in 1:nrow(sim))
189
-        {
190
-          if(verbose)
191
-            {
192
-              if(r %% 50 == 0)
193
-                {
194
-                  cat(".")
182
+          for (r in 1:nrow(sim)) {
183
+            if (verbose) {
184
+              if(r %% 50 == 0) {
185
+                cat(".")
195 186
                 }
196
-            }
197
-          for(cc in max(1, r - df) : min(r + df, ncol(sim)))
198
-          {
199
-          # cc generate subsrcipt out of bound
200
-            if(cc > dim(sim)[2])
201
-            {
202
-              cat("\n\n", "Try to increase the df parameter to get a better alignment", "\n\n")
187
+                }
188
+                for (cc in max(1, r - df) : min(r + df, ncol(sim))) {
189
+                  # cc generate subsrcipt out of bound
190
+                  if (cc > dim(sim)[2]) {
191
+                    cat("\n\n", "Try to increase the df parameter to get a 
192
+                    better alignment", "\n\n")
203 193
               # cc <- dim(sim)[2]
204
-            }
205
-            for(lr in 1:length(left.runs))
206
-            {
207
-              for(rr in 1:length(right.runs))
208
-              {
209
-                ai <- cA@aligned[left.runs[lr], right.runs[rr]]
210
-                # this.sim <- cA$alignments[[ai]]$r
211
-                lind <- left.ind[r,lr]
212
-                rind <- right.ind[cc,rr]# Error: subscript out of bounds. Solution: increase df
213
-                if(!is.na(lind) & !is.na(rind))
214
-                  {
215
-                    if(left.runs[lr] < right.runs[rr])
216
-                      {
217
-                        sim[r,cc] <- sim[r,cc] + cA@alignments[[ai]]@r[lind,rind]
194
+              }
195
+              for (lr in 1:length(left.runs)) {
196
+                for(rr in 1:length(right.runs)) {
197
+                  ai <- cA@aligned[left.runs[lr], right.runs[rr]]
198
+                  # this.sim <- cA$alignments[[ai]]$r
199
+                  lind <- left.ind[r, lr]
200
+                  rind <- right.ind[cc, rr]
201
+                  # Error: subscript out of bounds. Solution: increase df
202
+                  if (!is.na(lind) & !is.na(rind)) {
203
+                    if(left.runs[lr] < right.runs[rr]) {
204
+                      sim[r, cc] <- sim[r, cc] + cA@alignments[[ai]]@r[lind, rind]
205
+                      } else {
206
+                        sim[r, cc] <- sim[r, cc] + cA@alignments[[ai]]@r[rind, lind]
218 207
                       }
219
-                    else
220
-                      {
221
-                        sim[r,cc] <- sim[r,cc] + cA@alignments[[ai]]@r[rind,lind]
208
+                      count[r, cc] = count[r, cc] + 1
222 209
                       }
223
-                    count[r,cc] = count[r,cc] + 1
224 210
                   }
211
+                }
225 212
               }
226 213
             }
227
-          }
228
-        }
229
-        if(verbose)
230
-          {
231
-            cat("\n")
232
-          }
233
-            
234
-        if(type == 2) # RR
235
-          {
236
-            v <- dynRT(S = sim)
237
-            v$match <- v$match[!is.na(v$match[,2]),] # remove non-matched peaks
238
-          }
239
-        if(type == 1)
240
-          {
241
-            sim[count > 0] <- sim[count > 0] / count[count > 0]
242
-            sim[sim == 0] <- 1
243
-            v <- dp(sim, gap = gap, verbose = verbose)#
244
-          }
245
-          mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match)
246
-          new.ind <- cbind(left.ind[mi[,1],], right.ind[mi[,2],])
247
-        }
248
-        rownames(new.ind) <- 1:nrow(new.ind)
249
-        merges[[i]] <- list(ind = new.ind, mi = mi, runs = c(left.runs, right.runs),
250
-                            left = left.runs, right = right.runs, r = sim, 
251
-                            compressed = FALSE)
252
-  }
253
-
214
+            if (verbose) {
215
+              cat("\n")
216
+              } 
217
+              if (type == 2) {
218
+                v <- dynRT(S = sim)
219
+                # remove non-matched peaks
220
+                v$match <- v$match[!is.na(v$match[, 2]), ]
221
+                }
222
+              if (type == 1) {
223
+                sim[count > 0] <- sim[count > 0] / count[count > 0]
224
+                sim[sim == 0] <- 1
225
+                v <- dp(sim, gap = gap, verbose = verbose)
226
+                }
227
+    mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match)
228
+    new.ind <- cbind(left.ind[mi[, 1], ], right.ind[mi[, 2], ])
229
+    }
230
+    rownames(new.ind) <- 1:nrow(new.ind)
231
+    merges[[i]] <- list(ind = new.ind, mi = mi, runs = c(left.runs, right.runs),
232
+      left = left.runs, right = right.runs, r = sim, compressed = FALSE)
233
+      }
254 234
   cA <- NULL
255
-  if(verbose)
256
-    {
235
+  if (verbose) {
257 236
       print(gc())
258 237
     }
259 238
   pA <- new("progressiveAlignment", merges = merges)
260
-  if(compress)
261
-    {
262
-      return(compress(pA, verbose=verbose))
263
-    }
264
-  else
265
-    {
239
+  if (compress) {
240
+      return(compress(pA, verbose = verbose))
241
+    } else {
266 242
       return(pA)
267 243
     }
268 244
 }
269 245
 
270 246
 
271 247
 .merge.indices <- function(nl, nr, m) {
272
-    lind <- cbind(1:nl, 0)
273
-    lind[lind[,1] %in% m[,1],2] <- 1
274
-    rind <- cbind(1:nr,0)
275
-    rind[rind[,1] %in% m[,2],2] <- 1
276
-    mg <- matrix(NA, nrow=nrow(lind)+nrow(rind)-nrow(m), ncol=2)
277
-    #print(dim(mg))
278
-    li <- 1; ri <- 1; i <- 1
279
-    while(i <= nrow(mg)){
280
-        if(li > nrow(lind) & ri <= nrow(rind))
281
-        {
282
-            mg[i,] <- c(NA,rind[ri,1])
283
-            ri <- ri+1
284
-            i <- i+1
285
-            next
286
-	}
287
-        if(li <= nrow(lind) & ri > nrow(rind))
288
-        {
289
-            mg[i,] <- c(lind[li,1], NA)
290
-            li <- li+1	
291
-            i <- i+1    
292
-            next
293
-	}
294
-        if(lind[li,2] == 1)
295
-        {
296
-            if(rind[ri,2] == 1)
297
-            {  # match
298
-                mg[i,] <- c(lind[li,1], rind[ri,1])
299
-		ri <- ri+1
300
-		li <- li+1
301
-		#cat("match",i,li,ri,"-",mg[i,],"\n")
302
-            }
303
-            else
304
-            {             # right unmatched
305
-                mg[i,] <- c(NA, rind[ri,1])
306
-		ri <- ri+1
307
-		#cat("right unmatched",i,li,ri,"-",mg[i,],"\n")
308
-            }
309
-	}
310
-        else
311
-        {
312
-            if(rind[ri,2] == 1)
313
-            {  # left unmatched
314
-                mg[i,] <- c(lind[li,1], NA)
315
-		li <- li+1	    
316
-		#cat("left unmatched",i,li,ri,"-",mg[i,],"\n")
317
-            }
318
-            else
319
-            {             # both unmatched
320
-                mg[i,] <- c(lind[li,1], NA)
321
-		#cat("both unmatched - A",i,li,ri,"-",mg[i,],"\n")
322
-		i <- i+1
323
-                mg[i,] <- c(NA, rind[ri,1])
324
-		li <- li+1
325
-		ri <- ri+1
326
-		#cat("both unmatched - B",i,li,ri,"-",mg[i,],"\n")
248
+  lind <- cbind(1:nl, 0)
249
+  lind[lind[, 1] %in% m[, 1], 2] <- 1
250
+  rind <- cbind(1:nr, 0)
251
+  rind[rind[, 1] %in% m[, 2], 2] <- 1
252
+  mg <- matrix(NA, nrow = nrow(lind) + nrow(rind) - nrow(m), ncol = 2)
253
+  li <- 1; ri <- 1; i <- 1
254
+  while (i <= nrow(mg)) {
255
+    if (li > nrow(lind) & ri <= nrow(rind)) {
256
+      mg[i, ] <- c(NA, rind[ri, 1])
257
+      ri <- ri + 1
258
+      i <- i + 1
259
+      next
260
+      }
261
+    if (li <= nrow(lind) & ri > nrow(rind)) {
262
+      mg[i, ] <- c(lind[li, 1], NA)
263
+      li <- li + 1
264
+      i <- i + 1
265
+      next
266
+      }
267
+    if (lind[li, 2] == 1) {
268
+      if(rind[ri, 2] == 1) { # match
269
+        mg[i, ] <- c(lind[li, 1], rind[ri, 1])
270
+        ri <- ri + 1
271
+		    li <- li + 1
272
+        # cat("match", i, li, ri, "-", mg[i, ], "\n")
273
+        } else { # right unmatched
274
+          mg[i, ] <- c(NA, rind[ri, 1])
275
+          ri <- ri + 1
276
+		      # cat("right unmatched", i, li, ri, "-", mg[i, ], "\n")
277
+          }
278
+      } else {
279
+        if (rind[ri, 2] == 1) { # left unmatched
280
+          mg[i, ] <- c(lind[li, 1], NA)
281
+		      li <- li + 1
282
+          # cat("left unmatched", i, li, ri, "-", mg[i, ], "\n")
283
+          } else {# both unmatched
284
+            mg[i, ] <- c(lind[li, 1], NA)
285
+            # cat("both unmatched - A", i, li, ri, "-", mg[i, ], "\n")
286
+            i <- i + 1
287
+            mg[i, ] <- c(NA, rind[ri, 1])
288
+            li <- li + 1
289
+            ri <- ri + 1
290
+            # cat("both unmatched - B", i, li, ri, "-", mg[i, ], "\n")
327 291
             }
328
-	}
329
-	i <- i+1
292
+        }
293
+        i <- i + 1
330 294
     }
331 295
     mg
332
-}
333
-
296
+}
334 297
\ No newline at end of file
... ...
@@ -19,52 +19,55 @@
19 19
 #' @examples
20 20
 #' 
21 21
 #' require(gcspikelite)
22
-#' # paths and files
23
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/")
24
-#' cdfFiles <- dir(gcmsPath,"CDF",full=TRUE)
25
-#' # read data, peak detection results
26
-#' pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550),
27
-#'                    rtrange=c(7.5,8.5))
28
-#' pd <- addXCMSPeaks(files=cdfFiles[1:2], object=pd,
29
-#'                    peakPicking=c('mF'), snthresh=3, fwhm=4,
30
-#'                    step=1, steps=2, mzdiff=0.5)
31
-#' ma <- multipleAlignment(pd = pd, group = c(1,1),
22
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
23
+#'                     sep = "/"),"CDF", full = TRUE)
24
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
25
+#' ## create settings object
26
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
27
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
28
+#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
29
+#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
30
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
31
+#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
32
+#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
33
+#' data
34
+#' ma <- multipleAlignment(pd = data, group = c(1,1),
32 35
 #'                         filterMin = 1, metric = 2, type = 2)
33
-#' outList <- gatherInfo(pd, ma)
34
-#' mtxD <- retFatMatrix(object = pd, data = outList, minFilter = 1)
36
+#' outList <- gatherInfo(data, ma)
37
+#' mtxD <- retFatMatrix(object = data, data = outList, minFilter = 1)
35 38
 #' 
36 39
 #' @export retFatMatrix
37
-retFatMatrix <- function (object, data, minFilter = round(length(object@files)/3*2)) 
38
-{
39
-    a <- lapply(seq(along = data), function(x)
40
-    {
40
+retFatMatrix <- function (object, data, 
41
+    minFilter = round(length(object@files) / 3 * 2)) {
42
+        a <- lapply(seq(along = data), function(x) {
41 43
         apply(data[[x]]$data, 2, sum)
42
-    }
43
-    )
44
+        }
45
+        )
44 46
     ## i nomi delle colonne equivalgono al numero del file;
45 47
     ## il numero della riga equivale alla tasca della lista di gatherInfo()
46
-    abumtx <- do.call(rbind, a) 
48
+    abumtx <- do.call(rbind, a)
47 49
     abumtx <- apply(abumtx, 1, "[")
48 50
     files_to_merge <- rownames(abumtx)
49
-    if(length(grep(pattern = "^[1-9].",  files_to_merge)) == 0)
50
-    {
51
-        files.idx <- as.numeric(sub(pattern = "^.", replacement = "", files_to_merge))
51
+    if (length(grep(pattern = "^[1-9].",  files_to_merge)) == 0) {
52
+        files.idx <- as.numeric(sub(pattern = "^.", replacement = "", 
53
+            files_to_merge))
52 54
     }
53
-    if(length(grep(pattern = "^[1-9].",  files_to_merge)) > 0)
54
-    {
55
-        files.idx <- as.numeric(sub(pattern = "^[1-9].", replacement = "", files_to_merge))
55
+    if (length(grep(pattern = "^[1-9].",  files_to_merge)) > 0) {
56
+        files.idx <- as.numeric(sub(pattern = "^[1-9].", replacement = "", 
57
+            files_to_merge))
56 58
     }
57 59
     sample <- object@files[files.idx]
58
-    colnames(abumtx) <- sapply(1:ncol(abumtx), function(x){paste0("Feat", x)})
60
+    colnames(abumtx) <- sapply(1:ncol(abumtx), function(x) {
61
+        paste0("Feat", x)
62
+        }
63
+        )
59 64
     mf <- minFilter
60 65
     keep <- c()
61
-    for(g in 1:ncol(abumtx))
62
-    {
66
+    for (g in 1:ncol(abumtx)) {
63 67
         keep[g] <- sum(!is.na(abumtx[, g])) >= mf
64
-    }
68
+        }
65 69
 
66 70
     abumtx[is.na(abumtx)] <- c(0)
67 71
     df <- cbind.data.frame(sample, abumtx[, keep])
68 72
     return(df)
69
-}
70
-
73
+}
71 74
\ No newline at end of file
... ...
@@ -4,8 +4,13 @@
4 4
 \alias{addChromaTOFPeaks}
5 5
 \title{Add ChromaTOF peak detection results}
6 6
 \usage{
7
-addChromaTOFPeaks(object, fns = dir(, "[Tt][Xx][Tx]"), rtDivide = 60,
8
-  verbose = TRUE, ...)
7
+addChromaTOFPeaks(
8
+  object,
9
+  fns = dir(, "[Tt][Xx][Tx]"),
10
+  rtDivide = 60,
11
+  verbose = TRUE,
12
+  ...
13
+)
9 14
 }
10 15
 \arguments{
11 16
 \item{object}{a \code{peaksDataset} object.}
... ...
@@ -2,35 +2,57 @@
2 2
 % Please edit documentation in R/addXCMSPeaks.R
3 3
 \name{addXCMSPeaks}
4 4
 \alias{addXCMSPeaks}
5
-\title{Add xcms/CAMERA peak detection results}
5
+\title{addXCMSPeaks}
6 6
 \usage{
7
-addXCMSPeaks(files, object, peakPicking = c("cwt", "mF"),
8
-  perfwhm = 0.75, quick = TRUE, ...)
7
+addXCMSPeaks(
8
+  files,
9
+  object,
10
+  settings,
11
+  rtrange = NULL,
12
+  mzrange = NULL,
13
+  perfwhm = 0.75,
14
+  minintens = 100,
15
+  minfeat = 6,
16
+  multipleMatchedFilter = FALSE,
17
+  multipleMatchedFilterParam = list(fwhm = c(5, 10, 20), mz_abs = 0.1, rt_abs = 3)
18
+)
9 19
 }
10 20
 \arguments{
11
-\item{files}{character vector of same length as \code{object@rawdata} (user
12
-ensures the order matches)}
21
+\item{files}{list of chromatogram files}
13 22
 
14
-\item{object}{a \code{peaksDataset} object.}
23
+\item{object}{a \code{peakDataset} object}
15 24
 
16
-\item{peakPicking}{Methods to use for peak detection. See details.}
25
+\item{settings}{list. It conteins the settings for the peak-picking}
17 26
 
18
-\item{perfwhm}{percentage of full width half maximum. See
19
-CAMERA::groupFWHM() for more details}
27
+\item{rtrange}{vector; retention time range}
20 28
 
21
-\item{quick}{logical. See CAMERA::annotate() for more details}
29
+\item{mzrange}{vector, mz range}
22 30
 
23
-\item{...}{arguments passed on to \code{xcmsSet} and \code{annotate}}
31
+\item{perfwhm}{etermines the maximal retentiontime difference of features in
32
+one pseudospectrum.}
33
+
34
+\item{minintens}{minimum ion intensity to be included into a pseudospectra}
35
+
36
+\item{minfeat}{minimum number of ion to be created a pseudospectra}
37
+
38
+\item{multipleMatchedFilter}{logical Try to remove redundant peaks, in 
39
+this case where there are any peaks within an absolute m/z value of 0.2 and 
40
+within 3 s for any one sample in the xcmsSet (the largest peak is kept)}
41
+
42
+\item{multipleMatchedFilterParam}{list. It conteins the settings for the
43
+peak-picking. mz_abs represent the the mz range; rt_abs represent thert range}
24 44
 }
25 45
 \value{
26 46
 \code{peaksDataset} object
27 47
 }
28 48
 \description{
49
+Add xcms/CAMERA peak detection results
50
+}
51
+\details{
29 52
 Reads the raw data using xcms, group each extracted ion according to their
30 53
 retention time using CAMERA and attaches them to an already created
31 54
 \code{peaksDataset} object
32
-}
33
-\details{
55
+
34 56
 Repeated calls to xcmsSet and annotate to perform peak-picking and
35 57
 deconvolution. The peak detection results are added to the original
36 58
 \code{peaksDataset} object. Two peak detection alorithms are available:
... ...
@@ -39,18 +61,18 @@ approach (peakPicking=c('mF')) described by Smith et al (2006). For further
39 61
 information consult the xcms package manual.
40 62
 }
41 63
 \examples{
42
-
43
-# need access to CDF (raw data)
44
-require(gcspikelite)
45
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
46
-
47
-# full paths to file names
48
-cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
49
-
50
-# create a 'peaksDataset' object and add XCMS peaks to it
51
-pd <- peaksDataset(cdfFiles[1], mz=seq(50,550), rtrange=c(7.5,8.5))
52
-pd <- addXCMSPeaks(cdfFiles[1], pd, peakPicking=c('mF'),
53
-                   snthresh=3, fwhm=4, step=1, steps=2, mzdiff=0.5)
64
+files <- list.files(path = paste(find.package("gcspikelite"), "data",
65
+                    sep = "/"),"CDF", full = TRUE)
66
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
67
+## create settings object
68
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
69
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
70
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
71
+ extendLengthMSW = TRUE, mzCenterFun = "wMean")
72
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
73
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
74
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
75
+data
54 76
 
55 77
 }
56 78
 \seealso{
... ...
@@ -8,9 +8,22 @@
8 8
 \alias{betweenAlignment-method}
9 9
 \title{Data Structure for "between" alignment of many GCMS samples}
10 10
 \usage{
11
-betweenAlignment(pD, cAList, pAList, impList, filterMin = 1, gap = 0.7,
12
-  D = 10, usePeaks = TRUE, df = 30, verbose = TRUE, metric = 2,
13
-  type = 2, penality = 0.2, compress = FALSE)
11
+betweenAlignment(
12
+  pD,
13
+  cAList,
14
+  pAList,
15
+  impList,
16
+  filterMin = 1,
17
+  gap = 0.7,
18
+  D = 10,
19
+  usePeaks = TRUE,
20
+  df = 30,
21
+  verbose = TRUE,
22
+  metric = 2,
23
+  type = 2,
24
+  penality = 0.2,
25
+  compress = FALSE
26
+)
14 27
 }
15 28
 \arguments{
16 29
 \item{pD}{a \code{peaksDataset} object}
... ...
@@ -10,8 +10,14 @@
10 10
 \alias{plot,clusterAlignment,ANY-method}
11 11
 \title{Data Structure for a collection of all pairwise alignments of GCMS runs}
12 12
 \usage{
13
-clusterAlignment(pD, runs = 1:length(pD@rawdata), timedf = NULL,
14
-  usePeaks = TRUE, verbose = TRUE, ...)
13
+clusterAlignment(
14
+  pD,
15
+  runs = 1:length(pD@rawdata),
16
+  timedf = NULL,
17
+  usePeaks = TRUE,
18
+  verbose = TRUE,
19
+  ...
20
+)
15 21
 }
16 22
 \arguments{
17 23
 \item{pD}{a \code{peaksDataset} object.}
... ...
@@ -1,6 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/peaksAlignment.R
3
-\docType{methods}
4 3
 \name{compress,peaksAlignment-method}
5 4
 \alias{compress,peaksAlignment-method}
6 5
 \title{Compression method for peaksAlignment object}
... ...
@@ -1,6 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/progressiveAlignment.R
3
-\docType{methods}
4 3
 \name{compress,progressiveAlignment-method}
5 4
 \alias{compress,progressiveAlignment-method}
6 5
 \title{Compress method for progressiveAlignment}
... ...
@@ -35,18 +35,23 @@ retention time window (\code{D})and returns the similarity matrix.
35 35
 
36 36
 ## Not Run
37 37
 require(gcspikelite)
38
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
39
-cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
40
-## read data, peak detection results
41
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
42
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
43
-                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
44
-                   sleep=0)
38
+files <- list.files(path = paste(find.package("gcspikelite"), "data",
39
+                    sep = "/"),"CDF", full = TRUE)
40
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
41
+## create settings object
42
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
43
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
44
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
45
+ extendLengthMSW = TRUE, mzCenterFun = "wMean")
46
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
47
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
48
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
49
+data
45 50
 ## review peak picking
46
-plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
51
+plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
47 52
 
48
-r <- corPrt(pd@peaksdata[[1]], pd@peaksdata[[2]],
49
-           pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, penality=0.2)
53
+r <- corPrt(data@peaksdata[[1]], data@peaksdata[[2]],
54
+           data@peaksrt[[1]], data@peaksrt[[2]], D = 50, penality = 0.2)
50 55
 ## End (Not Run)
51 56
 
52 57
 }
53 58
new file mode 100644
... ...
@@ -0,0 +1,29 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/addXCMSPeaks.R
3
+\name{de_Duper}
4
+\alias{de_Duper}
5
+\title{deDuper}
6
+\usage{
7
+de_Duper(object, mz_abs = 0.1, rt_abs = 2)
8
+}
9
+\arguments{
10
+\item{object}{xcms object}
11
+
12
+\item{mz_abs}{mz range}
13
+
14
+\item{rt_abs}{rt range}
15
+}
16
+\value{
17
+an object of xcms class
18
+}
19
+\description{
20
+Duplicate peak removal function
21
+}
22
+\details{
23
+Remove redundant peaks, in this case where there are any peaks within an
24
+absolute m/z value of 0.2 and within 3 s for any one sample in the xcmsSet
25
+(the largest peak is kept)
26
+}
27
+\author{
28
+r
29
+}
... ...
@@ -1,6 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/peaksAlignment.R
3
-\docType{methods}
4 3
 \name{decompress,peaksAlignment-method}
5 4
 \alias{decompress,peaksAlignment-method}
6 5
 \title{Decompression method for peaksAlignment object}
... ...
@@ -1,6 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/progressiveAlignment.R
3
-\docType{methods}
4 3
 \name{decompress,progressiveAlignment-method}
5 4
 \alias{decompress,progressiveAlignment-method}
6 5
 \title{Compress method for progressiveAlignment}
... ...
@@ -8,17 +7,17 @@
8 7
 \S4method{decompress}{progressiveAlignment}(object, verbose = TRUE, ...)
9 8
 }
10 9
 \arguments{
11
-\item{object}{dummy}
10
+\item{object}{progressiveAlignment object}
12 11
 
13
-\item{verbose}{dummy}
12
+\item{verbose}{logical}
14 13
 
15 14
 \item{...}{dummy}
16 15
 }
17 16
 \description{
18
-Compress method for progressiveAlignment
17
+Decompress method for progressiveAlignment
19 18
 }
20 19
 \details{
21
-Compress method for progressiveAlignment
20
+Decompress method for progressiveAlignment
22 21
 }
23 22
 \author{
24 23
 MR
25 24
new file mode 100644
... ...
@@ -0,0 +1,26 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/importSpectra.R
3
+\name{distToLib}
4
+\alias{distToLib}
5
+\title{distToLib}
6
+\usage{
7
+distToLib(mspLib, outList)
8
+}
9
+\arguments{
10
+\item{mspLib}{a .msp file from NIST}
11
+
12
+\item{outList}{an object from gatherInfo()}
13
+}
14
+\value{
15
+the distance matrix between the mass spec and the aligned spec
16
+}
17
+\description{
18
+The function calculate the distance between each mas spec in the msp file
19
+and the aligned mass spec from each sampe
20
+}
21
+\details{
22
+Return the distance matrix
23
+}
24
+\author{
25
+Riccardo Romoli
26
+}
... ...
@@ -23,21 +23,25 @@ the mass spectra
23 23
 \examples{
24 24
 
25 25
 require(gcspikelite)
26
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
27
-cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
28
-## read data, peak detection results
29
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550),
30
-    rtrange=c(7.5,10.5))
31
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd,
32
-    peakPicking=c('mF'),snthresh=3, fwhm=10,  step=0.1, steps=2,
33
-    mzdiff=0.5, sleep=0)
26
+files <- list.files(path = paste(find.package("gcspikelite"), "data",
27
+                    sep = "/"),"CDF", full = TRUE)
28
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
29
+## create settings object
30
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
31
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
32
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
33
+ extendLengthMSW = TRUE, mzCenterFun = "wMean")
34
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
35
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
36
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
37
+data
34 38
 ## review peak picking
35
-plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
39
+plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
36 40
 ## similarity
37
-r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]], pd@peaksrt[[1]],
38
-    pd@peaksrt[[2]], D=50)
41
+r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]], data@peaksrt[[1]],
42
+    data@peaksrt[[2]], D = 50)
39 43
 ## dynamic retention time based alignment algorithm
40
-v <- dynRT(S=r)
44
+v <- dynRT(S = r)
41 45
 
42 46
 }
43 47
 \author{
... ...
@@ -4,9 +4,16 @@
4 4
 \alias{gatherInfo}
5 5
 \title{Gathers abundance informations from an alignment}
6 6
 \usage{
7
-gatherInfo(pD, obj, newind = NULL, method = c("apex"),
8
-  findmzind = TRUE, useTIC = FALSE, top = NULL,
9
-  intensity.cut = 0.05)
7
+gatherInfo(
8
+  pD,
9
+  obj,
10
+  newind = NULL,
11
+  method = c("apex"),
12
+  findmzind = TRUE,
13
+  useTIC = FALSE,
14
+  top = NULL,
15
+  intensity.cut = 0.05
16
+)
10 17
 }
11 18
 \arguments{
12 19
 \item{pD}{a \code{peaksDataset} object, to get the abundance data from}
13 20
new file mode 100644
... ...
@@ -0,0 +1,25 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/importSpectra.R
3
+\name{headToTailPlot}
4
+\alias{headToTailPlot}
5
+\title{Head to tail plot}
6
+\usage{
7
+headToTailPlot(specFromLib, specFromList)
8
+}
9
+\arguments{
10
+\item{specFromLib}{the mass spectra obtained from the .msp file}
11
+
12
+\item{specFromList}{the mass spectra obtained from \code{\link{gatherInfo}}}
13
+}
14
+\value{
15
+the plot
16
+}
17
+\description{
18
+The head-to-tail-plot for the mass spectra
19
+}
20
+\details{
21
+Head-to-tail-plot to visually compare the mass spectra
22
+}
23
+\author{
24
+Riccardo Romoli
25
+}
0 26
new file mode 100644
... ...
@@ -0,0 +1,24 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/importSpectra.R
3
+\name{importSpec}
4
+\alias{importSpec}
5
+\title{importSpec}
6
+\usage{
7
+importSpec(file)
8
+}
9
+\arguments{
10
+\item{file}{a .msp file from NIST search library database}
11
+}
12
+\value{
13
+list conaining the mass spctra
14
+}
15
+\description{
16
+Read the mass spectra from an external msp file
17
+}
18
+\details{
19
+Read the mass spectra from an external file in msp format. The format is
20
+used in NIST search library database.
21
+}
22
+\author{
23
[email protected]
24
+}
... ...
@@ -4,8 +4,7 @@
4 4
 \alias{imputePeaks}
5 5
 \title{Imputatin of locations of peaks that were undetected}
6 6
 \usage{
7
-imputePeaks(pD, obj, typ = 1, obj2 = NULL, filterMin = 1,
8
-  verbose = TRUE)
7
+imputePeaks(pD, obj, typ = 1, obj2 = NULL, filterMin = 1, verbose = TRUE)
9 8
 }
10 9
 \arguments{
11 10
 \item{pD}{a \code{peaksDataset} object}
12 11
new file mode 100644
... ...
@@ -0,0 +1,28 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/importSpectra.R
3
+\name{matchSpec}
4
+\alias{matchSpec}
5
+\title{matchSpec}
6
+\usage{
7
+matchSpec(spec1, outList, whichSpec)
8
+}
9
+\arguments{
10
+\item{spec1}{reference mass spectrum}
11
+
12
+\item{outList}{the return of \code{\link{gatherInfo}}}
13
+
14
+\item{whichSpec}{the entry number of outList}
15
+}
16
+\value{
17
+the distance between the reference mass spectrum and the others
18
+}
19
+\description{
20
+Calculate the distance between a reference mass spectrum
21
+}
22
+\details{
23
+Calculate the distance between a reference mass spectrum and one from the
24
+sample
25
+}
26
+\author{
27
+Riccardo Romoli
28
+}
... ...
@@ -8,10 +8,25 @@
8 8
 \alias{multipleAlignment-method}
9 9
 \title{Data Structure for multiple alignment of many GCMS samples}
10 10
 \usage{
11
-multipleAlignment(pd, group, bw.gap = 0.8, wn.gap = 0.6, bw.D = 0.2,
12
-  wn.D = 0.05, filterMin = 1, lite = FALSE, usePeaks = TRUE,
13
-  df = 50, verbose = TRUE, timeAdjust = FALSE, doImpute = FALSE,
14
-  metric = 2, type = 2, penality = 0.2, compress = FALSE)
11
+multipleAlignment(
12
+  pd,
13
+  group,
14
+  bw.gap = 0.8,
15
+  wn.gap = 0.6,
16
+  bw.D = 0.2,
17
+  wn.D = 0.05,
18
+  filterMin = 1,
19
+  lite = FALSE,
20
+  usePeaks = TRUE,
21
+  df = 50,
22
+  verbose = TRUE,
23
+  timeAdjust = FALSE,
24
+  doImpute = FALSE,
25
+  metric = 2,
26
+  type = 2,
27
+  penality = 0.2,
28
+  compress = FALSE
29
+)
15 30
 }
16 31
 \arguments{
17 32
 \item{pd}{a \code{peaksDataset} object}
... ...
@@ -22,7 +22,7 @@ matrix of similarities
22 22
 }
23 23
 \description{
24 24
 This function calculates the similarity of all pairs of peaks from 2
25
-samples, using the spectra similarity and the rretention time differencies
25
+samples, using the spectra similarity and the retention time differencies
26 26
 }
27 27
 \details{
28 28
 Computes the normalized dot product between every pair of peak vectors in
... ...
@@ -32,19 +32,23 @@ the retention time window (\code{D})and returns a similarity matrix.
32 32
 
33 33
 ## Not Run
34 34
 require(gcspikelite)
35
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
36
-cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
37
-
38
-                                        # read data, peak detection results
39
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
40
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
41
-                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
42
-                   sleep=0)
35
+files <- list.files(path = paste(find.package("gcspikelite"), "data",
36
+                    sep = "/"),"CDF", full = TRUE)
37
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
38
+## create settings object
39
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
40
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
41
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
42
+ extendLengthMSW = TRUE, mzCenterFun = "wMean")
43
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
44
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
45
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
46
+data
43 47
 ## review peak picking
44
-plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
48
+plotChrom(data, rtrange = c(7.5, 10.5), runs = c(1:2))
45 49
 
46
-r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]],
47
-           pd@peaksrt[[1]], pd@peaksrt[[2]], D=50)
50
+r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]],
51
+           data@peaksrt[[1]], data@peaksrt[[2]], D = 50)
48 52
 ## End (Not Run)
49 53
 
50 54
 }
... ...
@@ -4,8 +4,16 @@
4 4
 \alias{normDotProduct}
5 5
 \title{Normalized Dot Product}
6 6
 \usage{
7
-normDotProduct(x1, x2, t1 = NULL, t2 = NULL, df = max(ncol(x1),
8
-  ncol(x2)), D = 1e+05, timedf = NULL, verbose = FALSE)
7
+normDotProduct(
8
+  x1,
9
+  x2,
10
+  t1 = NULL,
11
+  t2 = NULL,
12
+  df = max(ncol(x1), ncol(x2)),
13
+  D = 1e+05,
14
+  timedf = NULL,
15
+  verbose = FALSE
16
+)
9 17
 }
10 18
 \arguments{
11 19
 \item{x1}{data matrix for sample 1}
... ...
@@ -4,8 +4,15 @@
4 4
 \alias{parseChromaTOF}
5 5
 \title{Parser for ChromaTOF files}
6 6
 \usage{
7
-parseChromaTOF(fn, min.pc = 0.01, mz = seq(85, 500), rt.cut = 0.008,
8
-  rtrange = NULL, skip = 1, rtDivide = 60)
7
+parseChromaTOF(
8
+  fn,
9
+  min.pc = 0.01,
10
+  mz = seq(85, 500),
11
+  rt.cut = 0.008,
12
+  rtrange = NULL,
13
+  skip = 1,
14
+  rtDivide = 60
15
+)
9 16
 }
10 17
 \arguments{
11 18
 \item{fn}{ChromaTOF filename to read.}
... ...
@@ -4,8 +4,7 @@
4 4
 \alias{parseELU}
5 5
 \title{Parser for ELU files}
6 6
 \usage{
7
-parseELU(f, min.pc = 0.01, mz = seq(50, 550), rt.cut = 0.008,
8
-  rtrange = NULL)
7
+parseELU(f, min.pc = 0.01, mz = seq(50, 550), rt.cut = 0.008, rtrange = NULL)
9 8
 }
10 9
 \arguments{
11 10
 \item{f}{ELU filename to read.}
... ...
@@ -10,9 +10,22 @@
10 10
 \alias{plot,peaksAlignment,ANY-method}
11 11
 \title{Data Structure for pairwise alignment of 2 GCMS samples}
12 12
 \usage{
13
-peaksAlignment(d1, d2, t1, t2, gap = 0.5, D = 50, timedf = NULL,
14
-  df = 30, verbose = TRUE, usePeaks = TRUE, compress = TRUE,
15
-  metric = 2, type = 2, penality = 0.2)
13
+peaksAlignment(
14
+  d1,
15
+  d2,
16
+  t1,
17
+  t2,
18
+  gap = 0.5,
19
+  D = 50,
20
+  timedf = NULL,
21
+  df = 30,
22
+  verbose = TRUE,
23
+  usePeaks = TRUE,
24
+  compress = TRUE,
25
+  metric = 2,
26
+  type = 2,
27
+  penality = 0.2
28
+)
16 29
 }
17 30
 \arguments{
18 31
 \item{d1}{matrix of MS intensities for 1st sample (if doing a peak
... ...
@@ -76,29 +89,32 @@ data.
76 89
 ## see clusterAlignment, it calls peaksAlignment
77 90
 
78 91
 ## Not Run:
79
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
80
-cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
81
-
82
-# read data, peak detection results
83
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
84
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
85
-                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
86
-                   sleep=0)
87
-## review peak picking
88
-plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
92
+files <- list.files(path = paste(find.package("gcspikelite"), "data",
93
+                    sep = "/"),"CDF", full = TRUE)
94
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
95
+## create settings object
96
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
97
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
98
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
99
+ extendLengthMSW = TRUE, mzCenterFun = "wMean")
100
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
101
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
102
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
103
+data
104
+plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
89 105
 
90 106
 ## align two chromatogram
91
-pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
92
-                     pd@peaksrt[[1]], pd@peaksrt[[2]], D=50,
93
-                     metric=3, compress=FALSE, type=2, penality=0.2)
107
+pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
108
+                     data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
109
+                     metric = 3, compress = FALSE, type = 2, penality = 0.2)
94 110
 
95 111
 plotAlignment(pA)
96 112
 pA@v$match
97 113
 
98 114
 par(mfrow=c(2,1))
99
-plot(pd@peaksdata[[1]][,15], type='h', main=paste(pd@peaksrt[[1]][[15]]))
100
-plot(pd@peaksdata[[2]][,17], type='h',
101
-     main=paste(pd@peaksrt[[2]][[17]]))
115
+plot(data@peaksdata[[1]][,15], type = 'h', main = paste(data@peaksrt[[1]][[15]]))
116
+plot(data@peaksdata[[2]][,17], type = 'h',
117
+     main = paste(data@peaksrt[[2]][[17]]))
102 118
 ## End (Not Run)
103 119
 
104 120
 }
... ...
@@ -10,8 +10,13 @@
10 10
 \alias{plot,peaksDataset,ANY-method}
11 11
 \title{Data Structure for raw GCMS data and peak detection results}
12 12
 \usage{
13
-peaksDataset(fns = dir(, "[Cc][Dd][Ff]"), verbose = TRUE,
14
-  mz = seq(50, 550), rtDivide = 60, rtrange = NULL)
13
+peaksDataset(
14
+  fns = dir(, "[Cc][Dd][Ff]"),
15
+  verbose = TRUE,
16
+  mz = seq(50, 550),
17
+  rtDivide = 60,
18
+  rtrange = NULL
19
+)
15 20
 }
16 21
 \arguments{
17 22
 \item{fns}{character vector, filenames of raw data in CDF format.}
... ...
@@ -4,8 +4,14 @@
4 4
 \alias{plotAlignedFrags}
5 5
 \title{plotAlignedFrags}
6 6
 \usage{
7
-plotAlignedFrags(object, outList, specID, fullRange = TRUE,
8
-  normalize = TRUE, ...)
7
+plotAlignedFrags(
8
+  object,
9
+  outList,
10
+  specID,
11
+  fullRange = TRUE,
12
+  normalize = TRUE,
13
+  ...
14
+)
9 15
 }
10 16
 \arguments{
11 17
 \item{object}{where to keep the mass range of the experiment}
... ...
@@ -31,24 +37,27 @@ Plot the deconvoluted and aligned mass spectra collected using gatherInfo()
31 37
 }
32 38
 \examples{
33 39
 
34
-## Rd workflow
35
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/")
36
-cdfFiles <- dir(gcmsPath,"CDF", full = TRUE)
37
-
38
-# read data, peak detection results
39
-pd <- peaksDataset(cdfFiles[1:4], mz = seq(50,550), rtrange = c(7.5,10.5))
40
-pd <- addXCMSPeaks(files = cdfFiles[1:4], object = pd, peakPicking = c('mF'),
41
-                   snthresh = 2, fwhm = 8,  step = 0.5, steps = 2, mzdiff = 0.5,
42
-                   sleep = 0)
40
+files <- list.files(path = paste(find.package("gcspikelite"), "data",
41
+                    sep = "/"),"CDF", full = TRUE)
42
+data <- peaksDataset(files[1:4], mz = seq(50, 550), rtrange = c(7.5, 8.5))
43
+## create settings object
44
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
45
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
46
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
47
+ extendLengthMSW = TRUE, mzCenterFun = "wMean")
48
+data <- addXCMSPeaks(files[1:4], data, settings = mfp, minintens = 100,
49
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
50
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
51
+data
43 52
 ## multiple alignment
44
-ma <- multipleAlignment(pd, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05, bw.gap = 0.6, 
45
-                        bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50,
46
-                        verbose = TRUE, metric = 2, type = 2)
53
+ma <- multipleAlignment(data, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05, 
54
+ bw.gap = 0.6, bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50,
55
+ verbose = TRUE, metric = 2, type = 2)
47 56
 
48 57
 ## gather apex intensities
49
-gip <- gatherInfo(pd, ma)
58
+gip <- gatherInfo(data, ma)
50 59
 gip[[33]]
51
-plotAlignedFrags(object = pd, outList = gip, specID = 33)
60
+plotAlignedFrags(object = data, outList = gip, specID = 33)
52 61
 
53 62
 }
54 63
 \author{
... ...
@@ -1,15 +1,22 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/peaksAlignment.R
3
-\docType{methods}
4 3
 \name{plotAlignment,peaksAlignment-method}
5 4
 \alias{plotAlignment,peaksAlignment-method}
6 5
 \title{plotAlignment}
7 6
 \usage{
8
-\S4method{plotAlignment}{peaksAlignment}(object, xlab = "Peaks - run 1",
9
-  ylab = "Peaks - run 2", plotMatches = TRUE, matchPch = 19,
10
-  matchLwd = 3, matchCex = 0.5, matchCol = "black",
11
-  col = colorpanel(50, "white", "green", "navyblue"), breaks = seq(0,
12
-  1, length = 51), ...)
7
+\S4method{plotAlignment}{peaksAlignment}(
8
+  object,
9
+  xlab = "Peaks - run 1",
10
+  ylab = "Peaks - run 2",
11
+  plotMatches = TRUE,
12
+  matchPch = 19,
13
+  matchLwd = 3,
14
+  matchCex = 0.5,
15
+  matchCol = "black",
16
+  col = colorpanel(50, "white", "green", "navyblue"),
17
+  breaks = seq(0, 1, length = 51),
18
+  ...
19
+)
13 20
 }
14 21
 \arguments{
15 22
 \item{object}{a \code{clusterAlignment} object}
... ...
@@ -50,23 +57,24 @@ The similarity matrix is plotted and optionally, the set of matching peaks.
50 57
 \examples{
51 58
 
52 59
 require(gcspikelite)
53
-
54
-## paths and files
55
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
56
-cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
57
-eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
58
-
59
-## read data
60
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5))
61
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
62
-                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5)
63
-
60
+files <- list.files(path = paste(find.package("gcspikelite"), "data",
61
+                    sep = "/"),"CDF", full = TRUE)
62
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
63
+## create settings object
64
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
65
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
66
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
67
+ extendLengthMSW = TRUE, mzCenterFun = "wMean")
68
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
69
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
70
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
71
+data
64 72
 ## image plot
65
-plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE)
73
+plotChrom(data, rtrange = c(7.5,8.5), plotPeaks = TRUE, plotPeakLabels =TRUE)
66 74
 
67 75
 ## align two chromatogram
68
-pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
69
-                     pd@peaksrt[[1]], pd@peaksrt[[2]], D = 50,
76
+pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
77
+                     data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
70 78
                      compress = FALSE, type = 1, metric = 1,
71 79
                      gap = 0.5)
72 80
 plotAlignment(pA)
... ...
@@ -1,19 +1,33 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/peaksDataset.R
3
-\docType{methods}
4 3
 \name{plotChrom,peaksDataset-method}
5 4
 \alias{plotChrom,peaksDataset-method}
6 5
 \title{Plotting functions for GCMS data objects}
7 6
 \usage{
8
-\S4method{plotChrom}{peaksDataset}(object,
9
-  runs = 1:length(object@rawdata), mzind = 1:nrow(object@rawdata[[1]]),
10
-  mind = NULL, plotSampleLabels = TRUE, calcGlobalMax = FALSE,
11
-  peakCex = 0.8, plotPeaks = TRUE, plotPeakBoundaries = FALSE,
12
-  plotPeakLabels = FALSE, plotMergedPeakLabels = TRUE, mlwd = 3,
13
-  usePeaks = TRUE, plotAcrossRuns = FALSE, overlap = F,
14
-  rtrange = NULL, cols = NULL, thin = 1,
15
-  max.near = median(object@rawrt[[1]]), how.near = 50, scale.up = 1,
16
-  ...)
7
+\S4method{plotChrom}{peaksDataset}(
8
+  object,
9
+  runs = 1:length(object@rawdata),
10
+  mzind = 1:nrow(object@rawdata[[1]]),
11
+  mind = NULL,
12
+  plotSampleLabels = TRUE,
13
+  calcGlobalMax = FALSE,
14
+  peakCex = 0.8,
15
+  plotPeaks = TRUE,
16
+  plotPeakBoundaries = FALSE,
17
+  plotPeakLabels = FALSE,
18
+  plotMergedPeakLabels = TRUE,
19
+  mlwd = 3,
20
+  usePeaks = TRUE,
21
+  plotAcrossRuns = FALSE,
22
+  overlap = F,
23
+  rtrange = NULL,
24
+  cols = NULL,
25
+  thin = 1,
26
+  max.near = median(object@rawrt[[1]]),
27
+  how.near = 50,
28
+  scale.up = 1,
29
+  ...
30
+)
17 31
 }
18 32
 \arguments{
19 33
 \item{object}{a \code{peaksDataset} object.}
... ...
@@ -1,12 +1,10 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/clusterAlignment.R
3
-\docType{methods}
4 3
 \name{plotClustAlignment,clusterAlignment-method}
5 4
 \alias{plotClustAlignment,clusterAlignment-method}
6 5
 \title{plotClustAlignment}
7 6
 \usage{
8
-\S4method{plotClustAlignment}{clusterAlignment}(object, alignment = 1,
9
-  ...)
7
+\S4method{plotClustAlignment}{clusterAlignment}(object, alignment = 1, ...)
10 8
 }
11 9
 \arguments{
12 10
 \item{object}{\code{clusterAlignment} object.}
... ...
@@ -31,19 +29,19 @@ just a collection of all pairwise \code{peakAlignment} objects.
31 29
 
32 30
 require(gcspikelite)
33 31
 
34
-## paths and files
32
+# paths and files
35 33
 gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
36 34
 cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
37 35
 eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
38 36
 
39
-## read data
40
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5))
41
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
42
-                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5)
43
-ca <- clusterAlignment(pd, metric = 1, D = 50, type = 1, gap = 0.5)
37
+# read data, peak detection results
38
+pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5))
39
+pd <- addAMDISPeaks(pd, eluFiles[1:2])
40
+
41
+ca <- clusterAlignment(pd, gap=0.5, D=0.05, df=30, metric=1, type=1)
44 42
 plotClustAlignment(ca, run = 1)
45 43
 plotClustAlignment(ca, run = 2)
46
-plotClustAlignment(ca, run = 3) 
44
+plotClustAlignment(ca, run = 3)
47 45
 
48 46
 }
49 47
 \references{
... ...
@@ -29,22 +29,27 @@ Plot the deconvoluted mass spectra from the profile matrix
29 29
 }
30 30
 \examples{
31 31
 
32
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
33
-cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
34
-# read data, peak detection results
35
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
36
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
37
-                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
38
-                   sleep=0)
32
+files <- list.files(path = paste(find.package("gcspikelite"), "data",
33
+                    sep = "/"),"CDF", full = TRUE)
34
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
35
+## create settings object
36
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
37
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
38
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
39
+ extendLengthMSW = TRUE, mzCenterFun = "wMean")
40
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
41
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
42
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
43
+data
39 44
 ## align two chromatogram
40
-pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
41
-                     pd@peaksrt[[1]], pd@peaksrt[[2]], D=50,
42
-                     metric=3, compress=FALSE, type=2, penality=0.2)
45
+pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
46
+                     data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
47
+                     metric = 3, compress = FALSE, type = 2, penality = 0.2)
43 48
 pA@v$match
44 49
 ## plot the mass spectra
45 50
 par(mfrow=c(2,1))
46
-plotFrags(object=pd, sample=1, specID=10)
47
-plotFrags(object=pd, sample=2, specID=12)
51
+plotFrags(object=data, sample=1, specID=10)
52
+plotFrags(object=data, sample=2, specID=12)
48 53
 
49 54
 }
50 55
 \author{
... ...
@@ -1,13 +1,19 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/peaksDataset.R
3
-\docType{methods}
4 3
 \name{plotImage}
5 4
 \alias{plotImage}
6 5
 \alias{plotImage,peaksDataset-method}
7 6
 \title{Plot of images of GCMS data}
8 7
 \usage{
9
-\S4method{plotImage}{peaksDataset}(object, run = 1, rtrange = c(11,
10
-  13), main = NULL, mzrange = c(50, 200), SCALE = log2, ...)
8
+\S4method{plotImage}{peaksDataset}(
9
+  object,
10
+  run = 1,
11
+  rtrange = c(11, 13),
12
+  main = NULL,
13
+  mzrange = c(50, 200),
14
+  SCALE = log2,
15
+  ...
16
+)
11 17
 }
12 18
 \arguments{
13 19
 \item{object}{a \code{peaksDataset} object}
... ...
@@ -7,8 +7,17 @@
7 7
 \alias{show,progressiveAlignment-method}
8 8
 \title{Data Structure for progressive alignment of many GCMS samples}
9 9
 \usage{
10
-progressiveAlignment(pD, cA, D = 50, gap = 0.5, verbose = TRUE,
11
-  usePeaks = TRUE, df = 30, compress = FALSE, type = 2)
10
+progressiveAlignment(
11
+  pD,
12
+  cA,
13
+  D = 50,
14
+  gap = 0.5,
15
+  verbose = TRUE,
16
+  usePeaks = TRUE,
17
+  df = 30,
18
+  compress = FALSE,
19
+  type = 2
20
+)
12 21
 }
13 22
 \arguments{
14 23
 \item{pD}{a \code{peaksDataset} object}
... ...
@@ -47,18 +56,22 @@ be found in the reference below.
47 56
 \examples{
48 57
 
49 58
 require(gcspikelite)
50
-## paths and files
51
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
52
-cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
53
-eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
54
-
55
-## read data, peak detection results
56
-pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5))
57
-pd <- addAMDISPeaks(pd, eluFiles[1:2])
58
-
59
-ca <- clusterAlignment(pd, gap=.5, D=.05, df=30, metric=1, type=1,
60
-                       compress = FALSE)
61
-pa <- progressiveAlignment(pd, ca, gap=.6, D=.1, df=30, type=1, compress = FALSE)
59
+files <- list.files(path = paste(find.package("gcspikelite"), "data",
60
+                    sep = "/"),"CDF", full = TRUE)
61
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
62
+## create settings object
63
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
64
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
65
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
66
+ extendLengthMSW = TRUE, mzCenterFun = "wMean")
67
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
68
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
69
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
70
+data
71
+ca <- clusterAlignment(data, gap = 0.5, D = 0.05, df = 30, metric = 1,
72
+  type = 1, compress = FALSE)
73
+pa <- progressiveAlignment(data, ca, gap = 0.6, D = 0.1, df = 30,
74
+ type = 1, compress = FALSE)
62 75
 
63 76
 }
64 77
 \references{
... ...
@@ -31,19 +31,22 @@ different peaks.
31 31
 \examples{
32 32
 
33 33
 require(gcspikelite)
34
-# paths and files
35
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/")
36
-cdfFiles <- dir(gcmsPath,"CDF",full=TRUE)
37
-# read data, peak detection results
38
-pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550),
39
-                   rtrange=c(7.5,8.5))
40
-pd <- addXCMSPeaks(files=cdfFiles[1:2], object=pd,
41
-                   peakPicking=c('mF'), snthresh=3, fwhm=4,
42
-                   step=1, steps=2, mzdiff=0.5)
43
-ma <- multipleAlignment(pd = pd, group = c(1,1),
34
+files <- list.files(path = paste(find.package("gcspikelite"), "data",
35
+                    sep = "/"),"CDF", full = TRUE)
36
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
37
+## create settings object
38
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
39
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
40
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
41
+ extendLengthMSW = TRUE, mzCenterFun = "wMean")
42
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
43
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
44
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
45
+data
46
+ma <- multipleAlignment(pd = data, group = c(1,1),
44 47
                         filterMin = 1, metric = 2, type = 2)
45
-outList <- gatherInfo(pd, ma)
46
-mtxD <- retFatMatrix(object = pd, data = outList, minFilter = 1)
48
+outList <- gatherInfo(data, ma)
49
+mtxD <- retFatMatrix(object = data, data = outList, minFilter = 1)
47 50
 
48 51
 }
49 52
 \seealso{
... ...
@@ -4,8 +4,15 @@
4 4
 \alias{rmaFitUnit}
5 5
 \title{Fits a robust linear model (RLM) for one metabolite}
6 6
 \usage{
7
-rmaFitUnit(u, maxit = 5, mzEffect = TRUE, cls = NULL,
8
-  fitSample = TRUE, fitOrCoef = c("coef", "fit"), TRANSFORM = log2)
7
+rmaFitUnit(
8
+  u,
9
+  maxit = 5,
10
+  mzEffect = TRUE,
11
+  cls = NULL,
12
+  fitSample = TRUE,
13
+  fitOrCoef = c("coef", "fit"),
14
+  TRANSFORM = log2
15
+)
9 16
 }
10 17
 \arguments{
11 18
 \item{u}{a metabolite unit (list object with vectors \code{mz} and \code{rt}
... ...
@@ -1,6 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/multipleAlignment.R
3
-\docType{methods}
4 3
 \name{show,multipleAlignment-method}
5 4
 \alias{show,multipleAlignment-method}
6 5
 \title{Store the raw data and optionally, information regarding signal peaks for
7 6
deleted file mode 100644
... ...
@@ -1,16 +0,0 @@
1
-(TeX-add-style-hook
2
- "flagme"
3
- (lambda ()
4
-   (TeX-add-to-alist 'LaTeX-provided-package-options
5
-                     '(("caption" "tableposition=top") ("inputenc" "utf8")))
6
-   (TeX-run-style-hooks
7
-    "latex2e"
8
-    "article"
9
-    "art10"
10
-    "amsmath"
11
-    "amscd"
12
-    "caption"
13
-    "ifthen"
14
-    "inputenc"))
15
- :latex)
16
-
17 0
new file mode 100644
... ...
@@ -0,0 +1,416 @@
1
+\documentclass{article}
2
+
3
+\usepackage{amsmath}
4
+\usepackage{amscd}
5
+\usepackage[tableposition=top]{caption}
6
+\usepackage{ifthen}
7
+\usepackage[utf8]{inputenc}
8
+\topmargin 0in
9
+\headheight 0in
10
+\headsep 0in
11
+\oddsidemargin 0in
12
+\evensidemargin 0in
13
+\textwidth 176mm
14
+\textheight 215mm
15
+
16
+
17
+\begin{document}
18
+
19
+%\VignetteIndexEntry{Using flagme -- Fragment-level analysis of GC-MS-based metabolomics data}
20
+
21
+\title{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based 
22
+  metabolomics data}
23
+\author{Mark Robinson \\ \texttt{[email protected]} \\ Riccardo  
24
+  Romoli \\ \texttt{[email protected]}} 
25
+\maketitle
26
+
27
+
28
+\section{Introduction}
29
+\noindent This document gives a brief introduction to the
30
+\texttt{flagme} package, which is designed to process, visualise and
31
+statistically analyze sets of GC-MS samples. The ideas discussed here
32
+were originally designed with GC-MS-based metabolomics in mind, but
33
+indeed some of the methods and visualizations could be useful for
34
+LC-MS data sets. The {\em fragment-level analysis} though, takes
35
+advantage of the rich fragmentation patterns observed from electron
36
+interaction (EI) ionization. 
37
+
38
+There are many aspects of data processing for GC-MS data. Generally,
39
+algorithms are run separately on each sample to detect features, or
40
+{\em peaks} (e.g. AMDIS). Due to retention time shifts from
41
+run-to-run, an alignment algorithm is employed to allow the matching
42
+of the same feature across multiple samples.  Alternatively, if known
43
+standards are introduced to the samples, retention {\em indices} can
44
+be computed for each peak and used for alignment. After peaks are
45
+matched across all samples, further processing steps are employed to
46
+create a matrix of abundances, leading into detecting differences in
47
+abundance. 
48
+
49
+Many of these data processing steps are prone to errors and they often
50
+tend to be black boxes. But, with effective exploratory data
51
+analysis, many of the pitfalls can be avoided and any problems can be
52
+fixed before proceeding to the downstream statistical analysis. The
53
+package provides various visualizations to ensure the methods applied
54
+are not black boxes. 
55
+
56
+The \texttt{flagme} package gives a complete suite of methods to go
57
+through all common stages of data processing. In addition, R is
58
+especially well suited to the downstream data analysis tasks since it
59
+is very rich in analysis tools and has excellent visualization
60
+capabilities. In addition, it is freely available
61
+(\texttt{www.r-project.org}), extensible and there is a growing
62
+community of users and developers. For routine analyses, graphical
63
+user interfaces could be designed. 
64
+
65
+
66
+\section{Reading and visualizing GC-MS data}
67
+To run these examples, you must have the \texttt{gcspikelite} package
68
+installed.  This data package contains several GC-MS samples from a
69
+spike-in experiment we designed to interrogate data processing
70
+methods.  So, first, we load the packages: 
71
+
72
+<<libraries, echo=FALSE>>=
73
+require(gcspikelite)
74
+library(flagme)
75
+@
76
+
77
+
78
+To load the data and corresponding peak detection results, we simply
79
+create vectors of the file-names and create a \texttt{peakDataset}
80
+object. Note that we can speed up the import time by setting the
81
+retention time range to a subset of the elution, as below: 
82
+
83
+<<rawdata>>=
84
+gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
85
+data(targets)
86
+cdfFiles <- paste(gcmsPath, targets$FileName, sep="/")
87
+eluFiles <- gsub("CDF", "ELU", cdfFiles)
88
+pd <- peaksDataset(cdfFiles, mz=seq(50,550), rtrange=c(7.5,8.5))
89
+pd <- addAMDISPeaks(pd, eluFiles)
90
+pd
91
+@
92
+
93
+Here, we have added peaks from AMDIS, a well known and mature
94
+algorithm for deconvolution of GC-MS data. For GC-TOF-MS data, we have
95
+implemented a parser for the \texttt{ChromaTOF} output (see the
96
+analogous \texttt{addChromaTOFPeaks} function). The
97
+\texttt{addXCMSPeaks} allows to use all the XCMS peak-picking
98
+algorithms; using this approach it is also possible to elaborate the
99
+raw data file from within R instead of using an external software.
100
+%% Support for XMCS or MzMine may be added in the future. Ask the author
101
+%% if another detection result format is desired as the parsers are
102
+%% generally easy to design.   
103
+In particular the function reads the raw data using XCMS, group each extracted ion
104
+according to their retention time using CAMERA and attaches them to an
105
+already created \texttt{peaksDataset} object:
106
+
107
+<<addXCMS>>=
108
+pd.2 <- peaksDataset(cdfFiles[1:3], mz = seq(50, 550), rtrange = c(7.5, 8.5))
109
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
110
+  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
111
+  extendLengthMSW = TRUE, mzCenterFun = "wMean")
112
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
113
+pd.2 <- addXCMSPeaks(cdfFiles[1:3], pd.2, settings = mfp, minintens = 100,
114
+  multipleMatchedFilter = FALSE, multipleMatchedFilterParam = 
115
+  list(fwhm = c(5, 10, 20), rt_abs = 2, mz_abs = 0.1)
116
+  )
117
+pd.2
118
+@ 
119
+
120
+The possibility to work using computer cluster will be added in the future. 
121
+
122
+Regardless of platform and peak detection algorithm, a useful
123
+visualization of a set of samples is the set of total ion currents
124
+(TIC), or extracted ion currents (XICs). To view TICs, you can call:
125
+
126
+<<plotexample1, fig.width=9, fig.height=7>>=
127
+plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE,
128
+     max.near=8, how.near=0.5, col=rep(c("blue","red","black"), each=3))
129
+@
130
+
131
+Note here the little {\em hashes} represent the detected peaks and are
132
+labelled with an integer index. One of the main challenges is to match
133
+these peak detections across several samples, given that the appear at
134
+slightly different times in different runs.
135
+
136
+For XICs, you need to give the indices (of \texttt{pd@mz}, the grid of
137
+mass-to-charge values) that you want to plot through the
138
+\texttt{mzind} argument.  This could be a single ion
139
+(e.g. \texttt{mzind=24}) or could be a range of indices if multiple
140
+ions are of interest (e.g. \texttt{mzind=c(24,25,98,99)}). 
141
+
142
+There are several other features within the \texttt{plot} command on
143
+\texttt{peaksDataset} objects that can be useful. See \texttt{?plot}
144
+(and select the flagme version) for full details. 
145
+
146
+Another useful visualization, at least for individual samples, is a 2D
147
+heatmap of intensity. Such plots can be enlightening, especially when
148
+peak detection results are overlaid. For example (with detected
149
+fragment peaks from AMDIS shown in white): 
150
+
151
+<<plotexample2,fig.width=9,fig.height=7>>=
152
+r <- 1
153
+plotImage(pd, run=r, rtrange=c(7.5,8.5), main="")
154
+v <- which(pd@peaksdata[[r]] > 0, arr.ind=TRUE) # find detected peaks
155
+abline(v=pd@peaksrt[[r]])
156
+points(pd@peaksrt[[r]][v[,2]], pd@mz[v[,1]], pch=19, cex=.6, col="white")
157
+@
158
+
159
+
160
+\section{Pairwise alignment with dynamic programming algorithm}
161
+One of the first challenges of GC-MS data is the matching of detected
162
+peaks (i.e. metabolites) across several samples. Although gas
163
+chromatography is quite robust, there can be some drift in the elution
164
+time of the same analyte from run to run. We have devised a strategy,
165
+based on dynamic programming, that takes into account both the
166
+similarity in spectrum (at the apex of the called peak) and the
167
+similarity in retention time, without requiring the identity of each
168
+peak; this matching uses the data alone. If each sample gives a `peak
169
+list' of the detected peaks (such as that from AMDIS that we have
170
+attached to our \texttt{peaksDataset} object), the challenge is to
171
+introduce gaps into these lists such that they are best aligned. From
172
+this a matrix of retention times or a matrix of peak abundances can be
173
+extracted for further statistical analysis, visualization and
174
+interpretation. For this matching, we created a procedure analogous to
175
+a multiple {\em sequence} alignment. 
176
+
177
+To highlight the dynamic programming-based alignment strategy, we
178
+first illustrate a pairwise alignment of two peak lists. This example
179
+also illustrates the selection of parameters necessary for the
180
+alignment. From the data read in previously, let us consider the
181
+alignment of two samples, denoted \texttt{0709\_468} and
182
+\texttt{0709\_474}. First, a similarity matrix for two samples is
183
+calculated. This is calculated based on a scoring function and takes
184
+into account the similarity in retention time and in the similarity of
185
+the apex spectra, according to: 
186
+\[
187
+S_{ij}(D) = \frac{\sum_{k=1}^K x_{ik} y_{jk}}{\sqrt{ \sum_{k=1}^K
188
+    x_{ik}^2 \cdot \sum_{k=1}^K y_{jk}^2 } } \cdot \exp \left( -
189
+  \frac{1}{2} \frac{(t_i-t_j)^2}{D^2} \right) 
190
+\]
191
+\noindent where $i$ is the index of the peak in the first sample and
192
+$j$ is the index of the peak in the second sample, $\mathbf{x}_i$ and
193
+$\mathbf{y}_j$ are the spectra vectors and $t_i$ and $t_j$ are their
194
+respective retention times. As you can see, there are two components
195
+to the similarity: spectra similarity (left term) and similarity in
196
+retention time (right term). Of course, other metrics for spectra
197
+similarity are feasible. Ask the author if you want to see other
198
+metrices implemented. We have some non-optimized code for a few
199
+alternative metrics. 
200
+
201
+The peak alignment algorithm, much like sequence alignments, requires
202
+a \texttt{gap} parameter to be set, here a number between 0 and 1.  A
203
+high gap penalty discourages gaps when matching the two lists of peaks
204
+and a low gap penalty allows gaps at a very low {\em cost}.  We find
205
+that a gap penalty in the middle range (0.4-0.6) works well for GC-MS
206
+peak matching.  Another parameter, \texttt{D}, modulates the impact of
207
+the difference in retention time penalty. A large value for
208
+\texttt{D} essentially eliminates the effect. Generally, we set this
209
+parameter to be a bit larger than the average width of a peak,
210
+allowing a little flexibility for retention time shifts between
211
+samples. Keep in mind the \texttt{D} parameter should be set on the
212
+scale (i.e. seconds or minutes) of the \texttt{peaksrt} slot of the
213
+\texttt{peaksDataset} object. The next example shows the effect of
214
+the \texttt{gap} and \texttt{D} penalty on the matching of a small
215
+ranges of peaks. 
216
+
217
+<<pairwisealignexample, fig.width=7, fig.height=7>>=
218
+Ds <- c(0.1, 10, 0.1, 0.1)
219
+gaps <- c(0.5, 0.5, 0.1, 0.9)
220
+par(mfrow=c(2,2), mai=c(0.8466,0.4806,0.4806,0.1486))
221
+for(i in 1:4){
222
+  pa <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
223
+                       pd@peaksrt[[1]], pd@peaksrt[[2]], D=Ds[i],
224
+                       gap=gaps[i], metric=1, type=1, compress = FALSE) 
225
+  plotAlignment(pa, xlim=c(0, 17), ylim=c(0, 16), matchCol="yellow",
226
+       main=paste("D=", Ds[i], " gap=", gaps[i], sep=""))
227
+}
228
+@
229
+
230
+You might ask: is the flagme package useful without peak detection
231
+results? Possibly. There have been some developments in alignment
232
+(generally on LC-MS proteomics experiments) without peak/feature
233
+detection, such as Prince et al. 2006, where a very similar dynamic
234
+programming is used for a pairwise alignment. We have experimented
235
+with alignments without using the peaks, but do not have any
236
+convincing results. It does introduce a new set of challenges in terms
237
+of highlighting differentially abundant metabolites. However, in the
238
+\texttt{peaksAlignment} routine above (and those mentioned below), you
239
+can set \texttt{usePeaks=FALSE} in order to do {\em scan}-based
240
+alignments instead of {\em peak}-based alignments. In addition, the
241
+\texttt{flagme} package may be useful simply for its bare-bones
242
+dynamic programming algorithm. 
243
+
244
+
245
+\subsection{Normalizing retention time score to drift estimates}
246
+In what is mentioned above for pairwise alignments, we are penalizing
247
+for differences in retention times that are non-zero. But, as you can
248
+see from the TICs, some differences in retention time are
249
+consistent. For example, all of the peaks from sample
250
+\texttt{0709\_485} elute at later times than peaks from sample
251
+\texttt{0709\_496}. We should be able to estimate this drift and
252
+normalize the time penalty to that estimate, instead of penalizing to
253
+zero. That is, we should replace $t_i-t_j$ with $t_i-t_j-\hat{d}_{ij}$
254
+where $\hat{d}_{ij}$ is the expected drift between peak $i$ of the
255
+first sample and peak $j$ of the second sample. 
256
+
257
+More details coming soon.
258
+
259
+
260
+\subsection{Imputing location of undetected peaks}
261
+One goal of the alignment leading into downstream data analyses is the
262
+generation of a table of abundances for each metabolite across all
263
+samples. As you can see from the TICs above, there are some low
264
+intensity peaks that fall below detection in some but not all
265
+samples. Our view is that instead of inserting arbitrary low constants
266
+(such as 0 or half the detection limit) or imputing the intensities
267
+post-hoc or having missing data in the data matrices, it is best to
268
+return to the area of the where the peak should be and give some kind
269
+of abundance. The alignments themselves are rich in information with
270
+respect to the location of undetected peaks. We feel this is a more
271
+conservative and statistically valid approach than introducing
272
+arbitrary values. 
273
+
274
+More details coming soon.
275
+
276
+
277
+\section{Multiple alignment of several experimental groups}
278
+Next, we discuss the multiple alignment of peaks across many
279
+samples. With replicates, we typically do an alignment within
280
+replicates, then combine these together into a summarized form. This
281
+cuts down on the computational cost. For example, consider 2 sets of
282
+samples, each with 5 replicates. Aligning first within replicates
283
+requires 10+10+1 total alignments whereas an all-pairwise alignment
284
+requires 45 pairwise alignments. In addition, this allows some
285
+flexibility in setting different gap and distance penalty parameters
286
+for the {\em within} alignment and {\em between} alignment. An
287
+example follows. 
288
+
289
+<<multiplealignment>>=
290
+print(targets)
291
+ma <- multipleAlignment(pd, group=targets$Group, wn.gap=0.5, wn.D=.05,
292
+                        bw.gap=.6, bw.D=0.05, usePeaks=TRUE, filterMin=1, 
293
+                        df=50, verbose=FALSE, metric = 1, type = 1) # bug
294
+ma
295
+@
296
+
297
+If you set \texttt{verbose=TRUE}, many nitty-gritty details of the
298
+alignment procedure are given.  Next, we can take the alignment
299
+results and overlay it onto the TICs, allowing a visual inspection. 
300
+
301
+<<multiplealignmentfig,fig.width=9,fig.height=7>>=
302
+plotChrom(pd, rtrange=c(7.5,8.5), runs=ma@betweenAlignment@runs,
303
+     mind=ma@betweenAlignment@ind, plotPeaks=TRUE,
304
+     plotPeakLabels=TRUE, max.near=8, how.near=.5,
305
+     col=rep(c("blue","red","black"), each=3))
306
+@
307
+
308
+
309
+% \section{Correlation Alignment algorithm}
310
+% Another approach, represented by the \texttt{correlationAlignment}
311
+% function, is to use a modified form of the Pearson correlation
312
+% algorithm. After the correlation between two samples is calculated, a
313
+% penalization coefficient, based on the retention time differences, is
314
+% applied to the result. It is also possible to set a retention time
315
+% range in which the penalization is 0, this because in gas
316
+% chromatography we can have a little deviation in the retention time of
317
+% the metabolite so, based on the experimental data, we can choose the
318
+% retention time window for the penalization coefficient being applied.
319
+
320
+<<correlationAlignment, eval=FALSE>>=
321
+mp <- correlationAlignment(object=pd.2, thr=0.85, D=20, penality=0.2,
322
+                           normalize=TRUE, minFilter=1)
323
+mp
324
+@ 
325
+
326
+% \noindent where \texttt{thr} represent correlation threshold from 0
327
+% (min) to 1 (max); \texttt{D} represent the retention time window in
328
+% seconds; \texttt{penality} represent the penality inflicted to a match
329
+% between two peaks when the retention time difference exceed the
330
+% parameter \texttt{D}; \texttt{normalize} is about the peak
331
+% normalization-to-100 before the correlation is calculated;
332
+% \texttt{minFilter} give the opportunity to exclude from the resulting
333
+% correlation matrix each feature that in represented in our samples
334
+% less time than this value. The value of minFilter must be smaller than
335
+% the number of samples. 
336
+
337
+% The correlation-based peak alignment for multiple GC-MS
338
+% peak lists uses a center-star technique to the alignment of the
339
+% peaks. The combination of the \texttt{D} and \texttt{penality} parameters
340
+% allow the users to force the algorithm to match the peaks close to the
341
+% reference. The \texttt{thr} parameter control the matching factor.
342
+
343
+
344
+\subsection{Gathering results}
345
+The alignment results can be extracted from the \texttt{multipleAlignment}
346
+object as: 
347
+<<multiplealignmentres>>=
348
+ma@betweenAlignment@runs
349
+ma@betweenAlignment@ind
350
+@
351
+
352
+\noindent This table would suggest that matched peak \texttt{8} (see
353
+numbers below the TICs in the figure above) corresponds to detected
354
+peaks \texttt{9, 12, 11} in runs \texttt{4, 5, 6} and so on, same as
355
+shown in the above plot. 
356
+
357
+In addition, you can gather a list of all the merged peaks with the
358
+\texttt{gatherInfo} function, giving elements for the retention times,
359
+the detected fragment ions and their intensities.  The example below
360
+also shows the how to construct a table of retention times of the
361
+matched peaks (No attempt is made here to adjust retention times onto
362
+a common scale.  Instead, the peaks are matched to each other on their
363
+original scale).  For example: 
364
+
365
+<<alignmentres>>=
366
+outList <- gatherInfo(pd,ma)
367
+outList[[8]]
368
+rtmat <- matrix(unlist(lapply(outList,.subset,"rt"), use.names=FALSE),
369
+                nr=length(outList), byrow=TRUE)
370
+colnames(rtmat) <- names(outList[[1]]$rt); rownames(rtmat) <- 1:nrow(rtmat)
371
+round(rtmat, 3)
372
+@
373
+
374
+
375
+\section{Future improvements and extension}
376
+There are many procedures that we have implemented in our
377
+investigation of GC-MS data, but have not made part of the package just
378
+yet. Some of the most useful procedures will be released, such as: 
379
+
380
+\begin{enumerate}
381
+\item Parsers for other peak detection algorithms (e.g. % XCMS,
382
+  MzMine) and parsers for other alignment procedures
383
+  (e.g. SpectConnect) and perhaps retention indices procedures. 
384
+\item More convenient access to the alignment information and
385
+  abundance table. 
386
+\item Statistical analysis of differential metabolite abundance.
387
+\item Fragment-level analysis, an alternative method to summarize
388
+  abundance across all detected fragments of a metabolite peak. 
389
+\end{enumerate}
390
+
391
+\section{References}
392
+See the following for further details:
393
+
394
+\begin{enumerate}
395
+\item Robinson MD. {\em Methods for the analysis of gas chromatography
396
+    - mass spectrometry data.} {\bf Ph.D. Thesis}. October 2008.
397
+  Department of Medical Biology (Walter and Eliza Hall Institute of
398
+  Medical Research), University of Melbourne. 
399
+\item Robinson MD, De Souza DP, Keen WW, Saunders EC, McConville MJ,
400
+  Speed TP, Liki\'{c} VA. (2007) {\em A dynamic programming approach
401
+    for the alignment of signal peaks in multiple gas
402
+    chromatography-mass spectrometry experiments.} {\bf BMC
403
+    Bioinformatics}. 8:419. 
404
+\item Prince JT, Marcotte EM (2006) {\em Chromatographic alignment of
405
+    ESI-LC-MS proteomics data sets by ordered bijective interpolated
406
+    warping}. {\bf Anal Chem}. 78(17):6140-52. 
407
+\end{enumerate}
408
+
409
+\section{This vignette was built with/at ...}
410
+
411
+<<session>>=
412
+sessionInfo()
413
+date()
414
+@
415
+
416
+\end{document}
... ...
@@ -16,7 +16,7 @@
16 16
 
17 17
 \begin{document}
18 18
 
19
-%\VignetteIndexEntry{Using flagme -- Fragment-level analysis of GC-MS-based metabolomics data}
19
+%\VignetteIndexEntry{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based metabolomics data}
20 20
 
21 21
 \title{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based 
22 22
   metabolomics data}
... ...
@@ -105,9 +105,15 @@ according to their retention time using CAMERA and attaches them to an
105 105
 already created \texttt{peaksDataset} object:
106 106
 
107 107
 <<addXCMS>>=
108
-pd.2 <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5))
109
-pd.2 <- addXCMSPeaks(cdfFiles[1:3], pd.2, peakPicking=c('mF'),
110
-                     snthresh=3, fwhm=4, step=1, steps=2, mzdiff=0.5)
108
+pd.2 <- peaksDataset(cdfFiles[1:3], mz = seq(50, 550), rtrange = c(7.5, 8.5))
109
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
110
+  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
111
+  extendLengthMSW = TRUE, mzCenterFun = "wMean")
112
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
113
+pd.2 <- addXCMSPeaks(cdfFiles[1:3], pd.2, settings = mfp, minintens = 100,
114
+  multipleMatchedFilter = FALSE, multipleMatchedFilterParam = 
115
+  list(fwhm = c(5, 10, 20), rt_abs = 2, mz_abs = 0.1)
116
+  )
111 117
 pd.2
112 118
 @ 
113 119
 
... ...
@@ -117,7 +123,7 @@ Regardless of platform and peak detection algorithm, a useful
117 123
 visualization of a set of samples is the set of total ion currents
118 124
 (TIC), or extracted ion currents (XICs). To view TICs, you can call:
119 125
 
120
-<<plotexample1, fig=TRUE, width=9, height=7>>=
126
+<<plotexample1, fig.width=9, fig.height=7>>=
121 127
 plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE,
122 128
      max.near=8, how.near=0.5, col=rep(c("blue","red","black"), each=3))
123 129
 @
... ...
@@ -142,7 +148,7 @@ heatmap of intensity. Such plots can be enlightening, especially when
142 148
 peak detection results are overlaid. For example (with detected
143 149
 fragment peaks from AMDIS shown in white): 
144 150
 
145
-<<plotexample2,fig=TRUE,width=9,height=7>>=
151
+<<plotexample2,fig.width=9,fig.height=7>>=
146 152
 r <- 1
147 153
 plotImage(pd, run=r, rtrange=c(7.5,8.5), main="")
148 154
 v <- which(pd@peaksdata[[r]] > 0, arr.ind=TRUE) # find detected peaks
... ...
@@ -208,7 +214,7 @@ scale (i.e. seconds or minutes) of the \texttt{peaksrt} slot of the
208 214
 the \texttt{gap} and \texttt{D} penalty on the matching of a small
209 215
 ranges of peaks. 
210 216
 
211
-<<pairwisealignexample, fig=TRUE, width=7, height=7>>=
217
+<<pairwisealignexample, fig.width=7, fig.height=7>>=
212 218
 Ds <- c(0.1, 10, 0.1, 0.1)
213 219
 gaps <- c(0.5, 0.5, 0.1, 0.9)
214 220
 par(mfrow=c(2,2), mai=c(0.8466,0.4806,0.4806,0.1486))
... ...
@@ -292,7 +298,7 @@ If you set \texttt{verbose=TRUE}, many nitty-gritty details of the
292 298
 alignment procedure are given.  Next, we can take the alignment
293 299
 results and overlay it onto the TICs, allowing a visual inspection. 
294 300
 
295
-<<multiplealignmentfig,fig=TRUE,width=9,height=7>>=
301
+<<multiplealignmentfig,fig.width=9,fig.height=7>>=
296 302
 plotChrom(pd, rtrange=c(7.5,8.5), runs=ma@betweenAlignment@runs,
297 303
      mind=ma@betweenAlignment@ind, plotPeaks=TRUE,
298 304
      plotPeakLabels=TRUE, max.near=8, how.near=.5,
299 305
Binary files a/vignettes/flagme.pdf and b/vignettes/flagme.pdf differ
300 306
new file mode 100644
... ...
@@ -0,0 +1,965 @@
1
+\documentclass{article}\usepackage[]{graphicx}\usepackage[]{color}
2
+% maxwidth is the original width if it is less than linewidth
3
+% otherwise use linewidth (to make sure the graphics do not exceed the margin)
4
+\makeatletter
5
+\def\maxwidth{ %
6
+  \ifdim\Gin@nat@width>\linewidth
7
+    \linewidth
8
+  \else
9
+    \Gin@nat@width
10
+  \fi
11
+}
12
+\makeatother
13
+
14
+\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
15
+\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
16
+\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
17
+\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
18
+\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
19
+\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
20
+\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
21
+\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
22
+\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
23
+\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
24
+\let\hlipl\hlkwb
25
+
26
+\usepackage{framed}
27
+\makeatletter
28
+\newenvironment{kframe}{%
29
+ \def\at@end@of@kframe{}%
30
+ \ifinner\ifhmode%
31
+  \def\at@end@of@kframe{\end{minipage}}%
32
+  \begin{minipage}{\columnwidth}%
33
+ \fi\fi%
34
+ \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
35
+ \colorbox{shadecolor}{##1}\hskip-\fboxsep
36
+     % There is no \\@totalrightmargin, so:
37
+     \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
38
+ \MakeFramed {\advance\hsize-\width
39
+   \@totalleftmargin\z@ \linewidth\hsize
40
+   \@setminipage}}%
41
+ {\par\unskip\endMakeFramed%
42
+ \at@end@of@kframe}
43
+\makeatother
44
+
45
+\definecolor{shadecolor}{rgb}{.97, .97, .97}
46
+\definecolor{messagecolor}{rgb}{0, 0, 0}
47
+\definecolor{warningcolor}{rgb}{1, 0, 1}
48
+\definecolor{errorcolor}{rgb}{1, 0, 0}
49
+\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
50
+
51
+\usepackage{alltt}
52
+
53
+\usepackage{amsmath}
54
+\usepackage{amscd}
55
+\usepackage[tableposition=top]{caption}
56
+\usepackage{ifthen}
57
+\usepackage[utf8]{inputenc}
58
+\topmargin 0in
59
+\headheight 0in
60
+\headsep 0in
61
+\oddsidemargin 0in
62
+\evensidemargin 0in
63
+\textwidth 176mm
64
+\textheight 215mm
65
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
66
+\begin{document}
67
+
68
+%\VignetteIndexEntry{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based metabolomics data}
69
+
70
+\title{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based 
71
+  metabolomics data}
72
+\author{Mark Robinson \\ \texttt{[email protected]} \\ Riccardo  
73
+  Romoli \\ \texttt{[email protected]}} 
74
+\maketitle
75
+
76
+
77
+\section{Introduction}
78
+\noindent This document gives a brief introduction to the
79
+\texttt{flagme} package, which is designed to process, visualise and
80
+statistically analyze sets of GC-MS samples. The ideas discussed here
81
+were originally designed with GC-MS-based metabolomics in mind, but
82
+indeed some of the methods and visualizations could be useful for
83
+LC-MS data sets. The {\em fragment-level analysis} though, takes
84
+advantage of the rich fragmentation patterns observed from electron
85
+interaction (EI) ionization. 
86
+
87
+There are many aspects of data processing for GC-MS data. Generally,
88
+algorithms are run separately on each sample to detect features, or
89
+{\em peaks} (e.g. AMDIS). Due to retention time shifts from
90
+run-to-run, an alignment algorithm is employed to allow the matching
91
+of the same feature across multiple samples.  Alternatively, if known
92
+standards are introduced to the samples, retention {\em indices} can
93
+be computed for each peak and used for alignment. After peaks are
94
+matched across all samples, further processing steps are employed to
95
+create a matrix of abundances, leading into detecting differences in
96
+abundance. 
97
+
98
+Many of these data processing steps are prone to errors and they often
99
+tend to be black boxes. But, with effective exploratory data
100
+analysis, many of the pitfalls can be avoided and any problems can be
101
+fixed before proceeding to the downstream statistical analysis. The
102
+package provides various visualizations to ensure the methods applied
103
+are not black boxes. 
104
+
105
+The \texttt{flagme} package gives a complete suite of methods to go
106
+through all common stages of data processing. In addition, R is
107
+especially well suited to the downstream data analysis tasks since it
108
+is very rich in analysis tools and has excellent visualization
109
+capabilities. In addition, it is freely available
110
+(\texttt{www.r-project.org}), extensible and there is a growing
111
+community of users and developers. For routine analyses, graphical
112
+user interfaces could be designed. 
113
+
114
+
115
+\section{Reading and visualizing GC-MS data}
116
+To run these examples, you must have the \texttt{gcspikelite} package
117
+installed.  This data package contains several GC-MS samples from a
118
+spike-in experiment we designed to interrogate data processing
119
+methods.  So, first, we load the packages: 
120
+
121
+\begin{knitrout}
122
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
123
+
124
+
125
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: gcspikelite}}
126
+
127
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: xcms}}
128
+
129
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: BiocParallel}}
130
+
131
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: MSnbase}}
132
+
133
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: BiocGenerics}}
134
+
135
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: parallel}}
136
+
137
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'BiocGenerics'}}
138
+
139
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:parallel':\\\#\# \\\#\# \ \ \ \ clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,\\\#\# \ \ \ \ clusterExport, clusterMap, parApply, parCapply, parLapply,\\\#\# \ \ \ \ parLapplyLB, parRapply, parSapply, parSapplyLB}}
140
+
141
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:stats':\\\#\# \\\#\# \ \ \ \ IQR, mad, sd, var, xtabs}}
142
+
143
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:base':\\\#\# \\\#\# \ \ \ \ anyDuplicated, append, as.data.frame, basename, cbind, colnames,\\\#\# \ \ \ \ dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,\\\#\# \ \ \ \ grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,\\\#\# \ \ \ \ order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,\\\#\# \ \ \ \ rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,\\\#\# \ \ \ \ union, unique, unsplit, which.max, which.min}}
144
+
145
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: Biobase}}
146
+
147
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Welcome to Bioconductor\\\#\# \\\#\# \ \ \ \ Vignettes contain introductory material; view with\\\#\# \ \ \ \ 'browseVignettes()'. To cite Bioconductor, see\\\#\# \ \ \ \ 'citation("{}Biobase"{})', and for packages 'citation("{}pkgname"{})'.}}
148
+
149
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: mzR}}
150
+
151
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: Rcpp}}
152
+
153
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: S4Vectors}}
154
+
155
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: stats4}}
156
+
157
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'S4Vectors'}}
158
+
159
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:base':\\\#\# \\\#\# \ \ \ \ expand.grid, I, unname}}
160
+
161
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: ProtGenerics}}
162
+
163
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'ProtGenerics'}}
164
+
165
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following object is masked from 'package:stats':\\\#\# \\\#\# \ \ \ \ smooth}}
166
+
167
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# This is MSnbase version 2.18.0 \\\#\# \ \ Visit https://lgatto.github.io/MSnbase/ to get started.}}
168
+
169
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'MSnbase'}}
170
+
171
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following object is masked from 'package:base':\\\#\# \\\#\# \ \ \ \ trimws}}
172
+
173
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# This is xcms version 3.14.1}}
174
+
175
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'xcms'}}
176
+
177
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following object is masked from 'package:stats':\\\#\# \\\#\# \ \ \ \ sigma}}
178
+
179
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: CAMERA}}\end{kframe}
180
+\end{knitrout}
181
+
182
+
183
+To load the data and corresponding peak detection results, we simply
184
+create vectors of the file-names and create a \texttt{peakDataset}
185
+object. Note that we can speed up the import time by setting the
186
+retention time range to a subset of the elution, as below: 
187
+
188
+\begin{knitrout}
189
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
190
+\begin{alltt}
191
+\hlstd{gcmsPath} \hlkwb{<-} \hlkwd{paste}\hlstd{(}\hlkwd{find.package}\hlstd{(}\hlstr{"gcspikelite"}\hlstd{),} \hlstr{"data"}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"/"}\hlstd{)}
192
+\hlkwd{data}\hlstd{(targets)}
193
+\hlstd{cdfFiles} \hlkwb{<-} \hlkwd{paste}\hlstd{(gcmsPath, targets}\hlopt{$}\hlstd{FileName,} \hlkwc{sep}\hlstd{=}\hlstr{"/"}\hlstd{)}
194
+\hlstd{eluFiles} \hlkwb{<-} \hlkwd{gsub}\hlstd{(}\hlstr{"CDF"}\hlstd{,} \hlstr{"ELU"}\hlstd{, cdfFiles)}
195
+\hlstd{pd} \hlkwb{<-} \hlkwd{peaksDataset}\hlstd{(cdfFiles,} \hlkwc{mz}\hlstd{=}\hlkwd{seq}\hlstd{(}\hlnum{50}\hlstd{,}\hlnum{550}\hlstd{),} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{))}
196
+\end{alltt}
197
+\begin{verbatim}
198
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_468.CDF
199
+\end{verbatim}
200
+
201
+
202
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
203
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_474.CDF
204
+\end{verbatim}
205
+
206
+
207
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
208
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_475.CDF
209
+\end{verbatim}
210
+
211
+
212
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
213
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_485.CDF
214
+\end{verbatim}
215
+
216
+
217
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
218
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_493.CDF
219
+\end{verbatim}
220
+
221
+
222
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
223
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_496.CDF
224
+\end{verbatim}
225
+
226
+
227
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
228
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_470.CDF
229
+\end{verbatim}
230
+
231
+
232
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
233
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_471.CDF
234
+\end{verbatim}
235
+
236
+
237
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
238
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_479.CDF
239
+\end{verbatim}
240
+
241
+
242
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{alltt}
243
+\hlstd{pd} \hlkwb{<-} \hlkwd{addAMDISPeaks}\hlstd{(pd, eluFiles)}
244
+\end{alltt}
245
+\begin{verbatim}
246
+## Reading retention time range: 7.500133 8.499917 
247
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_468.ELU ... Done.
248
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_474.ELU ... Done.
249
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_475.ELU ... Done.
250
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_485.ELU ... Done.
251
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_493.ELU ... Done.
252
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_496.ELU ... Done.
253
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_470.ELU ... Done.
254
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_471.ELU ... Done.
255
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_479.ELU ... Done.
256
+\end{verbatim}
257
+\begin{alltt}
258
+\hlstd{pd}
259
+\end{alltt}
260
+\begin{verbatim}
261
+## An object of class "peaksDataset"
262
+## 9 samples: 0709_468 0709_474 0709_475 0709_485 0709_493 0709_496 0709_470 0709_471 0709_479 
263
+## 501 m/z bins - range: ( 50 550 )
264
+## scans: 175 175 175 175 175 174 175 175 175 
265
+## peaks: 24 23 26 20 27 24 24 25 21
266
+\end{verbatim}
267
+\end{kframe}
268
+\end{knitrout}
269
+
270
+Here, we have added peaks from AMDIS, a well known and mature
271
+algorithm for deconvolution of GC-MS data. For GC-TOF-MS data, we have
272
+implemented a parser for the \texttt{ChromaTOF} output (see the
273
+analogous \texttt{addChromaTOFPeaks} function). The
274
+\texttt{addXCMSPeaks} allows to use all the XCMS peak-picking
275
+algorithms; using this approach it is also possible to elaborate the
276
+raw data file from within R instead of using an external software.
277
+%% Support for XMCS or MzMine may be added in the future. Ask the author
278
+%% if another detection result format is desired as the parsers are
279
+%% generally easy to design.   
280
+In particular the function reads the raw data using XCMS, group each extracted ion
281
+according to their retention time using CAMERA and attaches them to an
282
+already created \texttt{peaksDataset} object:
283
+
284
+\begin{knitrout}
285
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
286
+\begin{alltt}
287
+\hlstd{pd.2} \hlkwb{<-} \hlkwd{peaksDataset}\hlstd{(cdfFiles[}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{],} \hlkwc{mz} \hlstd{=} \hlkwd{seq}\hlstd{(}\hlnum{50}\hlstd{,} \hlnum{550}\hlstd{),} \hlkwc{rtrange} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,} \hlnum{8.5}\hlstd{))}
288
+\end{alltt}
289
+\begin{verbatim}
290
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_468.CDF
291
+\end{verbatim}
292
+
293
+
294
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
295
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_474.CDF
296
+\end{verbatim}
297
+
298
+
299
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
300
+##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_475.CDF
301
+\end{verbatim}
302
+
303
+
304
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{alltt}
305
+\hlstd{cwt} \hlkwb{<-} \hlstd{xcms}\hlopt{::}\hlkwd{CentWaveParam}\hlstd{(}\hlkwc{snthresh} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{ppm} \hlstd{=} \hlnum{3000}\hlstd{,} \hlkwc{peakwidth} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,} \hlnum{40}\hlstd{),}
306
+  \hlkwc{prefilter} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,} \hlnum{100}\hlstd{),} \hlkwc{fitgauss} \hlstd{=} \hlnum{FALSE}\hlstd{,} \hlkwc{integrate} \hlstd{=} \hlnum{2}\hlstd{,} \hlkwc{noise} \hlstd{=} \hlnum{0}\hlstd{,}
307
+  \hlkwc{extendLengthMSW} \hlstd{=} \hlnum{TRUE}\hlstd{,} \hlkwc{mzCenterFun} \hlstd{=} \hlstr{"wMean"}\hlstd{)}
308
+\hlstd{mfp} \hlkwb{<-} \hlstd{xcms}\hlopt{::}\hlkwd{MatchedFilterParam}\hlstd{(}\hlkwc{fwhm} \hlstd{=} \hlnum{10}\hlstd{,} \hlkwc{snthresh} \hlstd{=} \hlnum{5}\hlstd{)}
309
+\hlstd{pd.2} \hlkwb{<-} \hlkwd{addXCMSPeaks}\hlstd{(cdfFiles[}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{], pd.2,} \hlkwc{settings} \hlstd{= mfp,} \hlkwc{minintens} \hlstd{=} \hlnum{100}\hlstd{,}
310
+  \hlkwc{multipleMatchedFilter} \hlstd{=} \hlnum{FALSE}\hlstd{,} \hlkwc{multipleMatchedFilterParam} \hlstd{=}
311
+  \hlkwd{list}\hlstd{(}\hlkwc{fwhm} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{5}\hlstd{,} \hlnum{10}\hlstd{,} \hlnum{20}\hlstd{),} \hlkwc{rt_abs} \hlstd{=} \hlnum{2}\hlstd{,} \hlkwc{mz_abs} \hlstd{=} \hlnum{0.1}\hlstd{)}
312
+  \hlstd{)}
313
+\end{alltt}
314
+
315
+
316
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}
317
+
318
+{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in xcmsSet(x, method = "{}centWave"{}, prefilter = c(5, 100), scanrange = scanRange, : Chromatographic peak detection failed for all files! The first error was: Error in .local(object, ...): unused arguments (settings = new("{}MatchedFilterParam"{}, binSize = 0.1, impute = "{}none"{}, baseValue = numeric(0), distance = numeric(0), fwhm = 10, sigma = 4.24664515033124, max = 5, snthresh = 5, steps = 2, mzdiff = 0.6, index = FALSE), minintens = 100, multipleMatchedFilter = FALSE, multipleMatchedFilterParam = list(c(5, 10, 20), 2, 0.1))}}\begin{alltt}
319
+\hlstd{pd.2}
320
+\end{alltt}
321
+\begin{verbatim}
322
+## An object of class "peaksDataset"
323
+## 3 samples: 0709_468 0709_474 0709_475 
324
+## 501 m/z bins - range: ( 50 550 )
325
+## scans: 175 175 175 
326
+## peaks:
327
+\end{verbatim}
328
+\end{kframe}
329
+\end{knitrout}
330
+
331
+The possibility to work using computer cluster will be added in the future. 
332
+
333
+Regardless of platform and peak detection algorithm, a useful
334
+visualization of a set of samples is the set of total ion currents
335
+(TIC), or extracted ion currents (XICs). To view TICs, you can call:
336
+
337
+\begin{knitrout}
338
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
339
+\begin{alltt}
340
+\hlkwd{plotChrom}\hlstd{(pd,} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{),} \hlkwc{plotPeaks}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{plotPeakLabels}\hlstd{=}\hlnum{TRUE}\hlstd{,}
341
+     \hlkwc{max.near}\hlstd{=}\hlnum{8}\hlstd{,} \hlkwc{how.near}\hlstd{=}\hlnum{0.5}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{rep}\hlstd{(}\hlkwd{c}\hlstd{(}\hlstr{"blue"}\hlstd{,}\hlstr{"red"}\hlstd{,}\hlstr{"black"}\hlstd{),} \hlkwc{each}\hlstd{=}\hlnum{3}\hlstd{))}
342
+\end{alltt}
343
+
344
+
345
+{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in (function (classes, fdef, mtable) : unable to find an inherited method for function 'plotChrom' for signature '"{}peaksDataset"{}'}}\end{kframe}
346
+\end{knitrout}
347
+
348
+Note here the little {\em hashes} represent the detected peaks and are
349
+labelled with an integer index. One of the main challenges is to match
350
+these peak detections across several samples, given that the appear at
351
+slightly different times in different runs.
352
+
353
+For XICs, you need to give the indices (of \texttt{pd@mz}, the grid of
354
+mass-to-charge values) that you want to plot through the
355
+\texttt{mzind} argument.  This could be a single ion
356
+(e.g. \texttt{mzind=24}) or could be a range of indices if multiple
357
+ions are of interest (e.g. \texttt{mzind=c(24,25,98,99)}). 
358
+
359
+There are several other features within the \texttt{plot} command on
360
+\texttt{peaksDataset} objects that can be useful. See \texttt{?plot}
361
+(and select the flagme version) for full details. 
362
+
363
+Another useful visualization, at least for individual samples, is a 2D
364
+heatmap of intensity. Such plots can be enlightening, especially when
365
+peak detection results are overlaid. For example (with detected
366
+fragment peaks from AMDIS shown in white): 
367
+
368
+\begin{knitrout}
369
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
370
+\begin{alltt}
371
+\hlstd{r} \hlkwb{<-} \hlnum{1}
372
+\hlkwd{plotImage}\hlstd{(pd,} \hlkwc{run}\hlstd{=r,} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{),} \hlkwc{main}\hlstd{=}\hlstr{""}\hlstd{)}
373
+\hlstd{v} \hlkwb{<-} \hlkwd{which}\hlstd{(pd}\hlopt{@}\hlkwc{peaksdata}\hlstd{[[r]]} \hlopt{>} \hlnum{0}\hlstd{,} \hlkwc{arr.ind}\hlstd{=}\hlnum{TRUE}\hlstd{)} \hlcom{# find detected peaks}
374
+\hlkwd{abline}\hlstd{(}\hlkwc{v}\hlstd{=pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[r]])}
375
+\hlkwd{points}\hlstd{(pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[r]][v[,}\hlnum{2}\hlstd{]], pd}\hlopt{@}\hlkwc{mz}\hlstd{[v[,}\hlnum{1}\hlstd{]],} \hlkwc{pch}\hlstd{=}\hlnum{19}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{.6}\hlstd{,} \hlkwc{col}\hlstd{=}\hlstr{"white"}\hlstd{)}
376
+\end{alltt}
377
+\end{kframe}
378
+\includegraphics[width=\maxwidth]{figure/plotexample2-1} 
379
+\end{knitrout}
380
+
381
+
382
+\section{Pairwise alignment with dynamic programming algorithm}
383
+One of the first challenges of GC-MS data is the matching of detected
384
+peaks (i.e. metabolites) across several samples. Although gas
385
+chromatography is quite robust, there can be some drift in the elution
386
+time of the same analyte from run to run. We have devised a strategy,
387
+based on dynamic programming, that takes into account both the
388
+similarity in spectrum (at the apex of the called peak) and the
389
+similarity in retention time, without requiring the identity of each
390
+peak; this matching uses the data alone. If each sample gives a `peak
391
+list' of the detected peaks (such as that from AMDIS that we have
392
+attached to our \texttt{peaksDataset} object), the challenge is to
393
+introduce gaps into these lists such that they are best aligned. From
394
+this a matrix of retention times or a matrix of peak abundances can be
395
+extracted for further statistical analysis, visualization and
396
+interpretation. For this matching, we created a procedure analogous to
397
+a multiple {\em sequence} alignment. 
398
+
399
+To highlight the dynamic programming-based alignment strategy, we
400
+first illustrate a pairwise alignment of two peak lists. This example
401
+also illustrates the selection of parameters necessary for the
402
+alignment. From the data read in previously, let us consider the
403
+alignment of two samples, denoted \texttt{0709\_468} and
404
+\texttt{0709\_474}. First, a similarity matrix for two samples is
405
+calculated. This is calculated based on a scoring function and takes
406
+into account the similarity in retention time and in the similarity of
407
+the apex spectra, according to: 
408
+\[
409
+S_{ij}(D) = \frac{\sum_{k=1}^K x_{ik} y_{jk}}{\sqrt{ \sum_{k=1}^K
410
+    x_{ik}^2 \cdot \sum_{k=1}^K y_{jk}^2 } } \cdot \exp \left( -
411
+  \frac{1}{2} \frac{(t_i-t_j)^2}{D^2} \right) 
412
+\]
413
+\noindent where $i$ is the index of the peak in the first sample and
414
+$j$ is the index of the peak in the second sample, $\mathbf{x}_i$ and
415
+$\mathbf{y}_j$ are the spectra vectors and $t_i$ and $t_j$ are their
416
+respective retention times. As you can see, there are two components
417
+to the similarity: spectra similarity (left term) and similarity in
418
+retention time (right term). Of course, other metrics for spectra
419
+similarity are feasible. Ask the author if you want to see other
420
+metrices implemented. We have some non-optimized code for a few
421
+alternative metrics. 
422
+
423
+The peak alignment algorithm, much like sequence alignments, requires
424
+a \texttt{gap} parameter to be set, here a number between 0 and 1.  A
425
+high gap penalty discourages gaps when matching the two lists of peaks
426
+and a low gap penalty allows gaps at a very low {\em cost}.  We find
427
+that a gap penalty in the middle range (0.4-0.6) works well for GC-MS
428
+peak matching.  Another parameter, \texttt{D}, modulates the impact of
429
+the difference in retention time penalty. A large value for
430
+\texttt{D} essentially eliminates the effect. Generally, we set this
431
+parameter to be a bit larger than the average width of a peak,
432
+allowing a little flexibility for retention time shifts between
433
+samples. Keep in mind the \texttt{D} parameter should be set on the
434
+scale (i.e. seconds or minutes) of the \texttt{peaksrt} slot of the
435
+\texttt{peaksDataset} object. The next example shows the effect of
436
+the \texttt{gap} and \texttt{D} penalty on the matching of a small
437
+ranges of peaks. 
438
+
439
+\begin{knitrout}
440
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
441
+\begin{alltt}
442
+\hlstd{Ds} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlnum{0.1}\hlstd{,} \hlnum{10}\hlstd{,} \hlnum{0.1}\hlstd{,} \hlnum{0.1}\hlstd{)}
443
+\hlstd{gaps} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlnum{0.5}\hlstd{,} \hlnum{0.5}\hlstd{,} \hlnum{0.1}\hlstd{,} \hlnum{0.9}\hlstd{)}
444
+\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{2}\hlstd{),} \hlkwc{mai}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0.8466}\hlstd{,}\hlnum{0.4806}\hlstd{,}\hlnum{0.4806}\hlstd{,}\hlnum{0.1486}\hlstd{))}
445
+\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlnum{4}\hlstd{)\{}
446
+  \hlstd{pa} \hlkwb{<-} \hlkwd{peaksAlignment}\hlstd{(pd}\hlopt{@}\hlkwc{peaksdata}\hlstd{[[}\hlnum{1}\hlstd{]], pd}\hlopt{@}\hlkwc{peaksdata}\hlstd{[[}\hlnum{2}\hlstd{]],}
447
+                       \hlstd{pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[}\hlnum{1}\hlstd{]], pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[}\hlnum{2}\hlstd{]],} \hlkwc{D}\hlstd{=Ds[i],}
448
+                       \hlkwc{gap}\hlstd{=gaps[i],} \hlkwc{metric}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{type}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{compress} \hlstd{=} \hlnum{FALSE}\hlstd{)}
449
+  \hlkwd{plotAlignment}\hlstd{(pa,} \hlkwc{xlim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,} \hlnum{17}\hlstd{),} \hlkwc{ylim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,} \hlnum{16}\hlstd{),} \hlkwc{matchCol}\hlstd{=}\hlstr{"yellow"}\hlstd{,}
450
+       \hlkwc{main}\hlstd{=}\hlkwd{paste}\hlstd{(}\hlstr{"D="}\hlstd{, Ds[i],} \hlstr{" gap="}\hlstd{, gaps[i],} \hlkwc{sep}\hlstd{=}\hlstr{""}\hlstd{))}
451
+\hlstd{\}}
452
+\end{alltt}
453
+\begin{verbatim}
454
+## [peaksAlignment] Comparing 24 peaks to 23 peaks -- gap= 0.5 D= 0.001 , metric= 1 , type= 1 
455
+## [peaksAlignment]  21 matched.  Similarity= 0.7983038
456
+\end{verbatim}
457
+
458
+
459
+{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in plotAlignment(pa, xlim = c(0, 17), ylim = c(0, 16), matchCol = "{}yellow"{}, : could not find function "{}plotAlignment"{}}}\end{kframe}
460
+\end{knitrout}
461
+
462
+You might ask: is the flagme package useful without peak detection
463
+results? Possibly. There have been some developments in alignment
464
+(generally on LC-MS proteomics experiments) without peak/feature
465
+detection, such as Prince et al. 2006, where a very similar dynamic
466
+programming is used for a pairwise alignment. We have experimented
467
+with alignments without using the peaks, but do not have any
468
+convincing results. It does introduce a new set of challenges in terms
469
+of highlighting differentially abundant metabolites. However, in the
470
+\texttt{peaksAlignment} routine above (and those mentioned below), you
471
+can set \texttt{usePeaks=FALSE} in order to do {\em scan}-based
472
+alignments instead of {\em peak}-based alignments. In addition, the
473
+\texttt{flagme} package may be useful simply for its bare-bones
474
+dynamic programming algorithm. 
475
+
476
+
477
+\subsection{Normalizing retention time score to drift estimates}
478
+In what is mentioned above for pairwise alignments, we are penalizing
479
+for differences in retention times that are non-zero. But, as you can
480
+see from the TICs, some differences in retention time are
481
+consistent. For example, all of the peaks from sample
482
+\texttt{0709\_485} elute at later times than peaks from sample
483
+\texttt{0709\_496}. We should be able to estimate this drift and
484
+normalize the time penalty to that estimate, instead of penalizing to
485
+zero. That is, we should replace $t_i-t_j$ with $t_i-t_j-\hat{d}_{ij}$
486
+where $\hat{d}_{ij}$ is the expected drift between peak $i$ of the
487
+first sample and peak $j$ of the second sample. 
488
+
489
+More details coming soon.
490
+
491
+
492
+\subsection{Imputing location of undetected peaks}
493
+One goal of the alignment leading into downstream data analyses is the
494
+generation of a table of abundances for each metabolite across all
495
+samples. As you can see from the TICs above, there are some low
496
+intensity peaks that fall below detection in some but not all
497
+samples. Our view is that instead of inserting arbitrary low constants
498
+(such as 0 or half the detection limit) or imputing the intensities
499
+post-hoc or having missing data in the data matrices, it is best to
500
+return to the area of the where the peak should be and give some kind
501
+of abundance. The alignments themselves are rich in information with
502
+respect to the location of undetected peaks. We feel this is a more
503
+conservative and statistically valid approach than introducing
504
+arbitrary values. 
505
+
506
+More details coming soon.
507
+
508
+
509
+\section{Multiple alignment of several experimental groups}
510
+Next, we discuss the multiple alignment of peaks across many
511
+samples. With replicates, we typically do an alignment within
512
+replicates, then combine these together into a summarized form. This
513
+cuts down on the computational cost. For example, consider 2 sets of
514
+samples, each with 5 replicates. Aligning first within replicates
515
+requires 10+10+1 total alignments whereas an all-pairwise alignment
516
+requires 45 pairwise alignments. In addition, this allows some
517
+flexibility in setting different gap and distance penalty parameters
518
+for the {\em within} alignment and {\em between} alignment. An
519
+example follows. 
520
+
521
+\begin{knitrout}
522
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
523
+\begin{alltt}
524
+\hlkwd{print}\hlstd{(targets)}
525
+\end{alltt}
526
+\begin{verbatim}
527
+##       FileName Group
528
+## 1 0709_468.CDF   mmA
529
+## 2 0709_474.CDF   mmA
530
+## 3 0709_475.CDF   mmA
531
+## 4 0709_485.CDF   mmC
532
+## 5 0709_493.CDF   mmC
533
+## 6 0709_496.CDF   mmC
534
+## 7 0709_470.CDF   mmD
535
+## 8 0709_471.CDF   mmD
536
+## 9 0709_479.CDF   mmD
537
+\end{verbatim}
538
+\begin{alltt}
539
+\hlstd{ma} \hlkwb{<-} \hlkwd{multipleAlignment}\hlstd{(pd,} \hlkwc{group}\hlstd{=targets}\hlopt{$}\hlstd{Group,} \hlkwc{wn.gap}\hlstd{=}\hlnum{0.5}\hlstd{,} \hlkwc{wn.D}\hlstd{=}\hlnum{.05}\hlstd{,}
540
+                        \hlkwc{bw.gap}\hlstd{=}\hlnum{.6}\hlstd{,} \hlkwc{bw.D}\hlstd{=}\hlnum{0.05}\hlstd{,} \hlkwc{usePeaks}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{filterMin}\hlstd{=}\hlnum{1}\hlstd{,}
541
+                        \hlkwc{df}\hlstd{=}\hlnum{50}\hlstd{,} \hlkwc{verbose}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{metric} \hlstd{=} \hlnum{1}\hlstd{,} \hlkwc{type} \hlstd{=} \hlnum{1}\hlstd{)} \hlcom{# bug}
542
+\end{alltt}
543
+\begin{verbatim}
544
+## [clusterAlignment] Aligning 0709_468 to 0709_474 
545
+## [peaksAlignment] Comparing 24 peaks to 23 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 
546
+## [peaksAlignment]  22 matched.  Similarity= 0.8625793 
547
+## [clusterAlignment] Aligning 0709_468 to 0709_475 
548
+## [peaksAlignment] Comparing 24 peaks to 26 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 
549
+## [peaksAlignment]  15 matched.  Similarity= 0.8 
550
+## [clusterAlignment] Aligning 0709_474 to 0709_475 
551
+## [peaksAlignment] Comparing 23 peaks to 26 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 
552
+## [peaksAlignment]  20 matched.  Similarity= 0.899699 
553
+## [progressiveAlignment] Doing merge -1 -3 
554
+## [progressiveAlignment] left.runs: 1 , right.runs: 3 
555
+## [progressiveAlignment] Doing merge -2 1 
556
+## [progressiveAlignment] left.runs: 2 , right.runs: 1 3 
557
+## [progressiveAlignment] (dot=50) going to 23 :
558
+##            used  (Mb) gc trigger  (Mb) max used  (Mb)
559
+## Ncells  7846781 419.1   12592810 672.6 12592810 672.6
560
+## Vcells 14553341 111.1   38545179 294.1 38545179 294.1
561
+## [clusterAlignment] Aligning 0709_485 to 0709_493 
562
+## [peaksAlignment] Comparing 20 peaks to 27 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 
563
+## [peaksAlignment]  20 matched.  Similarity= 0.9354748 
564
+## [clusterAlignment] Aligning 0709_485 to 0709_496 
565
+## [peaksAlignment] Comparing 20 peaks to 24 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 
566
+## [peaksAlignment]  20 matched.  Similarity= 0.9359244 
567
+## [clusterAlignment] Aligning 0709_493 to 0709_496 
568
+## [peaksAlignment] Comparing 27 peaks to 24 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 
569
+## [peaksAlignment]  22 matched.  Similarity= 0.8515771 
570
+## [progressiveAlignment] Doing merge -5 -6 
571
+## [progressiveAlignment] left.runs: 5 , right.runs: 6 
572
+## [progressiveAlignment] Doing merge -4 1 
573
+## [progressiveAlignment] left.runs: 4 , right.runs: 5 6 
574
+## [progressiveAlignment] (dot=50) going to 20 :
575
+##            used  (Mb) gc trigger  (Mb) max used  (Mb)
576
+## Ncells  7846949 419.1   12592810 672.6 12592810 672.6
577
+## Vcells 14554064 111.1   38545179 294.1 38545179 294.1
578
+## [clusterAlignment] Aligning 0709_470 to 0709_471 
579
+## [peaksAlignment] Comparing 24 peaks to 25 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 
580
+## [peaksAlignment]  22 matched.  Similarity= 0.8879722 
581
+## [clusterAlignment] Aligning 0709_470 to 0709_479 
582
+## [peaksAlignment] Comparing 24 peaks to 21 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 
583
+## [peaksAlignment]  21 matched.  Similarity= 0.8564714 
584
+## [clusterAlignment] Aligning 0709_471 to 0709_479 
585
+## [peaksAlignment] Comparing 25 peaks to 21 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 
586
+## [peaksAlignment]  19 matched.  Similarity= 0.8258931 
587
+## [progressiveAlignment] Doing merge -8 -9 
588
+## [progressiveAlignment] left.runs: 8 , right.runs: 9 
589
+## [progressiveAlignment] Doing merge -7 1 
590
+## [progressiveAlignment] left.runs: 7 , right.runs: 8 9 
591
+## [progressiveAlignment] (dot=50) going to 24 :
592
+##            used  (Mb) gc trigger  (Mb) max used  (Mb)
593
+## Ncells  7847143 419.1   12592810 672.6 12592810 672.6
594
+## Vcells 14555317 111.1   38545179 294.1 38545179 294.1
595
+## [clusterAlignment] Aligning to 
596
+## [peaksAlignment] Comparing 36 peaks to 29 peaks -- gap= 0.6 D= 5e-04 , metric= 1 , type= 1 
597
+## [peaksAlignment]  25 matched.  Similarity= 0.9094807 
598
+## [clusterAlignment] Aligning to 
599
+## [peaksAlignment] Comparing 36 peaks to 29 peaks -- gap= 0.6 D= 5e-04 , metric= 1 , type= 1 
600
+## [peaksAlignment]  29 matched.  Similarity= 0.8798354 
601
+## [clusterAlignment] Aligning to 
602
+## [peaksAlignment] Comparing 29 peaks to 29 peaks -- gap= 0.6 D= 5e-04 , metric= 1 , type= 1 
603
+## [peaksAlignment]  29 matched.  Similarity= 0.9655151 
604
+## [progressiveAlignment] Doing merge -1 -3 
605
+## [progressiveAlignment] left.runs: 1 , right.runs: 3 
606
+## [progressiveAlignment] Doing merge -2 1 
607
+## [progressiveAlignment] left.runs: 2 , right.runs: 1 3 
608
+## [progressiveAlignment] (dot=50) going to 29 :
609
+##            used  (Mb) gc trigger  (Mb) max used  (Mb)
610
+## Ncells  7848679 419.2   12592810 672.6 12592810 672.6
611
+## Vcells 14607964 111.5   38545179 294.1 38545179 294.1
612
+\end{verbatim}
613
+\begin{alltt}
614
+\hlstd{ma}
615
+\end{alltt}
616
+\begin{verbatim}
617
+## An object of class "multipleAlignment"
618
+## 3 groups: 3 3 3 samples, respectively.
619
+## 36 merged peaks
620
+\end{verbatim}
621
+\end{kframe}
622
+\end{knitrout}
623
+
624
+If you set \texttt{verbose=TRUE}, many nitty-gritty details of the
625
+alignment procedure are given.  Next, we can take the alignment
626
+results and overlay it onto the TICs, allowing a visual inspection. 
627
+
628
+\begin{knitrout}
629
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
630
+\begin{alltt}
631
+\hlkwd{plotChrom}\hlstd{(pd,} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{),} \hlkwc{runs}\hlstd{=ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{runs}\hlstd{,}
632
+     \hlkwc{mind}\hlstd{=ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{ind}\hlstd{,} \hlkwc{plotPeaks}\hlstd{=}\hlnum{TRUE}\hlstd{,}
633
+     \hlkwc{plotPeakLabels}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{max.near}\hlstd{=}\hlnum{8}\hlstd{,} \hlkwc{how.near}\hlstd{=}\hlnum{.5}\hlstd{,}
634
+     \hlkwc{col}\hlstd{=}\hlkwd{rep}\hlstd{(}\hlkwd{c}\hlstd{(}\hlstr{"blue"}\hlstd{,}\hlstr{"red"}\hlstd{,}\hlstr{"black"}\hlstd{),} \hlkwc{each}\hlstd{=}\hlnum{3}\hlstd{))}
635
+\end{alltt}
636
+
637
+
638
+{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in (function (classes, fdef, mtable) : unable to find an inherited method for function 'plotChrom' for signature '"{}peaksDataset"{}'}}\end{kframe}
639
+\end{knitrout}
640
+
641
+
642
+% \section{Correlation Alignment algorithm}
643
+% Another approach, represented by the \texttt{correlationAlignment}
644
+% function, is to use a modified form of the Pearson correlation
645
+% algorithm. After the correlation between two samples is calculated, a
646
+% penalization coefficient, based on the retention time differences, is
647
+% applied to the result. It is also possible to set a retention time
648
+% range in which the penalization is 0, this because in gas
649
+% chromatography we can have a little deviation in the retention time of
650
+% the metabolite so, based on the experimental data, we can choose the
651
+% retention time window for the penalization coefficient being applied.
652
+
653
+\begin{knitrout}
654
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
655
+\begin{alltt}
656
+\hlstd{mp} \hlkwb{<-} \hlkwd{correlationAlignment}\hlstd{(}\hlkwc{object}\hlstd{=pd.2,} \hlkwc{thr}\hlstd{=}\hlnum{0.85}\hlstd{,} \hlkwc{D}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{penality}\hlstd{=}\hlnum{0.2}\hlstd{,}
657
+                           \hlkwc{normalize}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{minFilter}\hlstd{=}\hlnum{1}\hlstd{)}
658
+\hlstd{mp}
659
+\end{alltt}
660
+\end{kframe}
661
+\end{knitrout}
662
+
663
+% \noindent where \texttt{thr} represent correlation threshold from 0
664
+% (min) to 1 (max); \texttt{D} represent the retention time window in
665
+% seconds; \texttt{penality} represent the penality inflicted to a match
666
+% between two peaks when the retention time difference exceed the
667
+% parameter \texttt{D}; \texttt{normalize} is about the peak
668
+% normalization-to-100 before the correlation is calculated;
669
+% \texttt{minFilter} give the opportunity to exclude from the resulting
670
+% correlation matrix each feature that in represented in our samples
671
+% less time than this value. The value of minFilter must be smaller than
672
+% the number of samples. 
673
+
674
+% The correlation-based peak alignment for multiple GC-MS
675
+% peak lists uses a center-star technique to the alignment of the
676
+% peaks. The combination of the \texttt{D} and \texttt{penality} parameters
677
+% allow the users to force the algorithm to match the peaks close to the
678
+% reference. The \texttt{thr} parameter control the matching factor.
679
+
680
+
681
+\subsection{Gathering results}
682
+The alignment results can be extracted from the \texttt{multipleAlignment}
683
+object as: 
684
+\begin{knitrout}
685
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
686
+\begin{alltt}
687
+\hlstd{ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{runs}
688
+\end{alltt}
689
+\begin{verbatim}
690
+## [1] 4 5 6 2 1 3 7 8 9
691
+\end{verbatim}
692
+\begin{alltt}
693
+\hlstd{ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{ind}
694
+\end{alltt}
695
+\begin{verbatim}
696
+##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
697
+##  [1,]    1    1    1    1    1    1    1    1    1
698
+##  [2,]   NA    2   NA   NA    2   NA    2    2   NA
699
+##  [3,]   NA    3    2    2    3    2    3    3   NA
700
+##  [4,]    2    4    3    3    4    3    4    4    2
701
+##  [5,]    3    5    4    4    5    4    5    5    3
702
+##  [6,]    4    6    5    5    6    5    6    6    4
703
+##  [7,]    5    7    6    6    7    6   NA   NA   NA
704
+##  [8,]    6    8    7    7    8    7    7    7    5
705
+##  [9,]    7    9    8   NA   NA    8    8    8    6
706
+## [10,]   NA   NA   NA   NA   NA    9   NA   NA   NA
707
+## [11,]   NA   NA   NA   NA   NA   10   NA   NA   NA
708
+## [12,]   NA   10   NA   NA   NA   11   NA   NA   NA
709
+## [13,]    8   11    9   NA   NA   12   NA   NA   NA
710
+## [14,]   NA   12   NA   NA   NA   13   NA   NA   NA
711
+## [15,]   NA   13   NA   NA   NA   14   NA   NA    7
712
+## [16,]    9   14   NA   NA   NA   15   NA    9    8
713
+## [17,]   10   15   10    8    9   16   NA   10    9
714
+## [18,]   11   16   11    9   10   17    9   11   10
715
+## [19,]   NA   17   12   NA   NA   18   10   12   NA
716
+## [20,]   NA   18   13   NA   NA   19   11   13   NA
717
+## [21,]   12   NA   14   NA   11   20   12   14   NA
718
+## [22,]   NA   NA   NA   10   12   21   13   15   NA
719
+## [23,]   NA   NA   NA   11   13   22   14   16   11
720
+## [24,]   13   19   15   12   NA   NA   15   17   12
721
+## [25,]   NA   NA   NA   13   14   NA   16   NA   NA
722
+## [26,]   NA   NA   NA   14   15   NA   17   18   13
723
+## [27,]   NA   NA   NA   15   16   NA   NA   19   14
724
+## [28,]   NA   20   16   16   17   NA   18   20   15
725
+## [29,]   NA   21   17   17   18   NA   19   21   16
726
+## [30,]   14   22   18   18   19   NA   20   22   17
727
+## [31,]   15   23   19   19   20   NA   21   NA   NA
728
+## [32,]   16   NA   20   20   21   NA   22   23   18
729
+## [33,]   17   24   21   21   22   23   NA   NA   19
730
+## [34,]   18   25   22   22   23   24   23   24   20
731
+## [35,]   19   26   23   NA   NA   25   NA   NA   NA
732
+## [36,]   20   27   24   23   24   26   24   25   21
733
+\end{verbatim}
734
+\end{kframe}
735
+\end{knitrout}
736
+
737
+\noindent This table would suggest that matched peak \texttt{8} (see
738
+numbers below the TICs in the figure above) corresponds to detected
739
+peaks \texttt{9, 12, 11} in runs \texttt{4, 5, 6} and so on, same as
740
+shown in the above plot. 
741
+
742
+In addition, you can gather a list of all the merged peaks with the
743
+\texttt{gatherInfo} function, giving elements for the retention times,
744
+the detected fragment ions and their intensities.  The example below
745
+also shows the how to construct a table of retention times of the
746
+matched peaks (No attempt is made here to adjust retention times onto
747
+a common scale.  Instead, the peaks are matched to each other on their
748
+original scale).  For example: 
749
+
750
+\begin{knitrout}
751
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
752
+\begin{alltt}
753
+\hlstd{outList} \hlkwb{<-} \hlkwd{gatherInfo}\hlstd{(pd,ma)}
754
+\hlstd{outList[[}\hlnum{8}\hlstd{]]}
755
+\end{alltt}
756
+\begin{verbatim}
757
+## $rt
758
+##    mmC.4    mmC.5    mmC.6    mmA.2    mmA.1    mmA.3    mmD.7    mmD.8 
759
+## 7.728317 7.740700 7.711417 7.713550 7.708567 7.711600 7.702967 7.701717 
760
+##    mmD.9 
761
+## 7.713933 
762
+## 
763
+## $mz
764
+##  [1]  52  59  66  70  72  73  74  75  79  89 104 116 133 147 148 188 204
765
+## 
766
+## $data
767
+##       mmC.4 mmC.5 mmC.6 mmA.2 mmA.1 mmA.3 mmD.7 mmD.8 mmD.9
768
+##  [1,]     0     0     0     0     0     0 26248     0     0
769
+##  [2,]     0     0     0  5113  4425  4994  4855  4557  4728
770
+##  [3,]     0     0     0  3926  5146  4876  4831  3354  4783
771
+##  [4,]     0     0     0 11568     0     0 10637     0     0
772
+##  [5,]     0     0     0  3680  3910  4492  4051  3427  3907
773
+##  [6,]     0     0     0 61816 65680 66768 65912 52848 61560
774
+##  [7,]     0     0     0  6705  6185  7400  6642  6235  7088
775
+##  [8,]     0     0 24160 26376 23328     0 28016 26304 27184
776
+##  [9,]     0     0     0     0     0     0 38712     0     0
777
+## [10,]     0     0     0  5617  5347  5702  5173  3946  5659
778
+## [11,]     0     0     0 13173 13808 13207 12852  9816 12492
779
+## [12,]     0     0     0  5417  5525  5912  5577  4504  5201
780
+## [13,]     0     0     0  3539  3730  2910  3599  2436  3893
781
+## [14,]     0     0 17904 21864 20016 22280 21904 17896 22400
782
+## [15,]     0     0     0  4413  3430  3890  3006  3335  3851
783
+## [16,]     0     0  7636 14433 14751 13765 14731 10680 14061
784
+## [17,]     0     0     0  6878  6667  7018  6935  5149  6830
785
+\end{verbatim}
786
+\begin{alltt}
787
+\hlstd{rtmat} \hlkwb{<-} \hlkwd{matrix}\hlstd{(}\hlkwd{unlist}\hlstd{(}\hlkwd{lapply}\hlstd{(outList,.subset,}\hlstr{"rt"}\hlstd{),} \hlkwc{use.names}\hlstd{=}\hlnum{FALSE}\hlstd{),}
788
+                \hlkwc{nr}\hlstd{=}\hlkwd{length}\hlstd{(outList),} \hlkwc{byrow}\hlstd{=}\hlnum{TRUE}\hlstd{)}
789
+\hlkwd{colnames}\hlstd{(rtmat)} \hlkwb{<-} \hlkwd{names}\hlstd{(outList[[}\hlnum{1}\hlstd{]]}\hlopt{$}\hlstd{rt);} \hlkwd{rownames}\hlstd{(rtmat)} \hlkwb{<-} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(rtmat)}
790
+\hlkwd{round}\hlstd{(rtmat,} \hlnum{3}\hlstd{)}
791
+\end{alltt}
792
+\begin{verbatim}
793
+##    mmC.4 mmC.5 mmC.6 mmA.2 mmA.1 mmA.3 mmD.7 mmD.8 mmD.9
794
+## 1  7.534 7.512 7.506 7.531 7.526 7.540 7.520 7.519 7.531
795
+## 2     NA 7.535    NA    NA 7.549    NA 7.543 7.547    NA
796
+## 3     NA 7.558 7.551 7.559 7.566 7.557 7.560 7.565    NA
797
+## 4  7.580 7.575 7.569 7.576 7.583 7.574 7.577 7.582 7.577
798
+## 5  7.597 7.586 7.586 7.588 7.600 7.592 7.594 7.599 7.594
799
+## 6  7.614 7.615 7.614 7.616 7.617 7.614 7.617 7.610 7.617
800
+## 7  7.717 7.695 7.694 7.691 7.663 7.649    NA    NA    NA
801
+## 8  7.728 7.741 7.711 7.714 7.709 7.712 7.703 7.702 7.714
802
+## 9  7.803 7.804 7.803    NA    NA 7.803 7.806 7.805 7.805
803
+## 10    NA    NA    NA    NA    NA 7.826    NA    NA    NA
804
+## 11    NA    NA    NA    NA    NA 7.975    NA    NA    NA
805
+## 12    NA 7.809    NA    NA    NA 7.997    NA    NA    NA
806
+## 13 7.825 7.849 7.951    NA    NA 8.009    NA    NA    NA
807
+## 14    NA 7.946    NA    NA    NA 8.077    NA    NA    NA
808
+## 15    NA 7.958    NA    NA    NA 8.095    NA    NA 7.817
809
+## 16 7.946 7.969    NA    NA    NA 8.112    NA 7.816 7.823
810
+## 17 7.974 7.986 7.980 7.736 7.783 8.249    NA 7.907 7.874
811
+## 18 8.008 8.009 8.003 7.799 7.800 8.283 7.812 7.936 7.943
812
+## 19    NA 8.049 8.043    NA    NA 8.312 7.966 7.965    NA
813
+## 20    NA 8.061 8.060    NA    NA 8.335 7.989 7.993    NA
814
+## 21 8.077    NA 8.100    NA 7.823 8.357 8.012 8.010    NA
815
+## 22    NA    NA    NA 7.828 7.880 8.375 8.069 8.068    NA
816
+## 23    NA    NA    NA 7.942 7.943 8.403 8.086 8.085 7.977
817
+## 24 8.111 8.107 8.111 7.976    NA    NA 8.109 8.108 8.000
818
+## 25    NA    NA    NA 7.999 7.966    NA 8.172    NA    NA
819
+## 26    NA    NA    NA 8.079 7.994    NA 8.246 8.245 8.080
820
+## 27    NA    NA    NA 8.114 8.011    NA    NA 8.262 8.091
821
+## 28    NA 8.204 8.237 8.182 8.109    NA 8.280 8.330 8.114
822
+## 29    NA 8.244 8.254 8.251 8.246    NA 8.326 8.342 8.251
823
+## 30 8.254 8.301 8.294 8.285 8.263    NA 8.360 8.359 8.263
824
+## 31 8.266 8.324 8.323 8.337 8.332    NA 8.377    NA    NA
825
+## 32 8.334    NA 8.329 8.359 8.360    NA 8.395 8.393 8.337
826
+## 33 8.363 8.352 8.352 8.399 8.394 8.420    NA    NA 8.400
827
+## 34 8.403 8.392 8.386 8.434 8.434 8.437 8.435 8.433 8.440
828
+## 35 8.437 8.432 8.432    NA    NA 8.443    NA    NA    NA
829
+## 36 8.477 8.461 8.460 8.474 8.469 8.472 8.469 8.468 8.474
830
+\end{verbatim}
831
+\end{kframe}
832
+\end{knitrout}
833
+
834
+
835
+\section{Future improvements and extension}
836
+There are many procedures that we have implemented in our
837
+investigation of GC-MS data, but have not made part of the package just
838
+yet. Some of the most useful procedures will be released, such as: 
839
+
840
+\begin{enumerate}
841
+\item Parsers for other peak detection algorithms (e.g. % XCMS,
842
+  MzMine) and parsers for other alignment procedures
843
+  (e.g. SpectConnect) and perhaps retention indices procedures. 
844
+\item More convenient access to the alignment information and
845
+  abundance table. 
846
+\item Statistical analysis of differential metabolite abundance.
847
+\item Fragment-level analysis, an alternative method to summarize
848
+  abundance across all detected fragments of a metabolite peak. 
849
+\end{enumerate}
850
+
851
+\section{References}
852
+See the following for further details:
853
+
854
+\begin{enumerate}
855
+\item Robinson MD. {\em Methods for the analysis of gas chromatography
856
+    - mass spectrometry data.} {\bf Ph.D. Thesis}. October 2008.
857
+  Department of Medical Biology (Walter and Eliza Hall Institute of
858
+  Medical Research), University of Melbourne. 
859
+\item Robinson MD, De Souza DP, Keen WW, Saunders EC, McConville MJ,
860
+  Speed TP, Liki\'{c} VA. (2007) {\em A dynamic programming approach
861
+    for the alignment of signal peaks in multiple gas
862
+    chromatography-mass spectrometry experiments.} {\bf BMC
863
+    Bioinformatics}. 8:419. 
864
+\item Prince JT, Marcotte EM (2006) {\em Chromatographic alignment of
865
+    ESI-LC-MS proteomics data sets by ordered bijective interpolated
866
+    warping}. {\bf Anal Chem}. 78(17):6140-52. 
867
+\end{enumerate}
868
+
869
+\section{This vignette was built with/at ...}
870
+
871
+\begin{knitrout}
872
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
873
+\begin{alltt}
874
+\hlkwd{sessionInfo}\hlstd{()}
875
+\end{alltt}
876
+\begin{verbatim}
877
+## R version 4.1.1 (2021-08-10)
878
+## Platform: x86_64-pc-linux-gnu (64-bit)
879
+## Running under: Debian GNU/Linux 10 (buster)
880
+## 
881
+## Matrix products: default
882
+## BLAS:   /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
883
+## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.3.5.so
884
+## 
885
+## locale:
886
+##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
887
+##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
888
+##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
889
+##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
890
+##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
891
+## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
892
+## 
893
+## attached base packages:
894
+## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
895
+## [8] methods   base     
896
+## 
897
+## other attached packages:
898
+##  [1] flagme_1.48.0       CAMERA_1.48.0       xcms_3.14.1        
899
+##  [4] MSnbase_2.18.0      ProtGenerics_1.24.0 S4Vectors_0.30.2   
900
+##  [7] mzR_2.26.1          Rcpp_1.0.7          Biobase_2.52.0     
901
+## [10] BiocGenerics_0.38.0 BiocParallel_1.26.2 gcspikelite_1.30.0 
902
+## 
903
+## loaded via a namespace (and not attached):
904
+##   [1] colorspace_2.0-2            ellipsis_0.3.2             
905
+##   [3] htmlTable_2.3.0             XVector_0.32.0             
906
+##   [5] GenomicRanges_1.44.0        base64enc_0.1-3            
907
+##   [7] clue_0.3-60                 rstudioapi_0.13            
908
+##   [9] affyio_1.62.0               fansi_0.5.0                
909
+##  [11] codetools_0.2-18            splines_4.1.1              
910
+##  [13] ncdf4_1.17                  doParallel_1.0.16          
911
+##  [15] impute_1.66.0               robustbase_0.93-9          
912
+##  [17] knitr_1.36                  Formula_1.2-4              
913
+##  [19] cluster_2.1.2               vsn_3.60.0                 
914
+##  [21] png_0.1-7                   graph_1.70.0               
915
+##  [23] BiocManager_1.30.16         compiler_4.1.1             
916
+##  [25] backports_1.2.1             assertthat_0.2.1           
917
+##  [27] Matrix_1.3-4                fastmap_1.1.0              
918
+##  [29] limma_3.48.3                htmltools_0.5.2            
919
+##  [31] tools_4.1.1                 igraph_1.2.7               
920
+##  [33] gtable_0.3.0                glue_1.4.2                 
921
+##  [35] GenomeInfoDbData_1.2.6      affy_1.70.0                
922
+##  [37] RANN_2.6.1                  dplyr_1.0.7                
923
+##  [39] MALDIquant_1.20             vctrs_0.3.8                
924
+##  [41] preprocessCore_1.54.0       iterators_1.0.13           
925
+##  [43] xfun_0.26                   stringr_1.4.0              
926
+##  [45] lifecycle_1.0.1             gtools_3.9.2               
927
+##  [47] XML_3.99-0.8                DEoptimR_1.0-9             
928
+##  [49] zlibbioc_1.38.0             MASS_7.3-54                
929
+##  [51] scales_1.1.1                pcaMethods_1.84.0          
930
+##  [53] MatrixGenerics_1.4.3        SummarizedExperiment_1.22.0
931
+##  [55] RBGL_1.68.0                 MassSpecWavelet_1.58.0     
932
+##  [57] SparseM_1.81                RColorBrewer_1.1-2         
933
+##  [59] gridExtra_2.3               ggplot2_3.3.5              
934
+##  [61] rpart_4.1-15                latticeExtra_0.6-29        
935
+##  [63] stringi_1.7.5               highr_0.9                  
936
+##  [65] foreach_1.5.1               checkmate_2.0.0            
937
+##  [67] caTools_1.18.2              GenomeInfoDb_1.28.4        
938
+##  [69] rlang_0.4.11                pkgconfig_2.0.3            
939
+##  [71] matrixStats_0.61.0          bitops_1.0-7               
940
+##  [73] mzID_1.30.0                 evaluate_0.14              
941
+##  [75] lattice_0.20-45             purrr_0.3.4                
942
+##  [77] htmlwidgets_1.5.4           tidyselect_1.1.1           
943
+##  [79] plyr_1.8.6                  magrittr_2.0.1             
944
+##  [81] R6_2.5.1                    IRanges_2.26.0             
945
+##  [83] gplots_3.1.1                generics_0.1.0             
946
+##  [85] Hmisc_4.6-0                 DelayedArray_0.18.0        
947
+##  [87] DBI_1.1.1                   pillar_1.6.3               
948
+##  [89] foreign_0.8-81              MsCoreUtils_1.4.0          
949
+##  [91] survival_3.2-13             RCurl_1.98-1.5             
950
+##  [93] nnet_7.3-16                 tibble_3.1.5               
951
+##  [95] crayon_1.4.1                KernSmooth_2.23-20         
952
+##  [97] utf8_1.2.2                  jpeg_0.1-9                 
953
+##  [99] grid_4.1.1                  data.table_1.14.2          
954
+## [101] digest_0.6.28               munsell_0.5.0
955
+\end{verbatim}
956
+\begin{alltt}
957
+\hlkwd{date}\hlstd{()}
958
+\end{alltt}
959
+\begin{verbatim}
960
+## [1] "Mon Oct 18 15:30:58 2021"
961
+\end{verbatim}
962
+\end{kframe}
963
+\end{knitrout}
964
+
965
+\end{document}