Bioconductor Code: flagme

Browse code

addXCMSPeaks using modern interface from xcms

Ricca authored on 26/10/2021 13:08:30
Showing 52 changed files

DESCRIPTION index c3a3c6f..15854a8 100755
NAMESPACE index 23a8180..701bfc3 100755
R/addXCMSPeaks.R index 07a521b..81524ba 100755
R/betweenAlignment.R index bedc9be..62a1056 100755
R/clusterAlignment.R index ced979f..7edb8eb 100755
R/dp.R index 9fe8aff..f5799f3 100755
R/exportSpectra.R index 312dbdf..50cfbcf 100755
R/importSpectra.R index 0000000..f662107
R/metrics.R index 7cbf9cb..b68dd58 100755
R/peaksAlignment.R index 71cf461..fd28bff 100755
R/plotFragments.R index 2ce443d..9ac3360 100755
R/progressiveAlignment.R index a135729..e6e5677 100755
R/retFatMatrix.R index dcfca91..d78c419 100644
man/addChromaTOFPeaks.Rd index 589d0c5..63552f3 100644
man/addXCMSPeaks.Rd index 3427bfc..152d3b0 100644
man/betweenAlignment.Rd index 35ae373..e5aab3a 100644
man/clusterAlignment.Rd index a8aabda..f301e14 100644
man/compress-peaksAlignment-method.Rd index d6da6a6..4c1bdea 100644
man/compress-progressiveAlignment-method.Rd index 2153ed0..ce2b6f8 100644
man/corPrt.Rd index bfc7904..f1011dd 100644
man/de_Duper.Rd index 0000000..2c1063c
man/decompress-peaksAlignment-method.Rd index c98bd65..7703de2 100644
man/decompress-progressiveAlignment-method.Rd index 46a6676..e7671a6 100644
man/distToLib.Rd index 0000000..2f50799
man/dynRT.Rd index 1a3050d..300ec5d 100644
man/gatherInfo.Rd index 4c10816..c7a7851 100644
man/headToTailPlot.Rd index 0000000..85d92b4
man/importSpec.Rd index 0000000..841f774
man/imputePeaks.Rd index 737a2e4..3d40baf 100644
man/matchSpec.Rd index 0000000..276eced
man/multipleAlignment-class.Rd index 65a64ef..cb204a9 100644
man/ndpRT.Rd index 490ecd1..c7359a6 100644
man/normDotProduct.Rd index 23f2a6f..39b18c9 100644
man/parseChromaTOF.Rd index b136fc5..b7c0ee7 100644
man/parseELU.Rd index b52bc49..d712ee9 100644
man/peaksAlignment-class.Rd index 2cb7d58..bcdea86 100644
man/peaksDataset.Rd index a0a1b3c..f195965 100644
man/plotAlignedFrags.Rd index 270c0a6..980e78c 100644
man/plotAlignment-peaksAlignment-method.Rd index f12434c..0d9021c 100644
man/plotChrom-peaksDataset-method.Rd index a383d9d..8917223 100644
man/plotClustAlignment-clusterAlignment-method.Rd index cdc9a62..d6d400a 100644
man/plotFrags.Rd index d6c49f5..c959585 100644
man/plotImage.Rd index 58fd341..76649b0 100644
man/progressiveAlignment-class.Rd index 631fa18..ce01c34 100644
man/retFatMatrix.Rd index 3f77465..2a02f1d 100644
man/rmaFitUnit.Rd index eadb938..a68a492 100644
man/show-multipleAlignment-method.Rd index 8b83073..2be973b 100644
vignettes/auto/flagme.el index d1d65c8..0000000
vignettes/flagme-knitr.Rnw index 0000000..e2efbfe
vignettes/flagme.Rnw index 9010062..3b59817 100755
vignettes/flagme.pdf index 7405403..9223f51 100644
vignettes/flagme.tex index 0000000..2393794

History View file @ eea95d1

@@ -1,5 +1,5 @@
                      Package: flagme
                     -Version: 1.33.5
                     +Version: 1.48.1
                      Date: 2015/04/06
                      Title: Analysis of Metabolomics GC/MS Data
                      Author: Mark Robinson <[email protected]>, Riccardo Romoli <[email protected]>
@@ -12,7 +12,7 @@ License: LGPL (>= 2)
                      Collate: 0classes.R clusterAlignment.R init.R multipleAlignment.R
                              peaksAlignment.R progressiveAlignment.R betweenAlignment.R dp.R
                              metrics.R parse.R peaksDataset.R gatherInfo.R plotFragments.R
                     -        rmaFitUnit.R addXCMSPeaks.R retFatMatrix.R exportSpectra.R
                     +        rmaFitUnit.R addXCMSPeaks.R retFatMatrix.R exportSpectra.R importSpectra.R
                      biocViews: DifferentialExpression, MassSpectrometry
                     -RoxygenNote: 6.1.1
                     +RoxygenNote: 7.1.2
                      Encoding: UTF-8

NAMESPACE

History View file @ eea95d1

@@ -7,11 +7,15 @@ export(betweenAlignment)
                      export(calcTimeDiffs)
                      export(clusterAlignment)
                      export(corPrt)
                     +export(distToLib)
                      export(dp)
                      export(dynRT)
                      export(exportSpectra)
                      export(gatherInfo)
                     +export(headToTailPlot)
                     +export(importSpec)
                      export(imputePeaks)
                     +export(matchSpec)
                      export(multipleAlignment)
                      export(ndpRT)
                      export(normDotProduct)
@@ -36,6 +40,7 @@ importClassesFrom(SparseM,matrix.csc)
                      importFrom(CAMERA,annotate)
                      importFrom(CAMERA,getpspectra)
                      importFrom(MASS,rlm)
                     +importFrom(SparseM,as.matrix)
                      importFrom(SparseM,as.matrix.csc)
                      importFrom(gplots,colorpanel)
                      importFrom(graphics,axis)

R/addXCMSPeaks.R

History View file @ eea95d1

@@ -1,184 +1,287 @@
                      #' Add xcms/CAMERA peak detection results
                     -#'
                     +#'
                      #' Reads the raw data using xcms, group each extracted ion according to their
                      #' retention time using CAMERA and attaches them to an already created
                      #' \code{peaksDataset} object
                     -#'
                     +#'
                      #' Repeated calls to xcmsSet and annotate to perform peak-picking and
                      #' deconvolution. The peak detection results are added to the original
                      #' \code{peaksDataset} object. Two peak detection algorithms are available:
                      #' continuous wavelet transform (peakPicking=c('cwt')) and the matched filter
                      #' approach (peakPicking=c('mF')) described by Smith et al (2006). For further
                      #' information consult the xcms package manual.
                     -#'
                     -#' @param files character vector of same length as \code{object@rawdata} (user
                     -#' ensures the order matches)
                     -#' @param object a \code{peaksDataset} object.
                     -#' @param peakPicking Methods to use for peak detection. See details.
                     -#' @param perfwhm percentage of full width half maximum. See
                     -#' CAMERA::groupFWHM() for more details
                     -#' @param quick logical. See CAMERA::annotate() for more details
                     -#' @param ... arguments passed on to \code{xcmsSet} and \code{annotate}
                     +#' @title addXCMSPeaks
                     +#' @param files list of chromatogram files
                     +#' @param object a \code{peaksDataset} object
                     +#' @param settings xcms parameter object (e.g. \code{MatchedFilterParam} or
                     +#' \code{CentWaveParam}). It contains the settings for the peak-picking
                     +#' @param rtrange vector; retention time range
                     +#' @param mzrange vector, mz range
                     +#' @param perfwhm determines the maximal retention time difference of features
                     +#'     in one pseudospectrum.
                     +#' @param minintens minimum ion intensity to be included into a pseudospectrum
                     +#' @param minfeat minimum number of ions needed to create a pseudospectrum
                     +#' @param multipleMatchedFilter logical Try to remove redundant peaks, in
                     +#' this case where there are any peaks within an absolute m/z value of 0.2 and
                     +#' within 3 s for any one sample in the xcmsSet (the largest peak is kept)
                     +#' @param multipleMatchedFilterParam list. It contains the settings for the
                     +#' peak-picking. mz_abs represents the mz range; rt_abs represents the rt range
                     +#' @importFrom xcms xcmsRaw xcmsSet
                     +#' @importFrom CAMERA annotate getpspectra
                     +#' @importFrom stats aggregate
                     +#' @export addXCMSPeaks
                      #' @return \code{peaksDataset} object
                      #' @author Riccardo Romoli \email{riccardo.romoli@@unifi.it}
                      #' @seealso \code{\link{peaksDataset}} \code{\link{findPeaks.matchedFilter}}
                      #' \code{\link{findPeaks.centWave}} \code{\link{xcmsRaw-class}}
                      #' @keywords manip
                      #' @examples
                     -#'
                     -#' # need access to CDF (raw data)
                     -#' require(gcspikelite)
                     -#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -#'
                     -#' # full paths to file names
                     -#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
                     -#'
                     -#' # create a 'peaksDataset' object and add XCMS peaks to it
                     -#' pd <- peaksDataset(cdfFiles[1], mz=seq(50,550), rtrange=c(7.5,8.5))
                     -#' pd <- addXCMSPeaks(cdfFiles[1], pd, peakPicking=c('mF'),
                     -#'                    snthresh=3, fwhm=4, step=1, steps=2, mzdiff=0.5)
                     +#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +#'                     sep = "/"),"CDF", full = TRUE)
                     +#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +#' ## create settings object
                     +#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     +#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +#' data
                      #'
                      #' @importFrom xcms xcmsRaw xcmsSet
                      #' @importFrom CAMERA annotate getpspectra
                      #' @importFrom stats aggregate
                      #' @export addXCMSPeaks
                     -addXCMSPeaks <- function (files, object, peakPicking = c("cwt", "mF"), perfwhm = 0.75, quick = TRUE, ...)
                     -{
                     +addXCMSPeaks <- function(files, object, settings, rtrange = NULL,
                     +                           mzrange = NULL, perfwhm = 0.75, minintens = 100,
                     +                           minfeat = 6, multipleMatchedFilter = FALSE,
                     +                           multipleMatchedFilterParam = list(
                     +                               fwhm = c(5, 10, 20), mz_abs = 0.1, rt_abs = 3)
                     +                               ) {
                     +    ## Rmpi tends to give many warnings that are not relevant to end
                     +    ## users: this is an attempt to suppress this output
                     +    owarn <- options("warn")
                     +    on.exit(options(warn = owarn$warn))
                          options(warn = -1)
                     -    cdfFiles <- as.character(files)
                     -    if (length(cdfFiles) != length(object@rawdata))
                     -        stop("Number of files must be the same as the number of runs (and must match).")
                     -    xs <- lapply(cdfFiles, function(x, y) {
                     -        f <- which(cdfFiles %in% x)
                     -        xr <- xcmsRaw(x)
                     -        rtrange <- c(min(object@rawrt[[f]]), max(object@rawrt[[f]])) *
                     -            60
                     -        scanRange <- c(max(1, which(xr@scantime > rtrange[1])[1],
                     -            na.rm = TRUE), min(length(xr@scantime), which(xr@scantime >
                     -            rtrange[2])[1] - 1, na.rm = TRUE))
                     -        if (peakPicking == "cwt") {
                     -            s <- xcmsSet(x, method = "centWave", prefilter = c(5,
                     -                100), scanrange = scanRange, integrate = 1, mzdiff = -0.001,
                     -                fitgauss = TRUE, ...)
+                    +
                     +    ## if an rtrange is given, we first find out which scans correspond
                     +    ## to this and then use the scanRange argument of xcmsSet
                     +    if (!is.null(rtrange)) {
                     +        if (length(rtrange) != 2) {
                     +            stop("Improper rtrange given!")
+                             }
                     -        if (peakPicking == "mF") {
                     -            s <- xcmsSet(x, method = "matchedFilter", scanrange = scanRange,
                     -                max = 500, ...)
                     -        }
                     -        idx <- which(s@peaks[, "mz"] > min(object@mz) & s@peaks[,
                     -            "mz"] < max(object@mz))
                     -        s@peaks <- s@peaks[idx, ]
                     -        if(quick == TRUE)
                     -        {
                     -            a <- annotate(s, perfwhm = perfwhm, quick = quick)
                     -        }
                     -        if(quick == FALSE)
                     -        {
                     -            a <- annotate(s, perfwhm = perfwhm, cor_eic_th = 0.8,
                     -                          pval = 0.05, graphMethod = "hcs",
                     -                          calcIso = FALSE, calcCiS = TRUE,
                     -                          calcCaS = FALSE)
+                    +
                     +        rtrange <- rtrange * 60  ## convert from minutes to seconds
                     +        xr <- xcms::xcmsRaw(files[1])
                     +        scanRange <- c(max(1, which(xr@scantime > rtrange[1])[1], na.rm = TRUE),
                     +                       min(length(xr@scantime),
                     +                           which(xr@scantime > rtrange[2])[1] - 1, na.rm = TRUE)
                     +                           )
                     +        allSettings <- c(list(files = files, scanrange = scanRange), settings)
                     +    }
                     +    else {
                     +        allSettings <- c(list(files = files), settings)
                     +    }
+                    +
                     +  ## peak-picking
                     +  cat("Start peakPicking \n")
                     +    row <- MSnbase::readMSData(files, centroided. = TRUE, mode = "onDisk",
                     +        msLevel. = 1)
                     +    if (class(settings)[1] == "MatchedFilterParam") {
                     +        if (multipleMatchedFilter == TRUE) {
                     +            settings@fwhm <- multipleMatchedFilterParam$fwhm[1]
                     +            set1a <- xcms::findChromPeaks(row, param = settings )
                     +            settings@fwhm <- multipleMatchedFilterParam$fwhm[2]
                     +            set1b <- xcms::findChromPeaks(row, param = settings )
                     +            settings@fwhm <- multipleMatchedFilterParam$fwhm[3]
                     +            set1c <- xcms::findChromPeaks(row, param = settings )
                     +            set1 <- set1c
                     +            xcms::chromPeaks(set1) <- rbind(
                     +                xcms::chromPeaks(set1a), xcms::chromPeaks(set1b),
                     +                xcms::chromPeaks(set1c)
                     +            )
                     +            xcms::chromPeaks(set1) <- xcms::chromPeaks(set1)[
                     +                order(xcms::chromPeaks(set1)[, "sample"], decreasing = FALSE), ]
                     +                s <- de_Duper(set1,
                     +                             mz_abs = multipleMatchedFilterParam$mz_abs,
                     +                             rt_abs = multipleMatchedFilterParam$rt_abs)
+                             }
                     -        return(a)
                     -    }, y = peakPicking)
                     -    if (peakPicking == "cwt") {
                     -        area <- c("intb")
+                         }
                     -    if (peakPicking == "mF") {
                     -        area <- c("intf")
                     +    xset <- xcms::findChromPeaks(row, param = settings)
                     +  cat("peakPicking Done \n")
                     +    pdp <- xcms::PeakDensityParam(sampleGroups = seq(along = files), bw = 20,
                     +        minFraction = 0.5)
                     +    xset <- xcms::groupChromPeaks(xset, pdp)
                     +    xset <- xcms::fillChromPeaks(xset, param = xcms::ChromPeakAreaParam())
                     +    xset <- as(xset, "xcmsSet")
                     +    if (!is.null(mzrange)) {
                     +        idx <-  (xset@peaks[, "mz"] > mzrange[1]) &
                     +                    (xset@peaks[, "mz"] < mzrange[2]
                     +                )
                     +        xset@peaks <- xset@peaks[idx, ]
                     +    }
                     +    ## deconvolution; list of all the xset
                     +    ap <- split(xset, factor(xcms::sampnames(xset),
                     +        levels = xcms::sampnames(xset)))
                     +    apd <-  lapply(ap, function(x) {
                     +        xa <- CAMERA::xsAnnotate(x,  sample = 1)
                     +        xa <- CAMERA::groupFWHM(xa, perfwhm = perfwhm)
+                         }
                     -    data <- lapply(seq(along = cdfFiles), function(x) {
                     -        filt <- sapply(xs[[x]]@pspectra, function(r) {
                     -            length(r)
                     -        })
                     -        spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >=
                     -            6)]
                     -        mzrange <- object@mz
                     -        abu <- data.frame(matrix(0, nrow = length(mzrange), ncol = length(spec.idx)))
                     -        rownames(abu) <- mzrange
                     -        colnames(abu) <- spec.idx
                     -        mz <- data.frame(mz = mzrange)
                     -        abu <- sapply(spec.idx, function(z) {
                     -            spec <- getpspectra(xs[[x]], z)[, c("mz", area)]
                     -            spec[, "mz"] <- round(spec[, "mz"])
                     -            if (max(table(spec[, 1])) > 1) {
                     -                spec.noDouble <- cbind(aggregate(spec[, 2], list(spec[,
                     -                  1]), FUN = sum))
                     -                colnames(spec.noDouble) <- c("mz", area)
                     -                spec <- spec.noDouble
                     +                    )
                     +    ## filter pseudo spectra
                     +    intensity <- "maxo"
                     +    res <- lapply(apd, function(x) {
                     +        allpks <- x@groupInfo
                     +        minI <- minintens# * max(allpks[, intensity])
                     +        tooSmall <- which(allpks[, intensity] < minI)
                     +        pspectra <- lapply(x@pspectra, function(x) {
                     +            x[!x %in% tooSmall]})
                     +        npeaks <- sapply(pspectra, length)
                     +        pspectra <- pspectra[npeaks >= minfeat]
                     +        ## list of unique pspec, double masses removed
                     +        listpspec <- lapply(pspectra, function(x) {
                     +            aa <- cbind(mz = round(allpks[x, "mz"], digits = 0),
                     +                        allpks[x, c(intensity, "rt", "rtmin", "rtmax")]
                     +                        )
                     +            double <- duplicated(aa[, 1])
                     +            bb <- cbind(aggregate(aa[, 2], list(aa[, 1]),
                     +                FUN = sum), aa[!double, 3:5])
                     +            setNames(bb, c(colnames(aa)))
                     +        }
                     +                            )
                     +        ## get mzrange from data
                     +        mz.max <- max(sapply(listpspec, function(x) {
                     +            max(x[, "mz"])}))
                     +        mz.min <- min(sapply(listpspec, function(x) {
                     +            min(x[, "mz"])}))
                     +        mz.range <- data.frame(mz = c(mz.min:mz.max))
                     +        ## merge pspec with mzrange
                     +        listpspec.merged <- lapply(listpspec, function(x) {
                     +            merge(x, mz.range, by = "mz", all = TRUE)
+                                 }
                     -            else {
                     -                spec
                     +            )
+                                 }
                     -            abu$z <- merge(spec, mz, by = "mz", all = TRUE)[,
                     -                area]
                     -        })
                     -        colnames(abu) <- spec.idx
                     -        abu[is.na(abu)] <- c(0)
                     -        return(abu)
                     -    })
                     -    apex.rt <- lapply(seq(along = cdfFiles), function(x) {
                     -        filt <- sapply(xs[[x]]@pspectra, function(r) {
                     -            length(r)
                     -        })
                     -        spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >=
                     -            6)]
                     -        apex.rt <- sapply(spec.idx, function(z) {
                     -            spec.rt <- getpspectra(xs[[x]], z)[, c("rt")]
                     -            rt <- round(mean(spec.rt)/60, digits = 3)
                     -        })
                     -        return(apex.rt)
                     -    })
                     -    spectra.ind <- lapply(seq(along = cdfFiles), function(x) {
                     -        filt <- sapply(xs[[x]]@pspectra, function(r) {
                     -            length(r)
                     -        })
                     -        spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >=
                     -            6)]
                     -    })
                     -    ind.start <- lapply(seq(along = cdfFiles), function(x) {
                     -        filt <- sapply(xs[[x]]@pspectra, function(r) {
                     -            length(r)
                     -        })
                     -        spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >=
                     -            6)]
                     -        rt.start <- sapply(spec.idx, function(z) {
                     -            spec.rt <- getpspectra(xs[[x]], z)[, c("rtmin")]
                     -            rt <- round(mean(spec.rt), digits = 3)
                     -        })
                     -        return(rt.start)
                     -    })
                     -    ind.stop <- lapply(seq(along = cdfFiles), function(x) {
                     -        filt <- sapply(xs[[x]]@pspectra, function(r) {
                     -            length(r)
                     -        })
                     -        spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >=
                     -            6)]
                     -        rt.stop <- sapply(spec.idx, function(z) {
                     -            spec.rt <- getpspectra(xs[[x]], z)[, c("rtmax")]
                     -            rt <- round(mean(spec.rt), digits = 3)
                     -        })
                     -        return(rt.stop)
                     -    })
                     -    object@files
                     -    object@mz
                     +            )
                     +    ## merge again with a common mz range among all samples
                     +    max.mz <- max(sapply(res, function(x) {
                     +        max(sapply(x, "[[", "mz"))}))
                     +    min.mz <- min(sapply(res, function(x) {
                     +        min(sapply(x, "[[", "mz"))}))
                     +    mz.range.all <- data.frame(mz = c(min.mz:max.mz))
                     +    res.mz.mrg <- lapply(res, function(x) {
                     +        lapply(x, function(y) {
                     +            merge(y, mz.range.all, by = "mz", all = TRUE)
                     +            }
                     +            )
                     +            }
                     +            )
                     +    ## prepare the S4 slots
                     +    spec.ind <- lapply(res.mz.mrg, function(x) {
                     +        1:length(x)
                     +        }
                     +        )
                     +    apex.rt <- lapply(res.mz.mrg, function(x) {
                     +        rt <- lapply(x, "[[",  "rt")
                     +        round(sapply(rt, function(x) {
                     +            mean(x, na.rm = TRUE) / 60
                     +            }
                     +            ), digits = 3)
                     +            }
                     +            )
                     +    start.rt <- lapply(res.mz.mrg, function(x) {
                     +        rt <- lapply(x, "[[",  "rtmin")
                     +        round(sapply(rt, function(x) {
                     +            mean(x, na.rm = TRUE) / 60
                     +            }
                     +            ), digits = 3)
                     +            }
                     +            )
                     +    stop.rt <- lapply(res.mz.mrg, function(x) {
                     +        rt <- lapply(x, "[[",  "rtmax")
                     +        round(sapply(rt, function(x) {
                     +            mean(x, na.rm = TRUE) / 60
                     +            }
                     +            ), digits = 3)
                     +            }
                     +            )
                     +    data <- lapply(res.mz.mrg, function(x) {
                     +        a <- lapply(x, "[[",  intensity)
                     +        aa <- do.call(cbind, a)
                     +        colnames(aa) <- c(1:ncol(aa))
                     +        aa[is.na(aa)] <- c(0)
                     +        return(aa)
                     +        }
                     +        )
+                    +
                          for (i in 1:length(files)) {
                              ord <- order(apex.rt[[i]])
                              data[[i]] <- data[[i]][, ord]
                              apex.rt[[i]] <- apex.rt[[i]][ord]
                     -        spectra.ind[[i]] <- spectra.ind[[i]][ord]
                     -        ind.start[[i]] <- ind.start[[i]][ord]
                     -        ind.stop[[i]] <- ind.stop[[i]][ord]
                     +        spec.ind[[i]] <- spec.ind[[i]][ord]
                     +        start.rt[[i]] <- start.rt[[i]][ord]
                     +        stop.rt[[i]] <- stop.rt[[i]][ord]
+                         }
                     -    options(warn = 0)
                     -    nm <- lapply(files, function(u) {
                     -        sp <- strsplit(u, split = "/")[[1]]
                     -        sp[length(sp)]
                     -    })
                     -    nm <- sub(".CDF$", "", nm)
                     -    names(data) <- names(apex.rt) <- names(spectra.ind) <- names(ind.start) <- names(ind.stop) <- nm
                     -    new("peaksDataset", files = object@files, peaksdata = data,
                     -        peaksrt = apex.rt, peaksind = spectra.ind, peaksind.start = ind.start,
                     -        peaksind.end = ind.stop, rawdata = object@rawdata, rawrt = object@rawrt,
                     -        mz = object@mz)
                     +    new("peaksDataset",
                     +        files = files,
                     +        peaksdata = data,
                     +        peaksrt = apex.rt,
                     +        peaksind = spec.ind,
                     +        peaksind.start = start.rt,
                     +        peaksind.end = stop.rt,
                     +        rawdata = object@rawdata,
                     +        rawrt = object@rawrt,
                     +        mz = c(min.mz:max.mz)
                     +        )
+                     }
+                    +
+                    +
                     +##' Duplicate peak removal function
                     +##'
                     +##' For each sample, build a window [mz - mz_abs, mz + mz_abs] by
                     +##' [rt - rt_abs, rt + rt_abs] around every peak and keep only the rows selected
                     +##' by xcms::rectUnique, visited by decreasing "into" (the largest peak is kept)
                     +##' @title de_Duper
                     +##' @param object xcms object whose peak matrix is read and replaced via xcms::chromPeaks
                     +##' @param mz_abs mz range: half-width of the m/z window built around each peak
                     +##' @param rt_abs rt range: half-width of the rt window built around each peak
                     +##' @return \code{object} with its chromPeaks matrix replaced by the retained peaks
                     +##' @author r
                     +de_Duper <- function(object, mz_abs = 0.1, rt_abs = 2) {
                     +    mzdiff <- 0  ## forwarded unchanged as the third argument of xcms::rectUnique
                     +    peaks_mat <- xcms::chromPeaks(object)
                     +    mz_min <- peaks_mat[, "mz"] - mz_abs
                     +    mz_max <- peaks_mat[, "mz"] + mz_abs
                     +    rt_min <- peaks_mat[, "rt"] - rt_abs
                     +    rt_max <- peaks_mat[, "rt"] + rt_abs
+                    +
                     +    peaks_mat_out <- NULL  ## accumulator: each sample's retained rows are rbind-ed onto it
+                    +
                     +    samples <- unique(peaks_mat[, "sample"])
+                    +
                     +    cat("\n", "Duplicate peak removal; % complete: ")
                     +    percplus <- -1  ## percentage of the previous iteration (-1 = none yet)
+                    +
                     +    for (i in 1:length(samples)) {
                     +        perc <- round(i / length(samples) * 100)
                     +        if (perc %% 10 == 0 && perc != percplus) {  ## print multiples of 10 only, never the same value twice in a row
                     +            cat(perc, " ")
                     +        }
                     +        percplus <- perc
+                    +
                     +        peaks_mat_i <- peaks_mat[which(peaks_mat[, "sample"] == samples[i]), ,
                     +            drop = FALSE]
                     +        mz_min_i <- mz_min[which(peaks_mat[, "sample"] == samples[i])]
                     +        mz_max_i <- mz_max[which(peaks_mat[, "sample"] == samples[i])]
                     +        rt_min_i <- rt_min[which(peaks_mat[, "sample"] == samples[i])]
                     +        rt_max_i <- rt_max[which(peaks_mat[, "sample"] == samples[i])]
                     +        uorder_i <- order(peaks_mat_i[, "into"], decreasing = TRUE)  ## rows of this sample ranked by decreasing "into"
                     +        uindex_i <- xcms::rectUnique(cbind(mzmin = mz_min_i, mzmax = mz_max_i,
                     +                                            rtmin = rt_min_i, rtmax = rt_max_i),
                     +                                      uorder_i, mzdiff)
                     +        peaks_mat_i <- peaks_mat_i[uindex_i, , drop = FALSE]  ## keep the rows rectUnique selects; presumably windows not overlapping an earlier-ranked one -- TODO confirm against xcms
                     +        peaks_mat_out <- rbind(peaks_mat_out, peaks_mat_i)
                     +    }
                     +    cat("\n")
                     +    xcms::chromPeaks(object) <- peaks_mat_out  ## write the reduced peak matrix back into the object
                     +    return(object)
                     +}
                     \ No newline at end of file

R/betweenAlignment.R

History View file @ eea95d1

@@ -176,10 +176,11 @@ betweenAlignment <- function(pD, cAList, pAList, impList, filterMin = 1,
 #' @export
 #' @noRd
 setMethod("show","betweenAlignment",
-          function(object){
+          function(object) {
               cat("An object of class \"", class(object), "\"\n", sep = "")
               cat(length(object@mergedPeaksDataset@peaksrt), "groups:",
-                  sapply(object@mergedPeaksDataset@peaksrt, length), "merged peaks\n"
+                  sapply(object@mergedPeaksDataset@peaksrt, length),
+                  "merged peaks\n"
                   )
           }
           )

R/clusterAlignment.R

History View file @ eea95d1

@@ -66,13 +66,13 @@ setMethod("decompress","clusterAlignment",
                      #' @export clusterAlignment
                      clusterAlignment <- function(pD, runs = 1:length(pD@rawdata),
                                                   timedf = NULL, usePeaks = TRUE,
                     -                             verbose = TRUE, ...){
                     +                             verbose = TRUE, ...) {
                          n <- length(runs)
                     -    if(usePeaks)
                     +    if (usePeaks)
                              nr <- length(pD@peaksdata)
                          else
                              nr <- length(pD@rawdata)
                     -    alignments <- vector("list", n*(n-1)/2)
                     +    alignments <- vector("list", n * (n - 1) / 2)
                          aligned <- matrix(-1, nr, nr)
                          colnames(aligned) <- names(pD@rawdata)
                          rownames(aligned) <- names(pD@rawdata)
@@ -80,48 +80,43 @@ clusterAlignment <- function(pD, runs = 1:length(pD@rawdata),
                          colnames(dist) <- names(pD@rawdata)[runs]
                          rownames(dist) <- names(pD@rawdata)[runs]
                          count <- 0
                     -    for(i in 1:(n-1))
                     -    {
                     +    for (i in 1:( n - 1)) {
                              run.i <- runs[i]
                     -        for(j in (i+1):n)
                     -        {
                     +        for (j in (i + 1):n) {
                                  run.j <- runs[j]
                     -            count <- count+1
                     -            if(verbose)
                     -            {
                     +            count <- count + 1
                     +            if(verbose) {
                                      cat("[clusterAlignment] Aligning",
                                          names(pD@rawdata)[run.i], "to",
                                          names(pD@rawdata)[run.j], "\n")
+                                 }
                     -            if(usePeaks)
                     -            {
                     -              alignments[[count]] <-
                     -                peaksAlignment(pD@peaksdata[[run.i]],
                     -                               pD@peaksdata[[run.j]],
                     -                               pD@peaksrt[[run.i]],
                     -                               pD@peaksrt[[run.j]],
                     -                               usePeaks = usePeaks,
                     -                               timedf = timedf[[count]],
                     +            if (usePeaks) {
                     +              alignments[[count]] <-
                     +                peaksAlignment(pD@peaksdata[[run.i]],
                     +                               pD@peaksdata[[run.j]],
                     +                               pD@peaksrt[[run.i]],
                     +                               pD@peaksrt[[run.j]],
                     +                               usePeaks = usePeaks,
                     +                               timedf = timedf[[count]],
                                                     verbose = verbose, ...)
                     -            }
                     -            else
                     -            {
                     +            } else {
                                      alignments[[count]] <-
                                          peaksAlignment(pD@rawdata[[run.i]],
                                                         pD@rawdata[[run.j]],
                                                         pD@rawrt[[run.i]],
                                                         pD@rawrt[[run.j]],
                     -                                   usePeaks=usePeaks, timedf=NULL,
                     -                                   verbose=verbose, ...)
                     +                                   usePeaks = usePeaks, timedf = NULL,
                     +                                   verbose = verbose, ...)
+                                 }
                     -            aligned[runs[i],runs[j]] <- aligned[runs[j],runs[i]] <- count
                     -            dist[j,i] <- dist[i,j] <- alignments[[count]]@dist
                     -	}
                     -    }
                     +            aligned[runs[i], runs[j]] <- aligned[runs[j], runs[i]] <- count
                     +            dist[j, i] <- dist[i, j] <- alignments[[count]]@dist
                     +          }
                     +  }
                          merge <- hclust(as.dist(dist), method = "average")$merge
                          merge.copy <- merge
                     -    for(i in 1:length(runs))
                     -        {merge[which(merge.copy == (-i))] <- (-runs[i])}
                     +    for (i in 1:length(runs)) {
                     +      merge[which(merge.copy == (-i))] <- (-runs[i])
                     +      }
                          new("clusterAlignment", runs = runs, aligned = aligned,
                              gap = alignments[[1]]@gap, D = alignments[[1]]@D, dist = dist,
                              alignments = alignments, merge = merge)
@@ -170,38 +165,36 @@ function(object) {
                      ##' @keywords classes
                      ##' @examples
                      ##'
                     -##' require(gcspikelite)
                     -##'
                     -##' ## paths and files
                     -##' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -##' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
                     -##' eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
                     -##'
                     -##' ## read data
                     -##' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5))
                     -##' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -##'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5)
                     -##' ca <- clusterAlignment(pd, metric = 1, D = 50, type = 1, gap = 0.5)
                     +#' require(gcspikelite)
                     +#'
                     +#' # paths and files
                     +#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     +#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
                     +#' eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
                     +#'
                     +#' # read data, peak detection results
                     +#' pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5))
                     +#' pd <- addAMDISPeaks(pd, eluFiles[1:2])
                     +#'
                     +#' ca <- clusterAlignment(pd, gap=0.5, D=0.05, df=30, metric=1, type=1)
                      ##' plotClustAlignment(ca, run = 1)
                      ##' plotClustAlignment(ca, run = 2)
                     -##' plotClustAlignment(ca, run = 3)
                     +##' plotClustAlignment(ca, run = 3)
                      ##'
                      ##' @importFrom graphics plot
                      ##' @export
                      setMethod("plotClustAlignment", "clusterAlignment",
                     -          function(object, alignment = 1, ...)
                     -          {
                     +          function(object, alignment = 1, ...) {
                                    rn <- rownames(object@aligned)
                     -              for(i in alignment){
                     -                  ind <- which(object@aligned == i, arr.ind = TRUE)[2,]
                     +              for (i in alignment) {
                     +                  ind <- which(object@aligned == i, arr.ind = TRUE)[2, ]
                                        plot(## object@alignments[[i]],
                                            object@alignments[[i]]@v$match,
                                            main = paste("D=", object@D, " gap=", object@gap,
                                                         sep = ""),
                     -                      xlab = paste("Peaks ",rn[ind[1]], sep = " - "),
                     -                      ylab = paste("Peaks ",rn[ind[2]], sep = " - "),
                     +                      xlab = paste("Peaks ", rn[ind[1]], sep = " - "),
                     +                      ylab = paste("Peaks ", rn[ind[2]], sep = " - "),
                                            ...)
+                                   }
+                               }
+                               )
+                    -

R/dp.R

History View file @ eea95d1

@@ -43,36 +43,29 @@
                      #'
                      #' @useDynLib flagme
                      #' @export dp
                     -dp<-function(M,gap=.5,big=10000000000,verbose=FALSE) {
                     +dp <- function(M, gap = .5, big= 10000000000, verbose = FALSE) {
                        # setup score matrix
                     -  bigr<-c(0,big*(1:ncol(M)))
                     -  bigc<-c(big*(1:nrow(M)))
                     -  D<-rbind(bigr,cbind(bigc,M)); #D[1,1]<-0
                     -  #bigr<-c(0,gap*(1:ncol(M)))
                     -  #bigc<-c(gap*(1:nrow(M)))
                     -  #D<-rbind(bigr,cbind(bigc,M)); #D[1,1]<-0
                     +  bigr <- c(0, big * (1:ncol(M)))
                     +  bigc <- c(big * (1:nrow(M)))
                     +  D <- rbind(bigr, cbind(bigc, M)); # D[1, 1] <-0
                     +  # bigr <- c(0, gap * (1:ncol(M)))
                     +  # bigc <- c(gap * (1:nrow(M)))
                     +  # D <- rbind(bigr, cbind(bigc, M)); # D[1, 1] <-0
                        # setup traceback
                     -  phi<-matrix(0,nrow=nrow(D),ncol=ncol(D))
                     -  phi[1,]<-2; phi[,1]<-1; phi[1,1]<-3
+                    -
                     -  match<-matrix(-1,nrow=nrow(M)+ncol(M),ncol=2) # matrix to store matches
                     +  phi <- matrix(0, nrow = nrow(D), ncol = ncol(D))
                     +  phi[1, ] <- 2; phi[, 1] <- 1; phi[1, 1] <- 3
                     +  match <- matrix(-1, nrow = nrow(M) + ncol(M), ncol = 2) # matrix to store matches
                     -  out<-.C("dp", D=as.double(D), M=as.double(M), gap=as.double(gap), phi=as.integer(phi),
                     -                nr=as.integer(nrow(M)), ncol=as.integer(ncol(M)),
                     -          match=as.integer(match), nmatch=as.integer(0),
                     -          PACKAGE="flagme"
                     -          )
                     +  out <- .C("dp", D = as.double(D), M = as.double(M), gap = as.double(gap),
                     +    phi = as.integer(phi), nr = as.integer(nrow(M)), ncol = as.integer(ncol(M)),
                     +    match = as.integer(match), nmatch = as.integer(0), PACKAGE = "flagme")
                        #list(D=matrix(out$D,ncol=ncol(D)),phi=matrix(out$phi,ncol=ncol(D)),match=1+matrix(out$match,ncol=2)[out$nmatch:1,])
                     -  list(match=1+matrix(out$match,ncol=2)[out$nmatch:1,])
                     +  list(match = 1 + matrix(out$match, ncol = 2)[out$nmatch:1, ])
+                     }
+                    -
+                    -
+                    -
+                    -
                      #' dynRT
                      #'
                      #' Dynamic Retention Time Based Alignment algorithm, given a similarity matrix
@@ -87,77 +80,72 @@ dp<-function(M,gap=.5,big=10000000000,verbose=FALSE) {
                      #' @examples
                      #'
                      #' require(gcspikelite)
                     -#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
                     -#' ## read data, peak detection results
                     -#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550),
                     -#'     rtrange=c(7.5,10.5))
                     -#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd,
                     -#'     peakPicking=c('mF'),snthresh=3, fwhm=10,  step=0.1, steps=2,
                     -#'     mzdiff=0.5, sleep=0)
                     +#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +#'                     sep = "/"),"CDF", full = TRUE)
                     +#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +#' ## create settings object
                     +#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     +#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +#' data
                      #' ## review peak picking
                     -#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
                     +#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
                      #' ## similarity
                     -#' r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]], pd@peaksrt[[1]],
                     -#'     pd@peaksrt[[2]], D=50)
                     +#' r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]], data@peaksrt[[1]],
                     +#'     data@peaksrt[[2]], D = 50)
                      #' ## dynamic retention time based alignment algorithm
                     -#' v <- dynRT(S=r)
                     +#' v <- dynRT(S = r)
                      #'
                      #' @export dynRT
                     -dynRT <- function(S){
                     -    options(warn=-1)
                     +dynRT <- function(S) {
                     +    options(warn = -1)
                          ## S similarity matrix
                          ## move through S to find the highest score
                     -    S <- round(S, digits=3)
                     -    id <- max(table(S))-1
                     +    S <- round(S, digits = 3)
                     +    id <- max(table(S)) - 1
                          ## filling <- which(table(S) > 200)# weak
                          filling <- which(table(S) > id)
                          filling.names <- as.numeric(names(filling))
                     -    if(filling.names > 1)
                     -    {
                     +    if (filling.names > 1) {
                              filling.names <- 1
+                         }
                          trace <- c()
                          ## this boolean workaround is to allow to use both the ndp
                          ## function and the matching function from MR and RR
                     -    if(filling.names == 1)
                     -    {
                     -        for(i in 1:nrow(S)){
                     -            trace[i] <- which(S[i,] == min(S[i,])) # MR
                     +    if (filling.names == 1) {
                     +        for (i in 1:nrow(S)) {
                     +            trace[i] <- which(S[i, ] == min(S[i, ])) # MR
+                             }
+                         }
                     -    if(filling.names == 0)
                     -    {
                     -        for(i in 1:nrow(S)){
                     -            trace[i] <- which(S[i,] == max(S[i,])) # RR
                     +    if (filling.names == 0) {
                     +        for (i in 1:nrow(S)) {
                     +            trace[i] <- which(S[i, ] == max(S[i, ])) # RR
+                             }
+                         }
                     -    trace.mtx <- matrix(NA, nrow=nrow(S), ncol=2)
                     -    trace.mtx[,1] <- 1:nrow(S)
                     +    trace.mtx <- matrix(NA, nrow = nrow(S), ncol = 2)
                     +    trace.mtx[, 1] <- 1:nrow(S)
                          trace[which(trace == 1)] <- NA
                     -    trace.mtx[,2] <- trace
                     +    trace.mtx[, 2] <- trace
                          ## find the compounds matched more than once and make them unique
                     -    idx <- which(table(trace.mtx[,2]) > 1) # colonne della matrice S
                     +    idx <- which(table(trace.mtx[, 2]) > 1) # colonne della matrice S
                          names.idx <- as.numeric(names(idx)) # i doppioni
                          position <- c() # i doppi con il match piu alto
+                    -
                     -    if(filling.names == 1)
                     -    {
                     -        for(k in 1:length(names.idx)){
                     -            position[k] <- which(S[,names.idx[k]] == min(S[,names.idx[k]]))
                     -        }
                     +    if (filling.names == 1) {
                     +        for (k in 1:length(names.idx)) {
                     +            position[k] <- which(S[, names.idx[k]] == min(S[, names.idx[k]]))
                     +        }
+                         }
                     -    if(filling.names == 0)
                     -    {
                     -        for(k in 1:length(names.idx)){
                     -            position[k] <- which(S[,names.idx[k]] == max(S[,names.idx[k]]))
                     +    if (filling.names == 0) {
                     +        for (k in 1:length(names.idx)) {
                     +            position[k] <- which(S[, names.idx[k]] == max(S[, names.idx[k]]))
+                             }
+                         }
+                    -
                          tutti <- which(trace %in% names.idx)
                     -    trace.mtx[,2][tutti[! tutti %in% position]] <- NA
+                    -
                     -    return(list(match=trace.mtx))
+                    -
                     -    options(warn=0)
                     +    trace.mtx[, 2][tutti[! tutti %in% position]] <- NA
                     +    return(list(match = trace.mtx))
                     +    options(warn = 0)
+                     }

R/exportSpectra.R

History View file @ eea95d1

@@ -1,9 +1,9 @@
                      #' exportSpectra
                     -#'
                     +#'
                      #' Write the mass spectrum into a .msp file to be used in NIST search.
                     -#'
                     +#'
                      #' Write the mass spectrum into a .msp file to be used in NIST search.
                     -#'
                     +#'
                      #' @param object an object of class "peaksDataset"
                      #' @param outList an object created using the gatherInfo() function
                      #' @param spectra numeric. The number of the mass spectra to be printed. It
@@ -13,7 +13,7 @@
                      #' @return a .msp file
                      #' @author riccardo.romoli@@unifi.com
                      #' @export exportSpectra
                     -exportSpectra <- function (object, outList, spectra, normalize = TRUE)
                     +exportSpectra <- function (object, outList, spectra, normalize = TRUE)
+                     {
                          spectra <- as.numeric(spectra)
                          mz <- outList[[spectra]]$mz
@@ -21,7 +21,7 @@ exportSpectra <- function (object, outList, spectra, normalize = TRUE)
                          if(normalize)
+                         {
                              for(i in 1:ncol(abu)){
                     -            if(is.na(sum(abu[, i])))
                     +            if(is.na(sum(abu[, i])))
                                      next
                                  abu[, i] <- 100 * abu[, i]/abu[which.max(abu[, i]), i]
+                             }
@@ -33,11 +33,10 @@ exportSpectra <- function (object, outList, spectra, normalize = TRUE)
                          idx <- 1
                          spec <- paste(mz, abu[,idx])
                          msp <- rbind(paste("NAME: Variable", spectra),
                     -                 paste("COMMENT:", round(object@peaksrt[[idx]][spectra], digits = 2), "min"),
                     -                 paste("FORMULA:"), paste("MW:"), paste("CAS:"), paste("SYNONYM:"),
                     +                 paste("COMMENT:", round(object@peaksrt[[idx]][spectra], digits = 2), "min"),
                     +                 paste("FORMULA:"), paste("MW:"), paste("CAS:"), paste("SYNONYM:"),
                                       paste("Num Peaks:", length(spec)), matrix(unlist(spec), nrow = length(unlist(spec)), ncol = 1))
                          write(msp, file = paste0(spectra,".msp"), sep = "\n")
+                    -
                     -}
                     +}

R/importSpectra.R

History View file @ eea95d1

                     new file mode 100644
@@ -0,0 +1,195 @@
                     +##' Read the mass spectra from an external msp file
                     +##'
                     +##' Read the mass spectra from an external file in msp format. The format is
                     +##' used in NIST search library database. Every compound starts at a line
                     +##' beginning with "NAME:"; the lines before its "Num Peaks:" line are
                     +##' parsed as "key: value" metadata, the lines after it as ";"-separated
                     +##' peaks whose m/z and intensity are separated by a tab or by blanks.
                     +##' @title importSpec
                     +##' @param file a .msp file from NIST search library database
                     +##' @return a list with one element per compound. Each element contains the
                     +##'   metadata fields of the compound plus \code{spec}, a two-column table
                     +##'   (\code{mz}, \code{intensity}) with the m/z values rounded to integers.
                     +##'   \code{spec} is a matrix, or a data.frame when duplicated masses had to
                     +##'   be summed up.
                     +##' @author [email protected]
                     +##' @export
                     +importSpec <- function(file) {
                     +    ## read msp lib, one element per non-empty line
                     +    lib <- scan(file, what = "", sep = "\n", quiet = TRUE)
                     +    ## separate each mass spec: a compound starts at its "NAME:" line
                     +    starts <- which(regexpr("[Nn][Aa][Mm][Ee]:", lib) == 1)
                     +    ends <- c(starts[-1] - 1, length(lib))
                     +    ## seq_along() so that a file without any "NAME:" line gives an empty
                     +    ## list instead of iterating over c(1, 0)
                     +    list.spec <- lapply(seq_along(starts), function(z) {
                     +        comp <- lib[starts[z]:ends[z]]
                     +        numPeaks.idx <- which(regexpr("[Nn][Uu][Mm] [Pp][Ee][Aa][Kk][Ss]:", comp) == 1)
                     +        if (length(numPeaks.idx) != 1) {
                     +            stop("Expected exactly one 'Num Peaks:' line in compound ",
                     +                 comp[1])
                     +        }
                     +        ## meta data: everything before the "Num Peaks:" line
                     +        metaData <- comp[seq_len(numPeaks.idx - 1)]
                     +        md <- strsplit(metaData, split = ": ")
                     +        md1 <- sapply(md, "[[", 1)
                     +        md2 <- sapply(md, "[", 2)
                     +        metaData.list <- setNames(as.list(md2), md1)
                     +        ## mass spec
                     +        nlines <- length(comp)
                     +        npeaks <- as.numeric(strsplit(comp[numPeaks.idx], ":")[[1]][2])
                     +        peaks.idx <- (numPeaks.idx + 1):nlines
                     +        pks <- gsub("^ +", "", unlist(strsplit(comp[peaks.idx], ";")))
                     +        ## TODO: peaks may also be delimited by "()"; not supported yet
                     +        pks <- pks[pks != ""]
                     +        if (length(pks) != npeaks) {
                     +            stop("Not the right number of peaks in compound ",
                     +                 metaData.list$NAME)
                     +        }
                     +        ## m/z and intensity are separated by a tab or by blanks
                     +        if (length(grep(pattern = "\t", pks)) > 0) {
                     +            pklst <- strsplit(pks, "\t")
                     +        } else if (length(grep(pattern = "\\s", pks)) > 0) {
                     +            pklst <- strsplit(pks, " ")
                     +        } else {
                     +            ## fail here with a clear message rather than later on an
                     +            ## undefined 'pklst'
                     +            stop("Some formatting errors in the msp library: ",
                     +                 "cannot split m/z and intensity in compound ",
                     +                 metaData.list$NAME)
                     +        }
                     +        pklst <- lapply(pklst, function(x) x[x != ""])
                     +        mz <- round(as.numeric(sapply(pklst, "[[", 1)))
                     +        int <- as.numeric(sapply(pklst, "[[", 2))
                     +        finaltab <- matrix(c(mz, int), ncol = 2)
                     +        ## rounding can collapse several masses onto the same integer m/z
                     +        if (any(table(mz) > 1)) {
                     +            ## report the CAS field (e.g. "CAS", "CAS#", "CASNO") if any
                     +            cas.idx <- grep("^CAS", names(metaData.list),
                     +                            ignore.case = TRUE)
                     +            cas <- if (length(cas.idx) > 0) {
                     +                metaData.list[[cas.idx[1]]]
                     +            } else {
                     +                NA
                     +            }
                     +            warning("Duplicate mass in compound ", metaData.list$NAME,
                     +                    " (CAS ", cas, ")... summing up intensities")
                     +            finaltab <- aggregate(finaltab[, 2],
                     +                                  by = list(finaltab[, 1]),
                     +                                  FUN = sum)
                     +        }
                     +        colnames(finaltab) <- c("mz", "intensity")
                     +        c(metaData.list, list(spec = finaltab))
                     +    })
                     +    return(list.spec)
                     +}
+                    +
+                    +
                     +##' Calculate the distance between a reference mass spectrum and a sample one
                     +##'
                     +##' Calculate the distance between a reference mass spectrum and one from the
                     +##' sample. The sample spectrum is the row-wise mean of
                     +##' \code{outList[[whichSpec]]$data} (missing values ignored), scaled so that
                     +##' its most intense mass equals 100. Both spectra are expanded to the same
                     +##' integer m/z range, with masses absent from a spectrum set to 0, and
                     +##' compared with \code{normDotProduct}.
                     +##' @title matchSpec
                     +##' @param spec1 reference mass spectrum; a list whose \code{spec} element
                     +##'   has the columns \code{mz} and \code{intensity}
                     +##' @param outList the return of \code{\link{gatherInfo}}
                     +##' @param whichSpec the entry number of outList
                     +##' @return the distance between the reference mass spectrum and the others
                     +##' @author Riccardo Romoli
                     +##' @export
                     +matchSpec <- function(spec1, outList, whichSpec) {
                     +  ## first get the average spec from the gatherList
                     +  ## NOTE: an earlier draft used only columns 1:49 of $data; the reason is
                     +  ## not recorded
                     +  averageInt <-
                     +    apply(outList[[whichSpec]]$data, MARGIN = 1, FUN = mean, na.rm = TRUE)
                     +  ## normalize the intensity; the base peak is looked up once instead of
                     +  ## once per mass
                     +  basePeak <- averageInt[which.max(averageInt)]
                     +  if (length(basePeak) == 0) {
                     +    stop("No non-missing intensity in outList entry ", whichSpec)
                     +  }
                     +  normInt <- unname((100 * averageInt) / basePeak)
                     +  ## combine mz and normalized intensity
                     +  pspec <- matrix(c(outList[[whichSpec]]$mz, normInt), ncol = 2)
                     +  colnames(pspec) <- c("mz", "intensity")
                     +  libSpec <- spec1$spec
                     +  ## merge the spectra to the same mz range and remove the NA
                     +  mzLow <- min(libSpec[, "mz"], pspec[, "mz"], na.rm = TRUE)
                     +  mzHigh <- max(libSpec[, "mz"], pspec[, "mz"], na.rm = TRUE)
                     +  mztomerge <- data.frame(mz = mzLow:mzHigh)
                     +  pspec.merged <- merge(pspec, mztomerge, by = "mz", all = TRUE)
                     +  libSpec.merged <- merge(libSpec, mztomerge, by = "mz", all = TRUE)
                     +  pspec.merged[is.na(pspec.merged)] <- 0
                     +  libSpec.merged[is.na(libSpec.merged)] <- 0
                     +  ## calculate the distance among the spectra
                     +  distance <- normDotProduct(as.matrix(pspec.merged[, "intensity"]),
                     +                             as.matrix(libSpec.merged[, "intensity"]))
                     +  return(distance)
                     +}
+                    +
+                    +
                     +##' The function calculates the distance between each mass spec in the msp
                     +##' file and the aligned mass spec from each sample
                     +##'
                     +##' Return the distance matrix. The index of the library entry being
                     +##' processed is printed to the console as a progress trace.
                     +##' @title distToLib
                     +##' @param mspLib a .msp file from NIST, as a list of spectra (see
                     +##'   \code{\link{importSpec}})
                     +##' @param outList an object from gatherInfo()
                     +##' @return the distance matrix between the mass spec and the aligned spec;
                     +##'   one row per entry of \code{mspLib} (named after the first field of
                     +##'   each entry) and one column per entry of \code{outList}
                     +##'   (\code{outListFold1}, \code{outListFold2}, ...)
                     +##' @author Riccardo Romoli
                     +##' @export
                     +distToLib <- function(mspLib, outList) {
                     +  ## 1:length() would walk over c(1, 0) on empty input and fail with an
                     +  ## obscure subscript error, so reject empty input explicitly
                     +  if (length(mspLib) == 0 || length(outList) == 0) {
                     +    stop("'mspLib' and 'outList' must both contain at least one entry")
                     +  }
                     +  mspDist <- lapply(seq_along(mspLib), function(x) {
                     +    ## trace
                     +    cat(x, "\n")
                     +    sapply(seq_along(outList), function(y) {
                     +      matchSpec(mspLib[[x]], outList, whichSpec = y)
                     +    })
                     +  })
                     +  mtx.dist <- do.call(rbind, mspDist)
                     +  rownames(mtx.dist) <- sapply(mspLib, "[[", 1)
                     +  colnames(mtx.dist) <- paste0("outListFold", seq_along(outList))
                     +  return(mtx.dist)
                     +}
+                    +
+                    +
                     +##' The head-to-tail-plot for the mass spectra
                     +##'
                     +##' Head-to-tail-plot to visually compare the mass spectra. The sample
                     +##' spectrum (row-wise mean of \code{specFromList$data}, scaled so that its
                     +##' most intense mass equals 1000) is drawn upwards; the library spectrum is
                     +##' drawn downwards in colour 2 with its intensities multiplied by 10.
                     +##' @title Head to tail plot
                     +##' @param specFromLib the mass spectra obtained from the .msp file
                     +##' @param specFromList the mass spectra obtained from \code{\link{gatherInfo}}
                     +##' @return the plot
                     +##' @author Riccardo Romoli
                     +##' @export
                     +headToTailPlot <- function(specFromLib, specFromList) {
                     +    libSpec <- specFromLib$spec
                     +    ## get average and normalized intensity of the mass spec; the base peak
                     +    ## is looked up once instead of once per mass
                     +    averageInt <- apply(specFromList$data, MARGIN = 1, FUN = mean,
                     +                        na.rm = TRUE)
                     +    normInt <- unname((1000 * averageInt) /
                     +                      averageInt[which.max(averageInt)])
                     +    pspec.av <- data.frame(mz = specFromList$mz, intensity = normInt)
                     +    ## merge the spectra to the same mz range and remove the NA
                     +    mzLow <- min(libSpec[, "mz"], pspec.av[, "mz"], na.rm = TRUE)
                     +    mzHigh <- max(libSpec[, "mz"], pspec.av[, "mz"], na.rm = TRUE)
                     +    mztomerge <- data.frame(mz = mzLow:mzHigh)
                     +    pspec.merged <- merge(pspec.av, mztomerge, by = "mz", all = TRUE)
                     +    libSpec.merged <- merge(libSpec, mztomerge, by = "mz", all = TRUE)
                     +    pspec.merged[is.na(pspec.merged)] <- 0
                     +    libSpec.merged[is.na(libSpec.merged)] <- 0
                     +    ## now the plot
                     +    ## NOTE(review): the factor 10 presumably assumes library intensities
                     +    ## scaled to a maximum of 100 - confirm against the msp files in use
                     +    plot(pspec.merged, type = "h", ylim = c(-1000, 1000),
                     +         main = "Head to Tail Plot")
                     +    points(libSpec.merged[, "mz"], y = -libSpec.merged[, "intensity"] * 10,
                     +           col = 2, type = "h")
                     +}

R/metrics.R

History View file @ eea95d1

@@ -118,7 +118,7 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2))
                      #' Retention Time Penalized Normalized Dot Product
                      #'
                      #' This function calculates the similarity of all pairs of peaks from 2
                     -#' samples, using the spectra similarity and the rretention time differencies
                     +#' samples, using the spectra similarity and the retention time differences
                      #'
                      #' Computes the normalized dot product between every pair of peak vectors in
                      #' the retention time window (\code{D}) and returns a similarity matrix.
@@ -136,65 +136,64 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2))
                      #'
                      #' ## Not Run
                      #' require(gcspikelite)
                     -#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
                     -#'
                     -#'                                         # read data, peak detection results
                     -#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
                     -#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -#'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
                     -#'                    sleep=0)
                     +#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +#'                     sep = "/"),"CDF", full = TRUE)
                     +#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +#' ## create settings object
                     +#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     +#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +#' data
                      #' ## review peak picking
                     -#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
                     +#' plotChrom(data, rtrange = c(7.5, 10.5), runs = c(1:2))
                      #'
                     -#' r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]],
                     -#'            pd@peaksrt[[1]], pd@peaksrt[[2]], D=50)
                     +#' r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]],
                     +#'            data@peaksrt[[1]], data@peaksrt[[2]], D = 50)
                      #' ## End (Not Run)
                      #'
                      #' @export ndpRT
                     -ndpRT <- function(s1, s2, t1, t2, D){
+                    -
                     +ndpRT <- function(s1, s2, t1, t2, D) {
                          Normalize <- function(j){
                     -        n <- apply(j, 2, function(k){
                     +        n <- apply(j, 2, function(k) {
                                  m <- k[which.max(k)]
                     -            norm <- k/m*100
                     +            norm <- k / m * 100
                              })
                              return(n)
+                         }
+                    -
                     -    scoring <- function(s1, s2, t1, t2, D){
                     -        angle <- function(s1, s2){
                     -            theta <- acos(sum(s1*s2) / (sqrt(sum(s1 * s1)) * sqrt(sum(s2 * s2))))
                     -            theta <- 1-theta
                     -            if(theta < 0)
                     -            {
                     +    scoring <- function(s1, s2, t1, t2, D) {
                     +        angle <- function(s1, s2) {
                     +            theta <- acos(
                     +                sum(s1 * s2) / (sqrt(sum(s1 * s1)) * sqrt(sum(s2 * s2)))
                     +                )
                     +            theta <- 1 - theta
                     +            if(theta < 0) {
                                      theta <- 0
                     -            }
                     -            return(theta)
                     +            }
                     +            return(theta)
+                             }
+                    -
                     -        rtPen <- function(t1, t2, D){
                     +        rtPen <- function(t1, t2, D) {
                                  ## D espresso in secondi
                     -            t1 <- t1/60 # trasformo in secondi
                     -            t2 <- t2/60 # trasformo in secondi
                     -            srt <- exp(-(((t1-t2)^2) / D^2)) # da articolo MR, modificato
                     +            t1 <- t1 / 60 # trasformo in secondi
                     +            t2 <- t2 / 60 # trasformo in secondi
                     +            srt <- exp(- (((t1 - t2)^2) / D^2)) # da articolo MR, modificato
                                  # era 2*D^2
                                  return(srt)
+                             }
+                    -
                              score <- angle(s1, s2) * rtPen(t1, t2, D)
                              return(score)
+                         }
+                    -
                          s1 <- Normalize(s1)
                          s2 <- Normalize(s2)
+                    -
                     -    res <- matrix(0, nrow=ncol(s1), ncol=ncol(s2))
                     -    for(i in 1:ncol(s1)){
                     -        for(j in 1:ncol(s2)){
                     -            res[i,j] <- scoring(s1[,i], s2[,j], t1[i], t2[j], D=D)
                     +    res <- matrix(0, nrow = ncol(s1), ncol = ncol(s2))
                     +    for (i in 1:ncol(s1)) {
                     +        for (j in 1:ncol(s2)) {
                     +            res[i, j] <- scoring(s1[, i], s2[, j], t1[i], t2[j], D = D)
+                             }
                     -    }
                     +    }
                          return(res)
+                     }
@@ -226,55 +225,58 @@ ndpRT <- function(s1, s2, t1, t2, D){
                      #'
                      #' ## Not Run
                      #' require(gcspikelite)
                     -#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
                     -#' ## read data, peak detection results
                     -#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
                     -#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -#'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
                     -#'                    sleep=0)
                     +#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +#'                     sep = "/"),"CDF", full = TRUE)
                     +#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +#' ## create settings object
                     +#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     +#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +#' data
                      #' ## review peak picking
                     -#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
                     +#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
                      #'
                     -#' r <- corPrt(pd@peaksdata[[1]], pd@peaksdata[[2]],
                     -#'            pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, penality=0.2)
                     +#' r <- corPrt(data@peaksdata[[1]], data@peaksdata[[2]],
                     +#'            data@peaksrt[[1]], data@peaksrt[[2]], D = 50, penality = 0.2)
                      #' ## End (Not Run)
                      #'
                      #' @importFrom stats complete.cases
                      #' @export corPrt
                     -corPrt <- function(d1, d2, t1, t2, D, penality=0.2){
                     +corPrt <- function(d1, d2, t1, t2, D, penality = 0.2) {
                          D <- as.numeric(D) # time window in second
                          pn <- as.numeric(penality)# penality if out of time window
                     -    pearson <- function(x,y){
                     +    pearson <- function(x,y) {
                              size <- length(x)
                     -        cfun <- .C("pearson", size=as.integer(size), x=as.double(x),
                     -                   y=as.double(y), result=double(1), PACKAGE='flagme')
                     +        cfun <- .C("pearson", size = as.integer(size), x = as.double(x),
                     +                   y = as.double(y), result = double(1), PACKAGE = 'flagme')
                              return(cfun[["result"]])
+                         }
                     -    Normalize <- function(j){
                     -        n <- apply(j, 2, function(k){
                     +    Normalize <- function(j) {
                     +        n <- apply(j, 2, function(k) {
                                  m <- k[which.max(k)]
                     -            norm <- k/m*100
                     +            norm <- k / m * 100
                              })
+                         }
                          Rank <- function(u) {
                     -        if (length(u) == 0L)
                     +        if (length(u) == 0L)
+                                 u
                              else if (is.matrix(u)) {
                     -            if (nrow(u) > 1L)
                     -                apply(u, 2L, rank, na.last="keep")
                     +            if (nrow(u) > 1L)
                     +                apply(u, 2L, rank, na.last = "keep")
                                  else row(u)
+                             }
                     -        else rank(u, na.last="keep")
                     +        else rank(u, na.last = "keep")
+                         }
                     -    #
                              x <- Normalize(d1)
                              y <- Normalize(d2)
+                    -
                          ## method <- c("pearson", "kendall", "spearman")
                          ncx <- ncol(x)
                          ncy <- ncol(y)
                     -    r <- matrix(0, nrow=ncx, ncol=ncy)
                     +    r <- matrix(0, nrow = ncx, ncol = ncy)
                          for (i in seq_len(ncx)) {
                              for (j in seq_len(ncy)) {
                                  x2 <- x[, i]
@@ -283,24 +285,20 @@ corPrt <- function(d1, d2, t1, t2, D, penality=0.2){
                                  x2 <- rank(x2[ok])
                                  y2 <- rank(y2[ok])
                                  ## insert rt penality in seconds
                     -            rtDiff <- t1[i]*60 - t2[j]*60 # retention time in seconds
                     +            rtDiff <- t1[i] * 60 - t2[j] * 60 # retention time in seconds
                                  rtDiff <- abs(rtDiff)
                                  r[i, j] <- if (any(ok))
                     -                           if(rtDiff <= D)
                     +                           if (rtDiff <= D)
                                                     pearson(x2, y2)
                                                 else
                                                     pearson(x2, y2) - pn
                                             else 0
+                             }
+                         }
+                    -
                     -    r <- apply(r, MARGIN=c(1,2), function(x){
                     -        if(x < 0.2)
                     -        {
                     +    r <- apply(r, MARGIN = c(1, 2), function(x) {
                     +        if (x < 0.2) {
                                  x <- 0
                     -        }
                     -        else
                     -        {
                     +        } else {
                                  x <- x
+                             }
                          })

R/peaksAlignment.R

History View file @ eea95d1

@@ -138,36 +138,39 @@ setMethod("show","peaksAlignment",
                      #' ## see clusterAlignment, it calls peaksAlignment
                      #'
                      #' ## Not Run:
                     -#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
                     -#'
                     -#' # read data, peak detection results
                     -#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
                     -#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -#'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
                     -#'                    sleep=0)
                     -#' ## review peak picking
                     -#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
                     +#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +#'                     sep = "/"),"CDF", full = TRUE)
                     +#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +#' ## create settings object
                     +#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     +#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +#' data
                     +#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
                      #'
                      #' ## align two chromatograms
                     -#' pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
                     -#'                      pd@peaksrt[[1]], pd@peaksrt[[2]], D=50,
                     -#'                      metric=3, compress=FALSE, type=2, penality=0.2)
                     +#' pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
                     +#'                      data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
                     +#'                      metric = 3, compress = FALSE, type = 2, penality = 0.2)
                      #'
                      #' plotAlignment(pA)
                      #' pA@v$match
                      #'
                      #' par(mfrow=c(2,1))
                     -#' plot(pd@peaksdata[[1]][,15], type='h', main=paste(pd@peaksrt[[1]][[15]]))
                     -#' plot(pd@peaksdata[[2]][,17], type='h',
                     -#'      main=paste(pd@peaksrt[[2]][[17]]))
                     +#' plot(data@peaksdata[[1]][,15], type = 'h', main = paste(data@peaksrt[[1]][[15]]))
                     +#' plot(data@peaksdata[[2]][,17], type = 'h',
                     +#'      main = paste(data@peaksrt[[2]][[17]]))
                      #' ## End (Not Run)
                      #'
                      #' @export
                     -peaksAlignment <- function(d1, d2, t1, t2, gap=0.5, D=50,
                     -                           timedf=NULL, df=30, verbose=TRUE,
                     -                           usePeaks=TRUE, compress=TRUE, metric=2,
                     -                           type=2, penality=0.2){
                     +peaksAlignment <- function(d1, d2, t1, t2, gap = 0.5, D = 50,
                     +                           timedf = NULL, df = 30, verbose = TRUE,
                     +                           usePeaks = TRUE, compress = TRUE, metric = 2,
                     +                           type = 2, penality = 0.2) {
                          ## r <- switch(metric,
                          ##             normDotProduct(d1,d2,t1,t2,D=D,
@@ -190,48 +193,47 @@ peaksAlignment <- function(d1, d2, t1, t2, gap=0.5, D=50,
                          ##     D <- D/100
                          ## }
                          r <- switch(metric,
                     -                normDotProduct(d1, d2, t1, t2, D=D,
                     -                               df=df+abs(ncol(d1)-ncol(d2)),
                     -                               timedf=timedf, verbose=verbose),
                     -                ndpRT(s1=d1, s2=d2, t1, t2, D=D),
                     -                corPrt(d1, d2, t1, t2, D=D, penality=penality)
                     +                normDotProduct(d1, d2, t1, t2, D = D,
                     +                               df = df + abs(ncol(d1) - ncol(d2)),
                     +                               timedf = timedf, verbose = verbose),
                     +                ndpRT(s1 = d1, s2 = d2, t1, t2, D = D),
                     +                corPrt(d1, d2, t1, t2, D = D, penality = penality)
+                                     )
                          r[is.nan(r)] <- 1 ## remove NaN
                     -    if(type == 1)
                     -    {
                     +    if (type == 1) {
                              if(verbose)
                                  cat("[peaksAlignment] Comparing", ncol(d1), "peaks to",
                                      ncol(d2), "peaks -- gap=", gap, "D=", D, ', metric=',
                                      metric, ', type=', type, "\n")
                     -        v <- dp(r, gap=gap, verbose=verbose) # dynamic programming
                     +        v <- dp(r, gap = gap, verbose = verbose) # dynamic programming
+                         }
                     -    if(type == 2)
                     -    {
                     +    if (type == 2) {
                              if(verbose)
                                  cat("[peaksAlignment] Comparing", ncol(d1), "peaks to",
                                      ncol(d2), "peaks -- D=", D, "seconds,", 'metric=',
                                      metric, ', type=', type, '\n')
                     -        v <- dynRT(S=r) # RR, modified to be used with normDotProduct()
                     +        v <- dynRT(S = r) # RR, modified to be used with normDotProduct()
+                         }
                     -    v$match <- v$match[!is.na(v$match[,2]),] # remove non-matched peaks
                     +    v$match <- v$match[!is.na(v$match[,2]), ] # remove non-matched peaks
                          sim <- 0
                     -    for(i in 1:nrow(v$match)){
                     +    for(i in 1:nrow(v$match)) {
                              sim <- sim + r[v$match[i, 1], v$match[i, 2]]#
+                         }
                     -    sim <- sim/nrow(v$match)
                     -    if(verbose)
                     -        cat("[peaksAlignment] ", nrow(v$match), "matched.  Similarity=",
                     -            sim, "\n")
                     -    object <- new("peaksAlignment", v=v, r=r, dist=sim,
                     -                  compressed=FALSE, gap=gap, D=D)
                     -    if(compress){
                     +    sim <- sim / nrow(v$match)
                     +    if(verbose) {
                     +        cat("[peaksAlignment] ", nrow(v$match), "matched.  Similarity=",
                     +        sim, "\n")
                     +        }
                     +    object <- new("peaksAlignment", v = v, r = r, dist = sim,
                     +                  compressed = FALSE, gap = gap, D = D)
                     +    if(compress) {
                              compress(object)
                     -    }else{
                     +    } else {
                              object
+                         }
+                     }
@@ -267,23 +269,24 @@ peaksAlignment <- function(d1, d2, t1, t2, gap=0.5, D=50,
                      #' @examples
                      #'
                      #' require(gcspikelite)
                     -#'
                     -#' ## paths and files
                     -#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
                     -#' eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
                     -#'
                     -#' ## read data
                     -#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5))
                     -#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -#'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5)
                     -#'
                     +#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +#'                     sep = "/"),"CDF", full = TRUE)
                     +#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +#' ## create settings object
                     +#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     +#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +#' data
                      #' ## image plot
                     -#' plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE)
                     +#' plotChrom(data, rtrange = c(7.5,8.5), plotPeaks = TRUE, plotPeakLabels =TRUE)
                      #'
                      #' ## align two chromatograms
                     -#' pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
                     -#'                      pd@peaksrt[[1]], pd@peaksrt[[2]], D = 50,
                     +#' pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
                     +#'                      data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
                      #'                      compress = FALSE, type = 1, metric = 1,
                      #'                      gap = 0.5)
                      #' plotAlignment(pA)

R/plotFragments.R

History View file @ eea95d1

@@ -16,53 +16,52 @@
                      #' @author riccardo.romoli@@unifi.it
                      #' @examples
                      #'
                     -#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
                     -#' # read data, peak detection results
                     -#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
                     -#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -#'                    snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
                     -#'                    sleep=0)
                     +#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +#'                     sep = "/"),"CDF", full = TRUE)
                     +#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +#' ## create settings object
                     +#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     +#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +#' data
                      #' ## align two chromatograms
                     -#' pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
                     -#'                      pd@peaksrt[[1]], pd@peaksrt[[2]], D=50,
                     -#'                      metric=3, compress=FALSE, type=2, penality=0.2)
                     +#' pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
                     +#'                      data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
                     +#'                      metric = 3, compress = FALSE, type = 2, penality = 0.2)
                      #' pA@v$match
                      #' ## plot the mass spectra
                      #' par(mfrow=c(2,1))
                     -#' plotFrags(object=pd, sample=1, specID=10)
                     -#' plotFrags(object=pd, sample=2, specID=12)
                     +#' plotFrags(object=data, sample=1, specID=10)
                     +#' plotFrags(object=data, sample=2, specID=12)
                      #'
                      #' @export plotFrags
                     -plotFrags <- function(object, sample, specID, normalize = TRUE, ...){
                     +plotFrags <- function(object, sample, specID, normalize = TRUE, ...) {
                          sp <- as.numeric(specID)
                     -    ## sample <- object@files[sample]
                     -    ## i <- grep(pattern = sample, object@files)
                     -    if(length(sp) > 0){
                     -        y <- object@peaksdata[[sample]][,sp]
                     -        if(normalize){
                     -            y <- sapply(y, function(x){
                     +    if (length(sp) > 0) {
                     +        y <- object@peaksdata[[sample]][, sp]
                     +        if (normalize) {
                     +            y <- sapply(y, function(x) {
                                      i <- which.max(y)
                     -                (100*x)/y[i]}
                     -                )}
                     -        plot(y,
                     -             type = 'h',
                     -             main = paste('Sample', names(object@peaksdata[sample]), 'RT', object@peaksrt[[sample]][sp], 'min'),
                     -             xlab = 'm/z',
                     +                (100 * x) / y[i]}
                     +                )
                     +                }
                     +        plot(y, type = "h", main = paste("Sample",
                     +            names(object@peaksdata[sample]), "RT",
                     +            object@peaksrt[[sample]][sp], "min"), xlab = "m/z",
                                   ylab =
                     -                 if(normalize)
                     -                 {
                     -                     'Rel. Abundance'
                     -                 }
                     -                 else
                     -                 {
                     -                     'Abs. Abundance'
                     +                 if (normalize) {
                     +                     "Rel. Abundance"
                     +                 } else {
                     +                     "Abs. Abundance"
                                       }, ...)
                     -    }
                     -    else
                     -    {
                     -        stop(paste('The spectrum is not present in the sample', object@files[sample], '\n'))
                     -    }
                     +        } else {
                     +        stop(paste("The spectrum is not present in the sample",
                     +        object@files[sample], '\n'))
                     +        }
+                     }
@@ -89,41 +88,42 @@ plotFrags <- function(object, sample, specID, normalize = TRUE, ...){
                      #' @keywords gatherInfo() plot()
                      #' @examples
                      #'
                     -#' ## Rd workflow
                     -#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/")
                     -#' cdfFiles <- dir(gcmsPath,"CDF", full = TRUE)
                     -#'
                     -#' # read data, peak detection results
                     -#' pd <- peaksDataset(cdfFiles[1:4], mz = seq(50,550), rtrange = c(7.5,10.5))
                     -#' pd <- addXCMSPeaks(files = cdfFiles[1:4], object = pd, peakPicking = c('mF'),
                     -#'                    snthresh = 2, fwhm = 8,  step = 0.5, steps = 2, mzdiff = 0.5,
                     -#'                    sleep = 0)
                     +#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +#'                     sep = "/"),"CDF", full = TRUE)
                     +#' data <- peaksDataset(files[1:4], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +#' ## create settings object
                     +#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +#' data <- addXCMSPeaks(files[1:4], data, settings = mfp, minintens = 100,
                     +#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +#' data
                      #' ## multiple alignment
                     -#' ma <- multipleAlignment(pd, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05, bw.gap = 0.6,
                     -#'                         bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50,
                     -#'                         verbose = TRUE, metric = 2, type = 2)
                     +#' ma <- multipleAlignment(data, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05,
                     +#'  bw.gap = 0.6, bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50,
                     +#'  verbose = TRUE, metric = 2, type = 2)
                      #'
                      #' ## gather apex intensities
                     -#' gip <- gatherInfo(pd, ma)
                     +#' gip <- gatherInfo(data, ma)
                      #' gip[[33]]
                     -#' plotAlignedFrags(object = pd, outList = gip, specID = 33)
                     +#' plotAlignedFrags(object = data, outList = gip, specID = 33)
                      #'
                      #' @export plotAlignedFrags
                     -plotAlignedFrags <- function(object, outList, specID, fullRange = TRUE, normalize = TRUE, ...)
                     -{
                     -specID <- as.numeric(specID)
                     -mz <- outList[[specID]]$mz
                     -abundance <- outList[[specID]]$data
+                    -
                     -if(normalize)
                     -{
                     -    for(i in 1:ncol(abundance))
                     -    {
                     -        if(is.na(sum(abundance[,i])))
                     -            next
                     -        abundance[,i] <- 100 * abundance[,i] / abundance[which.max(abundance[,i]), i]
                     -    }
                     -}
                     +plotAlignedFrags <- function(object, outList, specID, fullRange = TRUE,
                     +    normalize = TRUE, ...) {
                     +        specID <- as.numeric(specID)
                     +        mz <- outList[[specID]]$mz
                     +        abundance <- outList[[specID]]$data
                     +        if (normalize) {
                     +            for(i in 1:ncol(abundance)) {
                     +                if(is.na(sum(abundance[,i])))
                     +                next
                     +                abundance[,i] <- 100 * abundance[,i] /
                     +                    abundance[which.max(abundance[,i]), i]
                     +                }
                     +            }
                      ## set the plot grid
                      specnum <- table(apply(abundance, 2, sum)) # count number of massSpec different from NA

R/progressiveAlignment.R

History View file @ eea95d1

@@ -1,27 +1,34 @@
                     -##' Compress method for progressiveAlignment
                     +##' Decompress method for progressiveAlignment
                      ##'
                     -##' Compress method for progressiveAlignment
                     +##' Decompress method for progressiveAlignment
                      ##' @title Decompress method for progressiveAlignment
                     -##' @param object dummy
                     -##' @param verbose dummy
                     +##' @param object progressiveAlignment object
                     +##' @param verbose logical
                      ##' @param ... dummy
                      ##' @author MR
                     +##' @importFrom SparseM as.matrix
                     +##' @import methods
                     +##' @importFrom methods setMethod new
                      ##' @keywords internal
                      setMethod("decompress", "progressiveAlignment",
                     -          function(object, verbose=TRUE, ...){
                     -              if(object@merges[[1]]$compressed == FALSE) {
                     -                  if(verbose)
                     -                      cat("[decompress.progressiveAlignment] Already decompressed.\n")
                     +          function(object, verbose = TRUE, ...) {
                     +              if (object@merges[[1]]$compressed == FALSE) {
                     +                  if (verbose)
                     +                      cat("[decompress.progressiveAlignment]
                     +                      Already decompressed.\n")
                                        return(object)
+                                   }
                     -              for(i in 1:length(object@merges)) {
                     -                  if(object@merges[[i]]$compressed) {
                     -                      object@merges[[i]]$r <- 1-as.matrix(object@merges[[i]]$r)
                     +              for (i in 1:length(object@merges)) {
                     +                  if (object@merges[[i]]$compressed) {
                     +                      object@merges[[i]]$r <-
                     +                        1 - as.matrix(object@merges[[i]]$r)
                                            object@merges[[i]]$compressed <- FALSE
+                                       }
+                                   }
                                    new("progressiveAlignment", object)
                     -          })
                     +          }
                     +          )
+                    +
                      ##' Compress method for progressiveAlignment
                      ##'
@@ -30,24 +37,29 @@ setMethod("decompress", "progressiveAlignment",
                      ##' @param object dummy
                      ##' @param verbose dummy
                      ##' @param ... dummy
                     -##' @keywords internal
                      ##' @author MR
                      ##' @importFrom SparseM as.matrix.csc
                     -setMethod("compress","progressiveAlignment",
                     -          function(object,verbose=TRUE,...) {
                     -              if(object@merges[[1]]$compressed) {
                     -                  if(verbose)
                     -                      cat("[compress.progressiveAlignment] Already compressed.\n")
                     +##' @import methods
                     +##' @importFrom methods setMethod new
                     +##' @keywords internal
                     +setMethod("compress", "progressiveAlignment",
                     +          function(object, verbose = TRUE, ...) {
                     +              if (object@merges[[1]]$compressed) {
                     +                  if (verbose)
                     +                      cat("[compress.progressiveAlignment]
                     +                      Already compressed.\n")
                                        return(object)
+                                   }
                     -              for(i in 1:length(object@merges)) {
                     -                  if(!(object@merges[[i]]$compressed)) {
                     -                      object@merges[[i]]$r <- as.matrix.csc(1-object@merges[[i]]$r)
                     +              for (i in 1:length(object@merges)) {
                     +                  if (!(object@merges[[i]]$compressed)) {
                     +                      object@merges[[i]]$r <-
                     +                        as.matrix.csc(1 - object@merges[[i]]$r)
                                            object@merges[[i]]$compressed <- TRUE
+                                       }
+                                   }
                     -              new("progressiveAlignment",object)
                     -          })
                     +              new("progressiveAlignment", object)
                     +          }
                     +          )
                      ##' Show method for progressiveAlignment object
@@ -57,19 +69,19 @@ setMethod("compress","progressiveAlignment",
                      ##' @author MR
                      ##' @export
                      ##' @noRd
                     -setMethod("show","progressiveAlignment",
                     -function(object){
                     -   cat("An object of class \"", class(object), "\"\n", sep="")
                     +setMethod("show", "progressiveAlignment", function(object){
                     +   cat("An object of class \"", class(object), "\"\n", sep = "")
                         cat(length(object@merges), "merges\n")
                     -})
                     +}
                     +)
                      #' Data Structure for progressive alignment of many GCMS samples
                     -#'
                     +#'
                      #' Performs a progressive peak alignment (clustalw style) of multiple GCMS peak
                      #' lists
                     -#'
                     +#'
                      #' The progressive peak alignment we implemented here for multiple GCMS peak
                      #' lists is analogous to how \code{clustalw} takes a set of pairwise sequence
                      #' alignments and progressively builds a multiple alignment.  More details can
@@ -97,237 +109,188 @@ function(object){
                      #' of Melbourne.
                      #' @keywords classes
                      #' @examples
                     -#'
                     +#'
                      #' require(gcspikelite)
                     -#' ## paths and files
                     -#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
                     -#' eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
                     -#'
                     -#' ## read data, peak detection results
                     -#' pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5))
                     -#' pd <- addAMDISPeaks(pd, eluFiles[1:2])
                     -#'
                     -#' ca <- clusterAlignment(pd, gap=.5, D=.05, df=30, metric=1, type=1,
                     -#'                        compress = FALSE)
                     -#' pa <- progressiveAlignment(pd, ca, gap=.6, D=.1, df=30, type=1, compress = FALSE)
                     +#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +#'                     sep = "/"),"CDF", full = TRUE)
                     +#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +#' ## create settings object
                     +#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     +#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +#' data
                     +#' ca <- clusterAlignment(data, gap = 0.5, D = 0.05, df = 30, metric = 1,
                     +#'   type = 1, compress = FALSE)
                     +#' pa <- progressiveAlignment(data, ca, gap = 0.6, D = 0.1, df = 30,
                     +#'  type = 1, compress = FALSE)
                      #'
                      #' @export
                      progressiveAlignment <- function(pD, cA, D = 50, gap = 0.5, verbose = TRUE,
                                                       usePeaks = TRUE, df = 30, compress = FALSE,
                     -                                 type=2)
                     -{
                     -    ## options(error = recover)
                     +                                 type = 2) {
                          m <- cA@merge
                          merges <- vector("list", nrow(m))
                     -    if(usePeaks)
                     -    {
                     -        pd <- pD@peaksdata
                     -    }
                     -    else
                     -    {
                     -        pd <- pD@rawdata
                     +    if (usePeaks) {
                     +      pd <- pD@peaksdata
                     +    } else {
                     +      pd <- pD@rawdata
+                         }
                          pD <- NULL #?
                          cA <- decompress(cA, verbose = verbose)
+                    -
                     -    for(i in 1:nrow(m))
                     -    {
                     -        if(verbose)
                     -        {
                     -            cat("[progressiveAlignment] Doing merge", m[i,], "\n")
                     -        }
+                    -
                     -        ## left
                     -        if(m[i,1] < 0)
                     -        {
                     -            left.runs <- (-m[i,1])
                     -            left.ind <- matrix(1:ncol(pd[[left.runs]]), ncol = 1)
                     -        }
                     -        else
                     -        {
                     -            left.runs <- merges[[m[i,1]]]$runs
                     -            left.ind <- merges[[m[i,1]]]$ind
                     -        }
+                    -
                     -        ## right
                     -        if(m[i,2] < 0)
                     -        {
                     -            right.runs <- abs(m[i,2])
                     -            right.ind <- matrix(1:ncol(pd[[right.runs]]), ncol = 1) ## error
                     -            ## subscript out of bound
+                    +
                     +    for (i in 1:nrow(m)) {
                     +      if (verbose) {
                     +            cat("[progressiveAlignment] Doing merge", m[i, ], "\n")
+                             }
                     -        else
                     -        {
                     -            right.ind <- merges[[m[i,2]]]$ind
                     -            right.runs <- merges[[m[i,2]]]$runs
                     +      ## left
                     +      if (m[i, 1] < 0) {
                     +        left.runs <- (-m[i, 1])
                     +        left.ind <- matrix(1:ncol(pd[[left.runs]]), ncol = 1)
                     +        } else {
                     +          left.runs <- merges[[m[i, 1]]]$runs
                     +          left.ind <- merges[[m[i, 1]]]$ind
+                             }
+                    -
                     -	  if(verbose)
                     -      {
                     +      ## right
                     +      if (m[i, 2] < 0) {
                     +        right.runs <- abs(m[i, 2])
                     +        # subscript out of bound
                     +        right.ind <- matrix(1:ncol(pd[[right.runs]]), ncol = 1)
                     +        } else {
                     +          right.ind <- merges[[m[i, 2]]]$ind
                     +          right.runs <- merges[[m[i, 2]]]$runs
                     +          }
                     +      if (verbose) {
                              cat("[progressiveAlignment] left.runs:", left.runs, ", ")
                              cat("right.runs:", right.runs, "\n")
+                           }
+                    -
                     -	  if(length(right.runs) == 1 & length(left.runs) == 1)
                     -      {
                     -        al <- cA@alignments[[cA@aligned[left.runs,right.runs]]]
                     -        v <- al@v
                     -        sim <- al@r
                     -        mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match)
                     -        new.ind <- mi
                     -      }
                     -    else
                     -      {
                     +      if (length(right.runs) == 1 & length(left.runs) == 1) {
                     +      al <- cA@alignments[[cA@aligned[left.runs, right.runs]]]
                     +      v <- al@v
                     +      sim <- al@r
                     +      mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match)
                     +      new.ind <- mi
                     +      } else {
                              sim <- matrix(0, nrow = nrow(left.ind), ncol = nrow(right.ind))
                              count <- matrix(0, nrow = nrow(left.ind), ncol = nrow(right.ind))
                     -        if(verbose)
                     -          {
                     -            cat("[progressiveAlignment] (dot=50) going to", nrow(sim), ":")
                     +        if (verbose) {
                     +          cat("[progressiveAlignment] (dot=50) going to", nrow(sim), ":")
+                               }
                     -        for(r in 1:nrow(sim))
                     -        {
                     -          if(verbose)
                     -            {
                     -              if(r %% 50 == 0)
                     -                {
                     -                  cat(".")
                     +          for (r in 1:nrow(sim)) {
                     +            if (verbose) {
                     +              if(r %% 50 == 0) {
                     +                cat(".")
+                                     }
                     -            }
                     -          for(cc in max(1, r - df) : min(r + df, ncol(sim)))
                     -          {
                     -          # cc generate subsrcipt out of bound
                     -            if(cc > dim(sim)[2])
                     -            {
                     -              cat("\n\n", "Try to increase the df parameter to get a better alignment", "\n\n")
                     +                }
                     +                for (cc in max(1, r - df) : min(r + df, ncol(sim))) {
                     +                  # cc can generate a subscript out of bounds
                     +                  if (cc > dim(sim)[2]) {
                     +                    cat("\n\n", "Try to increase the df parameter to get a
                     +                    better alignment", "\n\n")
                                    # cc <- dim(sim)[2]
                     -            }
                     -            for(lr in 1:length(left.runs))
                     -            {
                     -              for(rr in 1:length(right.runs))
                     -              {
                     -                ai <- cA@aligned[left.runs[lr], right.runs[rr]]
                     -                # this.sim <- cA$alignments[[ai]]$r
                     -                lind <- left.ind[r,lr]
                     -                rind <- right.ind[cc,rr]# Error: subscript out of bounds. Solution: increase df
                     -                if(!is.na(lind) & !is.na(rind))
                     -                  {
                     -                    if(left.runs[lr] < right.runs[rr])
                     -                      {
                     -                        sim[r,cc] <- sim[r,cc] + cA@alignments[[ai]]@r[lind,rind]
                     +              }
                     +              for (lr in 1:length(left.runs)) {
                     +                for(rr in 1:length(right.runs)) {
                     +                  ai <- cA@aligned[left.runs[lr], right.runs[rr]]
                     +                  # this.sim <- cA$alignments[[ai]]$r
                     +                  lind <- left.ind[r, lr]
                     +                  rind <- right.ind[cc, rr]
                     +                  # Error: subscript out of bounds. Solution: increase df
                     +                  if (!is.na(lind) & !is.na(rind)) {
                     +                    if(left.runs[lr] < right.runs[rr]) {
                     +                      sim[r, cc] <- sim[r, cc] + cA@alignments[[ai]]@r[lind, rind]
                     +                      } else {
                     +                        sim[r, cc] <- sim[r, cc] + cA@alignments[[ai]]@r[rind, lind]
+                                           }
                     -                    else
                     -                      {
                     -                        sim[r,cc] <- sim[r,cc] + cA@alignments[[ai]]@r[rind,lind]
                     +                      count[r, cc] = count[r, cc] + 1
+                                           }
                     -                    count[r,cc] = count[r,cc] + 1
+                                       }
                     +                }
+                                   }
+                                 }
                     -          }
                     -        }
                     -        if(verbose)
                     -          {
                     -            cat("\n")
                     -          }
+                    -
                     -        if(type == 2) # RR
                     -          {
                     -            v <- dynRT(S = sim)
                     -            v$match <- v$match[!is.na(v$match[,2]),] # remove non-matched peaks
                     -          }
                     -        if(type == 1)
                     -          {
                     -            sim[count > 0] <- sim[count > 0] / count[count > 0]
                     -            sim[sim == 0] <- 1
                     -            v <- dp(sim, gap = gap, verbose = verbose)#
                     -          }
                     -          mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match)
                     -          new.ind <- cbind(left.ind[mi[,1],], right.ind[mi[,2],])
                     -        }
                     -        rownames(new.ind) <- 1:nrow(new.ind)
                     -        merges[[i]] <- list(ind = new.ind, mi = mi, runs = c(left.runs, right.runs),
                     -                            left = left.runs, right = right.runs, r = sim,
                     -                            compressed = FALSE)
                     -  }
+                    -
                     +            if (verbose) {
                     +              cat("\n")
                     +              }
                     +              if (type == 2) {
                     +                v <- dynRT(S = sim)
                     +                # remove non-matched peaks
                     +                v$match <- v$match[!is.na(v$match[, 2]), ]
                     +                }
                     +              if (type == 1) {
                     +                sim[count > 0] <- sim[count > 0] / count[count > 0]
                     +                sim[sim == 0] <- 1
                     +                v <- dp(sim, gap = gap, verbose = verbose)
                     +                }
                     +    mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match)
                     +    new.ind <- cbind(left.ind[mi[, 1], ], right.ind[mi[, 2], ])
                     +    }
                     +    rownames(new.ind) <- 1:nrow(new.ind)
                     +    merges[[i]] <- list(ind = new.ind, mi = mi, runs = c(left.runs, right.runs),
                     +      left = left.runs, right = right.runs, r = sim, compressed = FALSE)
                     +      }
                        cA <- NULL
                     -  if(verbose)
                     -    {
                     +  if (verbose) {
                            print(gc())
+                         }
                        pA <- new("progressiveAlignment", merges = merges)
                     -  if(compress)
                     -    {
                     -      return(compress(pA, verbose=verbose))
                     -    }
                     -  else
                     -    {
                     +  if (compress) {
                     +      return(compress(pA, verbose = verbose))
                     +    } else {
                            return(pA)
+                         }
+                     }
                      .merge.indices <- function(nl, nr, m) {
                     -    lind <- cbind(1:nl, 0)
                     -    lind[lind[,1] %in% m[,1],2] <- 1
                     -    rind <- cbind(1:nr,0)
                     -    rind[rind[,1] %in% m[,2],2] <- 1
                     -    mg <- matrix(NA, nrow=nrow(lind)+nrow(rind)-nrow(m), ncol=2)
                     -    #print(dim(mg))
                     -    li <- 1; ri <- 1; i <- 1
                     -    while(i <= nrow(mg)){
                     -        if(li > nrow(lind) & ri <= nrow(rind))
                     -        {
                     -            mg[i,] <- c(NA,rind[ri,1])
                     -            ri <- ri+1
                     -            i <- i+1
                     -            next
                     -	}
                     -        if(li <= nrow(lind) & ri > nrow(rind))
                     -        {
                     -            mg[i,] <- c(lind[li,1], NA)
                     -            li <- li+1
                     -            i <- i+1
                     -            next
                     -	}
                     -        if(lind[li,2] == 1)
                     -        {
                     -            if(rind[ri,2] == 1)
                     -            {  # match
                     -                mg[i,] <- c(lind[li,1], rind[ri,1])
                     -		ri <- ri+1
                     -		li <- li+1
                     -		#cat("match",i,li,ri,"-",mg[i,],"\n")
                     -            }
                     -            else
                     -            {             # right unmatched
                     -                mg[i,] <- c(NA, rind[ri,1])
                     -		ri <- ri+1
                     -		#cat("right unmatched",i,li,ri,"-",mg[i,],"\n")
                     -            }
                     -	}
                     -        else
                     -        {
                     -            if(rind[ri,2] == 1)
                     -            {  # left unmatched
                     -                mg[i,] <- c(lind[li,1], NA)
                     -		li <- li+1
                     -		#cat("left unmatched",i,li,ri,"-",mg[i,],"\n")
                     -            }
                     -            else
                     -            {             # both unmatched
                     -                mg[i,] <- c(lind[li,1], NA)
                     -		#cat("both unmatched - A",i,li,ri,"-",mg[i,],"\n")
                     -		i <- i+1
                     -                mg[i,] <- c(NA, rind[ri,1])
                     -		li <- li+1
                     -		ri <- ri+1
                     -		#cat("both unmatched - B",i,li,ri,"-",mg[i,],"\n")
                     +  lind <- cbind(1:nl, 0)
                     +  lind[lind[, 1] %in% m[, 1], 2] <- 1
                     +  rind <- cbind(1:nr, 0)
                     +  rind[rind[, 1] %in% m[, 2], 2] <- 1
                     +  mg <- matrix(NA, nrow = nrow(lind) + nrow(rind) - nrow(m), ncol = 2)
                     +  li <- 1; ri <- 1; i <- 1
                     +  while (i <= nrow(mg)) {
                     +    if (li > nrow(lind) & ri <= nrow(rind)) {
                     +      mg[i, ] <- c(NA, rind[ri, 1])
                     +      ri <- ri + 1
                     +      i <- i + 1
                     +      next
                     +      }
                     +    if (li <= nrow(lind) & ri > nrow(rind)) {
                     +      mg[i, ] <- c(lind[li, 1], NA)
                     +      li <- li + 1
                     +      i <- i + 1
                     +      next
                     +      }
                     +    if (lind[li, 2] == 1) {
                     +      if(rind[ri, 2] == 1) { # match
                     +        mg[i, ] <- c(lind[li, 1], rind[ri, 1])
                     +        ri <- ri + 1
                     +		    li <- li + 1
                     +        # cat("match", i, li, ri, "-", mg[i, ], "\n")
                     +        } else { # right unmatched
                     +          mg[i, ] <- c(NA, rind[ri, 1])
                     +          ri <- ri + 1
                     +		      # cat("right unmatched", i, li, ri, "-", mg[i, ], "\n")
                     +          }
                     +      } else {
                     +        if (rind[ri, 2] == 1) { # left unmatched
                     +          mg[i, ] <- c(lind[li, 1], NA)
                     +		      li <- li + 1
                     +          # cat("left unmatched", i, li, ri, "-", mg[i, ], "\n")
                     +          } else {# both unmatched
                     +            mg[i, ] <- c(lind[li, 1], NA)
                     +            # cat("both unmatched - A", i, li, ri, "-", mg[i, ], "\n")
                     +            i <- i + 1
                     +            mg[i, ] <- c(NA, rind[ri, 1])
                     +            li <- li + 1
                     +            ri <- ri + 1
                     +            # cat("both unmatched - B", i, li, ri, "-", mg[i, ], "\n")
+                                 }
                     -	}
                     -	i <- i+1
                     +        }
                     +        i <- i + 1
+                         }
                          mg
                     -}
+                    -
                     +}
                     \ No newline at end of file

R/retFatMatrix.R

History View file @ eea95d1

@@ -19,52 +19,55 @@
                      #' @examples
                      #'
                      #' require(gcspikelite)
                     -#' # paths and files
                     -#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/")
                     -#' cdfFiles <- dir(gcmsPath,"CDF",full=TRUE)
                     -#' # read data, peak detection results
                     -#' pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550),
                     -#'                    rtrange=c(7.5,8.5))
                     -#' pd <- addXCMSPeaks(files=cdfFiles[1:2], object=pd,
                     -#'                    peakPicking=c('mF'), snthresh=3, fwhm=4,
                     -#'                    step=1, steps=2, mzdiff=0.5)
                     -#' ma <- multipleAlignment(pd = pd, group = c(1,1),
                     +#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +#'                     sep = "/"),"CDF", full = TRUE)
                     +#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +#' ## create settings object
                     +#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +#'  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +#'  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     +#'  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +#'  list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +#' data
                     +#' ma <- multipleAlignment(pd = data, group = c(1,1),
                      #'                         filterMin = 1, metric = 2, type = 2)
                     -#' outList <- gatherInfo(pd, ma)
                     -#' mtxD <- retFatMatrix(object = pd, data = outList, minFilter = 1)
                     +#' outList <- gatherInfo(data, ma)
                     +#' mtxD <- retFatMatrix(object = data, data = outList, minFilter = 1)
                      #'
                      #' @export retFatMatrix
                     -retFatMatrix <- function (object, data, minFilter = round(length(object@files)/3*2))
                     -{
                     -    a <- lapply(seq(along = data), function(x)
                     -    {
                     +retFatMatrix <- function (object, data,
                     +    minFilter = round(length(object@files) / 3 * 2)) {
                     +        a <- lapply(seq(along = data), function(x) {
                              apply(data[[x]]$data, 2, sum)
                     -    }
                     -    )
                     +        }
                     +        )
                          ## i nomi delle colonne equivalgono al numero del file;
                          ## il numero della riga equivale alla tasca della lista di gatherInfo()
                     -    abumtx <- do.call(rbind, a)
                     +    abumtx <- do.call(rbind, a)
                          abumtx <- apply(abumtx, 1, "[")
                          files_to_merge <- rownames(abumtx)
                     -    if(length(grep(pattern = "^[1-9].",  files_to_merge)) == 0)
                     -    {
                     -        files.idx <- as.numeric(sub(pattern = "^.", replacement = "", files_to_merge))
                     +    if (length(grep(pattern = "^[1-9].",  files_to_merge)) == 0) {
                     +        files.idx <- as.numeric(sub(pattern = "^.", replacement = "",
                     +            files_to_merge))
+                         }
                     -    if(length(grep(pattern = "^[1-9].",  files_to_merge)) > 0)
                     -    {
                     -        files.idx <- as.numeric(sub(pattern = "^[1-9].", replacement = "", files_to_merge))
                     +    if (length(grep(pattern = "^[1-9].",  files_to_merge)) > 0) {
                     +        files.idx <- as.numeric(sub(pattern = "^[1-9].", replacement = "",
                     +            files_to_merge))
+                         }
                          sample <- object@files[files.idx]
                     -    colnames(abumtx) <- sapply(1:ncol(abumtx), function(x){paste0("Feat", x)})
                     +    colnames(abumtx) <- sapply(1:ncol(abumtx), function(x) {
                     +        paste0("Feat", x)
                     +        }
                     +        )
                          mf <- minFilter
                          keep <- c()
                     -    for(g in 1:ncol(abumtx))
                     -    {
                     +    for (g in 1:ncol(abumtx)) {
                              keep[g] <- sum(!is.na(abumtx[, g])) >= mf
                     -    }
                     +        }
                          abumtx[is.na(abumtx)] <- c(0)
                          df <- cbind.data.frame(sample, abumtx[, keep])
                          return(df)
                     -}
+                    -
                     +}
                     \ No newline at end of file

man/addChromaTOFPeaks.Rd

History View file @ eea95d1

@@ -4,8 +4,13 @@
                      \alias{addChromaTOFPeaks}
                      \title{Add ChromaTOF peak detection results}
                      \usage{
                     -addChromaTOFPeaks(object, fns = dir(, "[Tt][Xx][Tx]"), rtDivide = 60,
                     -  verbose = TRUE, ...)
                     +addChromaTOFPeaks(
                     +  object,
                     +  fns = dir(, "[Tt][Xx][Tx]"),
                     +  rtDivide = 60,
                     +  verbose = TRUE,
                     +  ...
                     +)
+                     }
                      \arguments{
                      \item{object}{a \code{peaksDataset} object.}

man/addXCMSPeaks.Rd

History View file @ eea95d1

@@ -2,35 +2,57 @@
                      % Please edit documentation in R/addXCMSPeaks.R
                      \name{addXCMSPeaks}
                      \alias{addXCMSPeaks}
                     -\title{Add xcms/CAMERA peak detection results}
                     +\title{addXCMSPeaks}
                      \usage{
                     -addXCMSPeaks(files, object, peakPicking = c("cwt", "mF"),
                     -  perfwhm = 0.75, quick = TRUE, ...)
                     +addXCMSPeaks(
                     +  files,
                     +  object,
                     +  settings,
                     +  rtrange = NULL,
                     +  mzrange = NULL,
                     +  perfwhm = 0.75,
                     +  minintens = 100,
                     +  minfeat = 6,
                     +  multipleMatchedFilter = FALSE,
                     +  multipleMatchedFilterParam = list(fwhm = c(5, 10, 20), mz_abs = 0.1, rt_abs = 3)
                     +)
+                     }
                      \arguments{
                     -\item{files}{character vector of same length as \code{object@rawdata} (user
                     -ensures the order matches)}
                     +\item{files}{list of chromatogram files}
                     -\item{object}{a \code{peaksDataset} object.}
                     +\item{object}{a \code{peaksDataset} object}
                     -\item{peakPicking}{Methods to use for peak detection. See details.}
                     +\item{settings}{list. It contains the settings for the peak-picking}
                     -\item{perfwhm}{percentage of full width half maximum. See
                     -CAMERA::groupFWHM() for more details}
                     +\item{rtrange}{vector; retention time range}
                     -\item{quick}{logical. See CAMERA::annotate() for more details}
                     +\item{mzrange}{vector, mz range}
                     -\item{...}{arguments passed on to \code{xcmsSet} and \code{annotate}}
                     +\item{perfwhm}{determines the maximal retention time difference of features in
                     +one pseudospectrum.}
+                    +
                     +\item{minintens}{minimum ion intensity to be included into a pseudospectra}
+                    +
                     +\item{minfeat}{minimum number of ions required to create a pseudospectrum}
+                    +
                     +\item{multipleMatchedFilter}{logical. Try to remove redundant peaks, i.e.
                     +peaks within an absolute m/z value of 0.2 and within 3 s of each other for
                     +any one sample in the xcmsSet (the largest peak is kept)}
+                    +
                     +\item{multipleMatchedFilterParam}{list. It contains the settings for the
                     +peak-picking. mz_abs represents the mz range; rt_abs represents the rt range}
+                     }
                      \value{
                      \code{peaksDataset} object
+                     }
                      \description{
                     +Add xcms/CAMERA peak detection results
                     +}
                     +\details{
                      Reads the raw data using xcms, group each extracted ion according to their
                      retention time using CAMERA and attaches them to an already created
                      \code{peaksDataset} object
                     -}
                     -\details{
+                    +
                      Repeated calls to xcmsSet and annotate to perform peak-picking and
                      deconvolution. The peak detection results are added to the original
                      \code{peaksDataset} object. Two peak detection algorithms are available:
@@ -39,18 +61,18 @@ approach (peakPicking=c('mF')) described by Smith et al (2006). For further
                      information consult the xcms package manual.
+                     }
                      \examples{
+                    -
                     -# need access to CDF (raw data)
                     -require(gcspikelite)
                     -gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
+                    -
                     -# full paths to file names
                     -cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
+                    -
                     -# create a 'peaksDataset' object and add XCMS peaks to it
                     -pd <- peaksDataset(cdfFiles[1], mz=seq(50,550), rtrange=c(7.5,8.5))
                     -pd <- addXCMSPeaks(cdfFiles[1], pd, peakPicking=c('mF'),
                     -                   snthresh=3, fwhm=4, step=1, steps=2, mzdiff=0.5)
                     +files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +                    sep = "/"),"CDF", full = TRUE)
                     +data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +## create settings object
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     + prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     + extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     + multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     + list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +data
+                     }
                      \seealso{

man/betweenAlignment.Rd

History View file @ eea95d1

@@ -8,9 +8,22 @@
                      \alias{betweenAlignment-method}
                      \title{Data Structure for "between" alignment of many GCMS samples}
                      \usage{
                     -betweenAlignment(pD, cAList, pAList, impList, filterMin = 1, gap = 0.7,
                     -  D = 10, usePeaks = TRUE, df = 30, verbose = TRUE, metric = 2,
                     -  type = 2, penality = 0.2, compress = FALSE)
                     +betweenAlignment(
                     +  pD,
                     +  cAList,
                     +  pAList,
                     +  impList,
                     +  filterMin = 1,
                     +  gap = 0.7,
                     +  D = 10,
                     +  usePeaks = TRUE,
                     +  df = 30,
                     +  verbose = TRUE,
                     +  metric = 2,
                     +  type = 2,
                     +  penality = 0.2,
                     +  compress = FALSE
                     +)
+                     }
                      \arguments{
                      \item{pD}{a \code{peaksDataset} object}

man/clusterAlignment.Rd

History View file @ eea95d1

@@ -10,8 +10,14 @@
                      \alias{plot,clusterAlignment,ANY-method}
                      \title{Data Structure for a collection of all pairwise alignments of GCMS runs}
                      \usage{
                     -clusterAlignment(pD, runs = 1:length(pD@rawdata), timedf = NULL,
                     -  usePeaks = TRUE, verbose = TRUE, ...)
                     +clusterAlignment(
                     +  pD,
                     +  runs = 1:length(pD@rawdata),
                     +  timedf = NULL,
                     +  usePeaks = TRUE,
                     +  verbose = TRUE,
                     +  ...
                     +)
+                     }
                      \arguments{
                      \item{pD}{a \code{peaksDataset} object.}

man/compress-peaksAlignment-method.Rd

History View file @ eea95d1

@@ -1,6 +1,5 @@
                      % Generated by roxygen2: do not edit by hand
                      % Please edit documentation in R/peaksAlignment.R
                     -\docType{methods}
                      \name{compress,peaksAlignment-method}
                      \alias{compress,peaksAlignment-method}
                      \title{Compression method for peaksAlignment object}

man/compress-progressiveAlignment-method.Rd

History View file @ eea95d1

@@ -1,6 +1,5 @@
                      % Generated by roxygen2: do not edit by hand
                      % Please edit documentation in R/progressiveAlignment.R
                     -\docType{methods}
                      \name{compress,progressiveAlignment-method}
                      \alias{compress,progressiveAlignment-method}
                      \title{Compress method for progressiveAlignment}

man/corPrt.Rd

History View file @ eea95d1

@@ -35,18 +35,23 @@ retention time window (\code{D})and returns the similarity matrix.
                      ## Not Run
                      require(gcspikelite)
                     -gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
                     -## read data, peak detection results
                     -pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
                     -pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
                     -                   sleep=0)
                     +files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +                    sep = "/"),"CDF", full = TRUE)
                     +data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +## create settings object
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     + prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     + extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     + multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     + list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +data
                      ## review peak picking
                     -plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
                     +plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
                     -r <- corPrt(pd@peaksdata[[1]], pd@peaksdata[[2]],
                     -           pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, penality=0.2)
                     +r <- corPrt(data@peaksdata[[1]], data@peaksdata[[2]],
                     +           data@peaksrt[[1]], data@peaksrt[[2]], D = 50, penality = 0.2)
                      ## End (Not Run)
+                     }

man/de_Duper.Rd

History View file @ eea95d1

                     new file mode 100644
@@ -0,0 +1,29 @@
                     +% Generated by roxygen2: do not edit by hand
                     +% Please edit documentation in R/addXCMSPeaks.R
                     +\name{de_Duper}
                     +\alias{de_Duper}
                     +\title{deDuper}
                     +\usage{
                     +de_Duper(object, mz_abs = 0.1, rt_abs = 2)
                     +}
                     +\arguments{
                     +\item{object}{xcms object}
+                    +
                     +\item{mz_abs}{mz range}
+                    +
                     +\item{rt_abs}{rt range}
                     +}
                     +\value{
                     +an object of xcms class
                     +}
                     +\description{
                     +Duplicate peak removal function
                     +}
                     +\details{
                     +Remove redundant peaks, in this case where there are any peaks within an
                     +absolute m/z value of 0.2 and within 3 s for any one sample in the xcmsSet
                     +(the largest peak is kept)
                     +}
                     +\author{
                     +r
                     +}

man/decompress-peaksAlignment-method.Rd

History View file @ eea95d1

@@ -1,6 +1,5 @@
                      % Generated by roxygen2: do not edit by hand
                      % Please edit documentation in R/peaksAlignment.R
                     -\docType{methods}
                      \name{decompress,peaksAlignment-method}
                      \alias{decompress,peaksAlignment-method}
                      \title{Decompression method for peaksAlignment object}

man/decompress-progressiveAlignment-method.Rd

History View file @ eea95d1

@@ -1,6 +1,5 @@
                      % Generated by roxygen2: do not edit by hand
                      % Please edit documentation in R/progressiveAlignment.R
                     -\docType{methods}
                      \name{decompress,progressiveAlignment-method}
                      \alias{decompress,progressiveAlignment-method}
                      \title{Decompress method for progressiveAlignment}
@@ -8,17 +7,17 @@
                      \S4method{decompress}{progressiveAlignment}(object, verbose = TRUE, ...)
+                     }
                      \arguments{
                     -\item{object}{dummy}
                     +\item{object}{progressiveAlignment object}
                     -\item{verbose}{dummy}
                     +\item{verbose}{logical}
                      \item{...}{dummy}
+                     }
                      \description{
                     -Compress method for progressiveAlignment
                     +Decompress method for progressiveAlignment
+                     }
                      \details{
                     -Compress method for progressiveAlignment
                     +Decompress method for progressiveAlignment
+                     }
                      \author{
                      MR

man/distToLib.Rd

History View file @ eea95d1

                     new file mode 100644
@@ -0,0 +1,26 @@
                     +% Generated by roxygen2: do not edit by hand
                     +% Please edit documentation in R/importSpectra.R
                     +\name{distToLib}
                     +\alias{distToLib}
                     +\title{distToLib}
                     +\usage{
                     +distToLib(mspLib, outList)
                     +}
                     +\arguments{
                     +\item{mspLib}{a .msp file from NIST}
+                    +
                     +\item{outList}{an object from gatherInfo()}
                     +}
                     +\value{
                     +the distance matrix between the mass spec and the aligned spec
                     +}
                     +\description{
                     +The function calculates the distance between each mass spec in the msp file
                     +and the aligned mass spec from each sample
                     +}
                     +\details{
                     +Return the distance matrix
                     +}
                     +\author{
                     +Riccardo Romoli
                     +}

man/dynRT.Rd

History View file @ eea95d1

@@ -23,21 +23,25 @@ the mass spectra
                      \examples{
                      require(gcspikelite)
                     -gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
                     -## read data, peak detection results
                     -pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550),
                     -    rtrange=c(7.5,10.5))
                     -pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd,
                     -    peakPicking=c('mF'),snthresh=3, fwhm=10,  step=0.1, steps=2,
                     -    mzdiff=0.5, sleep=0)
                     +files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +                    sep = "/"),"CDF", full = TRUE)
                     +data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +## create settings object
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     + prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     + extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     + multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     + list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +data
                      ## review peak picking
                     -plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
                     +plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
                      ## similarity
                     -r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]], pd@peaksrt[[1]],
                     -    pd@peaksrt[[2]], D=50)
                     +r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]], data@peaksrt[[1]],
                     +    data@peaksrt[[2]], D = 50)
                      ## dynamic retention time based alignment algorithm
                     -v <- dynRT(S=r)
                     +v <- dynRT(S = r)
+                     }
                      \author{

man/gatherInfo.Rd

History View file @ eea95d1

@@ -4,9 +4,16 @@
                      \alias{gatherInfo}
                      \title{Gathers abundance informations from an alignment}
                      \usage{
                     -gatherInfo(pD, obj, newind = NULL, method = c("apex"),
                     -  findmzind = TRUE, useTIC = FALSE, top = NULL,
                     -  intensity.cut = 0.05)
                     +gatherInfo(
                     +  pD,
                     +  obj,
                     +  newind = NULL,
                     +  method = c("apex"),
                     +  findmzind = TRUE,
                     +  useTIC = FALSE,
                     +  top = NULL,
                     +  intensity.cut = 0.05
                     +)
+                     }
                      \arguments{
                      \item{pD}{a \code{peaksDataset} object, to get the abundance data from}

man/headToTailPlot.Rd

History View file @ eea95d1

                     new file mode 100644
@@ -0,0 +1,25 @@
                     +% Generated by roxygen2: do not edit by hand
                     +% Please edit documentation in R/importSpectra.R
                     +\name{headToTailPlot}
                     +\alias{headToTailPlot}
                     +\title{Head to tail plot}
                     +\usage{
                     +headToTailPlot(specFromLib, specFromList)
                     +}
                     +\arguments{
                     +\item{specFromLib}{the mass spectra obtained from the .msp file}
+                    +
                     +\item{specFromList}{the mass spectra obtained from \code{\link{gatherInfo}}}
                     +}
                     +\value{
                     +the plot
                     +}
                     +\description{
                     +The head-to-tail-plot for the mass spectra
                     +}
                     +\details{
                     +Head-to-tail-plot to visually compare the mass spectra
                     +}
                     +\author{
                     +Riccardo Romoli
                     +}

man/importSpec.Rd

History View file @ eea95d1

                     new file mode 100644
@@ -0,0 +1,24 @@
                     +% Generated by roxygen2: do not edit by hand
                     +% Please edit documentation in R/importSpectra.R
                     +\name{importSpec}
                     +\alias{importSpec}
                     +\title{importSpec}
                     +\usage{
                     +importSpec(file)
                     +}
                     +\arguments{
                     +\item{file}{a .msp file from NIST search library database}
                     +}
                     +\value{
                     +list containing the mass spectra
                     +}
                     +\description{
                     +Read the mass spectra from an external msp file
                     +}
                     +\details{
                     +Read the mass spectra from an external file in msp format. The format is
                     +used in NIST search library database.
                     +}
                     +\author{
                     [email protected]
                     +}

man/imputePeaks.Rd

History View file @ eea95d1

@@ -4,8 +4,7 @@
                      \alias{imputePeaks}
                      \title{Imputation of locations of peaks that were undetected}
                      \usage{
                     -imputePeaks(pD, obj, typ = 1, obj2 = NULL, filterMin = 1,
                     -  verbose = TRUE)
                     +imputePeaks(pD, obj, typ = 1, obj2 = NULL, filterMin = 1, verbose = TRUE)
+                     }
                      \arguments{
                      \item{pD}{a \code{peaksDataset} object}

man/matchSpec.Rd

History View file @ eea95d1

                     new file mode 100644
@@ -0,0 +1,28 @@
                     +% Generated by roxygen2: do not edit by hand
                     +% Please edit documentation in R/importSpectra.R
                     +\name{matchSpec}
                     +\alias{matchSpec}
                     +\title{matchSpec}
                     +\usage{
                     +matchSpec(spec1, outList, whichSpec)
                     +}
                     +\arguments{
                     +\item{spec1}{reference mass spectrum}
+                    +
                     +\item{outList}{the return of \code{\link{gatherInfo}}}
+                    +
                     +\item{whichSpec}{the entry number of outList}
                     +}
                     +\value{
                     +the distance between the reference mass spectrum and the others
                     +}
                     +\description{
                     +Calculate the distance between a reference mass spectrum
                     +}
                     +\details{
                     +Calculate the distance between a reference mass spectrum and one from the
                     +sample
                     +}
                     +\author{
                     +Riccardo Romoli
                     +}

man/multipleAlignment-class.Rd

History View file @ eea95d1

@@ -8,10 +8,25 @@
                      \alias{multipleAlignment-method}
                      \title{Data Structure for multiple alignment of many GCMS samples}
                      \usage{
                     -multipleAlignment(pd, group, bw.gap = 0.8, wn.gap = 0.6, bw.D = 0.2,
                     -  wn.D = 0.05, filterMin = 1, lite = FALSE, usePeaks = TRUE,
                     -  df = 50, verbose = TRUE, timeAdjust = FALSE, doImpute = FALSE,
                     -  metric = 2, type = 2, penality = 0.2, compress = FALSE)
                     +multipleAlignment(
                     +  pd,
                     +  group,
                     +  bw.gap = 0.8,
                     +  wn.gap = 0.6,
                     +  bw.D = 0.2,
                     +  wn.D = 0.05,
                     +  filterMin = 1,
                     +  lite = FALSE,
                     +  usePeaks = TRUE,
                     +  df = 50,
                     +  verbose = TRUE,
                     +  timeAdjust = FALSE,
                     +  doImpute = FALSE,
                     +  metric = 2,
                     +  type = 2,
                     +  penality = 0.2,
                     +  compress = FALSE
                     +)
+                     }
                      \arguments{
                      \item{pd}{a \code{peaksDataset} object}

man/ndpRT.Rd

History View file @ eea95d1

@@ -22,7 +22,7 @@ matrix of similarities
+                     }
                      \description{
                      This function calculates the similarity of all pairs of peaks from 2
                     -samples, using the spectra similarity and the rretention time differencies
                     +samples, using the spectra similarity and the retention time differences
+                     }
                      \details{
                      Computes the normalized dot product between every pair of peak vectors in
@@ -32,19 +32,23 @@ the retention time window (\code{D})and returns a similarity matrix.
                      ## Not Run
                      require(gcspikelite)
                     -gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
                     -
                     -                                        # read data, peak detection results
                     -pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
                     -pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
                     -                   sleep=0)
                     +files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +                    sep = "/"),"CDF", full = TRUE)
                     +data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +## create settings object
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     + prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     + extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     + multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     + list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +data
                      ## review peak picking
                     -plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
                     +plotChrom(data, rtrange = c(7.5, 10.5), runs = c(1:2))
                     -r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]],
                     -           pd@peaksrt[[1]], pd@peaksrt[[2]], D=50)
                     +r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]],
                     +           data@peaksrt[[1]], data@peaksrt[[2]], D = 50)
                      ## End (Not Run)
                      }

man/normDotProduct.Rd

History View file @ eea95d1

@@ -4,8 +4,16 @@
                      \alias{normDotProduct}
                      \title{Normalized Dot Product}
                      \usage{
                     -normDotProduct(x1, x2, t1 = NULL, t2 = NULL, df = max(ncol(x1),
                     -  ncol(x2)), D = 1e+05, timedf = NULL, verbose = FALSE)
                     +normDotProduct(
                     +  x1,
                     +  x2,
                     +  t1 = NULL,
                     +  t2 = NULL,
                     +  df = max(ncol(x1), ncol(x2)),
                     +  D = 1e+05,
                     +  timedf = NULL,
                     +  verbose = FALSE
                     +)
                      }
                      \arguments{
                      \item{x1}{data matrix for sample 1}

man/parseChromaTOF.Rd

History View file @ eea95d1

@@ -4,8 +4,15 @@
                      \alias{parseChromaTOF}
                      \title{Parser for ChromaTOF files}
                      \usage{
                     -parseChromaTOF(fn, min.pc = 0.01, mz = seq(85, 500), rt.cut = 0.008,
                     -  rtrange = NULL, skip = 1, rtDivide = 60)
                     +parseChromaTOF(
                     +  fn,
                     +  min.pc = 0.01,
                     +  mz = seq(85, 500),
                     +  rt.cut = 0.008,
                     +  rtrange = NULL,
                     +  skip = 1,
                     +  rtDivide = 60
                     +)
                      }
                      \arguments{
                      \item{fn}{ChromaTOF filename to read.}

man/parseELU.Rd

History View file @ eea95d1

@@ -4,8 +4,7 @@
                      \alias{parseELU}
                      \title{Parser for ELU files}
                      \usage{
                     -parseELU(f, min.pc = 0.01, mz = seq(50, 550), rt.cut = 0.008,
                     -  rtrange = NULL)
                     +parseELU(f, min.pc = 0.01, mz = seq(50, 550), rt.cut = 0.008, rtrange = NULL)
                      }
                      \arguments{
                      \item{f}{ELU filename to read.}

man/peaksAlignment-class.Rd

History View file @ eea95d1

@@ -10,9 +10,22 @@
                      \alias{plot,peaksAlignment,ANY-method}
                      \title{Data Structure for pairwise alignment of 2 GCMS samples}
                      \usage{
                     -peaksAlignment(d1, d2, t1, t2, gap = 0.5, D = 50, timedf = NULL,
                     -  df = 30, verbose = TRUE, usePeaks = TRUE, compress = TRUE,
                     -  metric = 2, type = 2, penality = 0.2)
                     +peaksAlignment(
                     +  d1,
                     +  d2,
                     +  t1,
                     +  t2,
                     +  gap = 0.5,
                     +  D = 50,
                     +  timedf = NULL,
                     +  df = 30,
                     +  verbose = TRUE,
                     +  usePeaks = TRUE,
                     +  compress = TRUE,
                     +  metric = 2,
                     +  type = 2,
                     +  penality = 0.2
                     +)
                      }
                      \arguments{
                      \item{d1}{matrix of MS intensities for 1st sample (if doing a peak
@@ -76,29 +89,32 @@ data.
                      ## see clusterAlignment, it calls peaksAlignment
                      ## Not Run:
                     -gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
                     -
                     -# read data, peak detection results
                     -pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
                     -pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
                     -                   sleep=0)
                     -## review peak picking
                     -plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3))
                     +files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +                    sep = "/"),"CDF", full = TRUE)
                     +data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +## create settings object
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     + prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     + extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     + multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     + list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +data
                     +plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
                      ## align two chromatogram
                     -pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
                     -                     pd@peaksrt[[1]], pd@peaksrt[[2]], D=50,
                     -                     metric=3, compress=FALSE, type=2, penality=0.2)
                     +pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
                     +                     data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
                     +                     metric = 3, compress = FALSE, type = 2, penality = 0.2)
                      plotAlignment(pA)
                      pA@v$match
                      par(mfrow=c(2,1))
                     -plot(pd@peaksdata[[1]][,15], type='h', main=paste(pd@peaksrt[[1]][[15]]))
                     -plot(pd@peaksdata[[2]][,17], type='h',
                     -     main=paste(pd@peaksrt[[2]][[17]]))
                     +plot(data@peaksdata[[1]][,15], type = 'h', main = paste(data@peaksrt[[1]][[15]]))
                     +plot(data@peaksdata[[2]][,17], type = 'h',
                     +     main = paste(data@peaksrt[[2]][[17]]))
                      ## End (Not Run)
                      }

man/peaksDataset.Rd

History View file @ eea95d1

@@ -10,8 +10,13 @@
                      \alias{plot,peaksDataset,ANY-method}
                      \title{Data Structure for raw GCMS data and peak detection results}
                      \usage{
                     -peaksDataset(fns = dir(, "[Cc][Dd][Ff]"), verbose = TRUE,
                     -  mz = seq(50, 550), rtDivide = 60, rtrange = NULL)
                     +peaksDataset(
                     +  fns = dir(, "[Cc][Dd][Ff]"),
                     +  verbose = TRUE,
                     +  mz = seq(50, 550),
                     +  rtDivide = 60,
                     +  rtrange = NULL
                     +)
                      }
                      \arguments{
                      \item{fns}{character vector, filenames of raw data in CDF format.}

man/plotAlignedFrags.Rd

History View file @ eea95d1

@@ -4,8 +4,14 @@
                      \alias{plotAlignedFrags}
                      \title{plotAlignedFrags}
                      \usage{
                     -plotAlignedFrags(object, outList, specID, fullRange = TRUE,
                     -  normalize = TRUE, ...)
                     +plotAlignedFrags(
                     +  object,
                     +  outList,
                     +  specID,
                     +  fullRange = TRUE,
                     +  normalize = TRUE,
                     +  ...
                     +)
                      }
                      \arguments{
                      \item{object}{where to keep the mass range of the experiment}
@@ -31,24 +37,27 @@ Plot the deconvoluted and aligned mass spectra collected using gatherInfo()
                      }
                      \examples{
                     -## Rd workflow
                     -gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/")
                     -cdfFiles <- dir(gcmsPath,"CDF", full = TRUE)
                     -
                     -# read data, peak detection results
                     -pd <- peaksDataset(cdfFiles[1:4], mz = seq(50,550), rtrange = c(7.5,10.5))
                     -pd <- addXCMSPeaks(files = cdfFiles[1:4], object = pd, peakPicking = c('mF'),
                     -                   snthresh = 2, fwhm = 8,  step = 0.5, steps = 2, mzdiff = 0.5,
                     -                   sleep = 0)
                     +files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +                    sep = "/"),"CDF", full = TRUE)
                     +data <- peaksDataset(files[1:4], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +## create settings object
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     + prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     + extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +data <- addXCMSPeaks(files[1:4], data, settings = mfp, minintens = 100,
                     + multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     + list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +data
                      ## multiple alignment
                     -ma <- multipleAlignment(pd, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05, bw.gap = 0.6,
                     -                        bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50,
                     -                        verbose = TRUE, metric = 2, type = 2)
                     +ma <- multipleAlignment(data, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05,
                     + bw.gap = 0.6, bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50,
                     + verbose = TRUE, metric = 2, type = 2)
                      ## gather apex intensities
                     -gip <- gatherInfo(pd, ma)
                     +gip <- gatherInfo(data, ma)
                      gip[[33]]
                     -plotAlignedFrags(object = pd, outList = gip, specID = 33)
                     +plotAlignedFrags(object = data, outList = gip, specID = 33)
                      }
                      \author{

man/plotAlignment-peaksAlignment-method.Rd

History View file @ eea95d1

@@ -1,15 +1,22 @@
                      % Generated by roxygen2: do not edit by hand
                      % Please edit documentation in R/peaksAlignment.R
                     -\docType{methods}
                      \name{plotAlignment,peaksAlignment-method}
                      \alias{plotAlignment,peaksAlignment-method}
                      \title{plotAlignment}
                      \usage{
                     -\S4method{plotAlignment}{peaksAlignment}(object, xlab = "Peaks - run 1",
                     -  ylab = "Peaks - run 2", plotMatches = TRUE, matchPch = 19,
                     -  matchLwd = 3, matchCex = 0.5, matchCol = "black",
                     -  col = colorpanel(50, "white", "green", "navyblue"), breaks = seq(0,
                     -  1, length = 51), ...)
                     +\S4method{plotAlignment}{peaksAlignment}(
                     +  object,
                     +  xlab = "Peaks - run 1",
                     +  ylab = "Peaks - run 2",
                     +  plotMatches = TRUE,
                     +  matchPch = 19,
                     +  matchLwd = 3,
                     +  matchCex = 0.5,
                     +  matchCol = "black",
                     +  col = colorpanel(50, "white", "green", "navyblue"),
                     +  breaks = seq(0, 1, length = 51),
                     +  ...
                     +)
                      }
                      \arguments{
                      \item{object}{a \code{clusterAlignment} object}
@@ -50,23 +57,24 @@ The similarity matrix is plotted and optionally, the set of matching peaks.
                      \examples{
                      require(gcspikelite)
                     -
                     -## paths and files
                     -gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
                     -eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
                     -
                     -## read data
                     -pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5))
                     -pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5)
                     -
                     +files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +                    sep = "/"),"CDF", full = TRUE)
                     +data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +## create settings object
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     + prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     + extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     + multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     + list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +data
                      ## image plot
                     -plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE)
                     +plotChrom(data, rtrange = c(7.5,8.5), plotPeaks = TRUE, plotPeakLabels =TRUE)
                      ## align two chromatogram
                     -pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
                     -                     pd@peaksrt[[1]], pd@peaksrt[[2]], D = 50,
                     +pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
                     +                     data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
                                           compress = FALSE, type = 1, metric = 1,
                                           gap = 0.5)
                      plotAlignment(pA)

man/plotChrom-peaksDataset-method.Rd

History View file @ eea95d1

@@ -1,19 +1,33 @@
                      % Generated by roxygen2: do not edit by hand
                      % Please edit documentation in R/peaksDataset.R
                     -\docType{methods}
                      \name{plotChrom,peaksDataset-method}
                      \alias{plotChrom,peaksDataset-method}
                      \title{Plotting functions for GCMS data objects}
                      \usage{
                     -\S4method{plotChrom}{peaksDataset}(object,
                     -  runs = 1:length(object@rawdata), mzind = 1:nrow(object@rawdata[[1]]),
                     -  mind = NULL, plotSampleLabels = TRUE, calcGlobalMax = FALSE,
                     -  peakCex = 0.8, plotPeaks = TRUE, plotPeakBoundaries = FALSE,
                     -  plotPeakLabels = FALSE, plotMergedPeakLabels = TRUE, mlwd = 3,
                     -  usePeaks = TRUE, plotAcrossRuns = FALSE, overlap = F,
                     -  rtrange = NULL, cols = NULL, thin = 1,
                     -  max.near = median(object@rawrt[[1]]), how.near = 50, scale.up = 1,
                     -  ...)
                     +\S4method{plotChrom}{peaksDataset}(
                     +  object,
                     +  runs = 1:length(object@rawdata),
                     +  mzind = 1:nrow(object@rawdata[[1]]),
                     +  mind = NULL,
                     +  plotSampleLabels = TRUE,
                     +  calcGlobalMax = FALSE,
                     +  peakCex = 0.8,
                     +  plotPeaks = TRUE,
                     +  plotPeakBoundaries = FALSE,
                     +  plotPeakLabels = FALSE,
                     +  plotMergedPeakLabels = TRUE,
                     +  mlwd = 3,
                     +  usePeaks = TRUE,
                     +  plotAcrossRuns = FALSE,
                     +  overlap = F,
                     +  rtrange = NULL,
                     +  cols = NULL,
                     +  thin = 1,
                     +  max.near = median(object@rawrt[[1]]),
                     +  how.near = 50,
                     +  scale.up = 1,
                     +  ...
                     +)
                      }
                      \arguments{
                      \item{object}{a \code{peaksDataset} object.}

man/plotClustAlignment-clusterAlignment-method.Rd

History View file @ eea95d1

@@ -1,12 +1,10 @@
                      % Generated by roxygen2: do not edit by hand
                      % Please edit documentation in R/clusterAlignment.R
                     -\docType{methods}
                      \name{plotClustAlignment,clusterAlignment-method}
                      \alias{plotClustAlignment,clusterAlignment-method}
                      \title{plotClustAlignment}
                      \usage{
                     -\S4method{plotClustAlignment}{clusterAlignment}(object, alignment = 1,
                     -  ...)
                     +\S4method{plotClustAlignment}{clusterAlignment}(object, alignment = 1, ...)
                      }
                      \arguments{
                      \item{object}{\code{clusterAlignment} object.}
@@ -31,19 +29,19 @@ just a collection of all pairwise \code{peakAlignment} objects.
                      require(gcspikelite)
                     -## paths and files
                     +# paths and files
                      gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                      cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
                      eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
                     -## read data
                     -pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5))
                     -pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5)
                     -ca <- clusterAlignment(pd, metric = 1, D = 50, type = 1, gap = 0.5)
                     +# read data, peak detection results
                     +pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5))
                     +pd <- addAMDISPeaks(pd, eluFiles[1:2])
                     +
                     +ca <- clusterAlignment(pd, gap=0.5, D=0.05, df=30, metric=1, type=1)
                      plotClustAlignment(ca, run = 1)
                      plotClustAlignment(ca, run = 2)
                     -plotClustAlignment(ca, run = 3)
                     +plotClustAlignment(ca, run = 3)
                      }
                      \references{

man/plotFrags.Rd

History View file @ eea95d1

@@ -29,22 +29,27 @@ Plot the deconvoluted mass spectra from the profile matrix
                      }
                      \examples{
                     -gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -cdfFiles <- dir(gcmsPath,"CDF", full=TRUE)
                     -# read data, peak detection results
                     -pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5))
                     -pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'),
                     -                   snthresh=3, fwhm=10,  step=0.1, steps=2, mzdiff=0.5,
                     -                   sleep=0)
                     +files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +                    sep = "/"),"CDF", full = TRUE)
                     +data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +## create settings object
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     + prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     + extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     + multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     + list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +data
                      ## align two chromatogram
                     -pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
                     -                     pd@peaksrt[[1]], pd@peaksrt[[2]], D=50,
                     -                     metric=3, compress=FALSE, type=2, penality=0.2)
                     +pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]],
                     +                     data@peaksrt[[1]], data@peaksrt[[2]], D = 50,
                     +                     metric = 3, compress = FALSE, type = 2, penality = 0.2)
                      pA@v$match
                      ## plot the mass spectra
                      par(mfrow=c(2,1))
                     -plotFrags(object=pd, sample=1, specID=10)
                     -plotFrags(object=pd, sample=2, specID=12)
                     +plotFrags(object=data, sample=1, specID=10)
                     +plotFrags(object=data, sample=2, specID=12)
                      }
                      \author{

man/plotImage.Rd

History View file @ eea95d1

@@ -1,13 +1,19 @@
                      % Generated by roxygen2: do not edit by hand
                      % Please edit documentation in R/peaksDataset.R
                     -\docType{methods}
                      \name{plotImage}
                      \alias{plotImage}
                      \alias{plotImage,peaksDataset-method}
                      \title{Plot of images of GCMS data}
                      \usage{
                     -\S4method{plotImage}{peaksDataset}(object, run = 1, rtrange = c(11,
                     -  13), main = NULL, mzrange = c(50, 200), SCALE = log2, ...)
                     +\S4method{plotImage}{peaksDataset}(
                     +  object,
                     +  run = 1,
                     +  rtrange = c(11, 13),
                     +  main = NULL,
                     +  mzrange = c(50, 200),
                     +  SCALE = log2,
                     +  ...
                     +)
                      }
                      \arguments{
                      \item{object}{a \code{peaksDataset} object}

man/progressiveAlignment-class.Rd

History View file @ eea95d1

@@ -7,8 +7,17 @@
                      \alias{show,progressiveAlignment-method}
                      \title{Data Structure for progressive alignment of many GCMS samples}
                      \usage{
                     -progressiveAlignment(pD, cA, D = 50, gap = 0.5, verbose = TRUE,
                     -  usePeaks = TRUE, df = 30, compress = FALSE, type = 2)
                     +progressiveAlignment(
                     +  pD,
                     +  cA,
                     +  D = 50,
                     +  gap = 0.5,
                     +  verbose = TRUE,
                     +  usePeaks = TRUE,
                     +  df = 30,
                     +  compress = FALSE,
                     +  type = 2
                     +)
                      }
                      \arguments{
                      \item{pD}{a \code{peaksDataset} object}
@@ -47,18 +56,22 @@ be found in the reference below.
                      \examples{
                      require(gcspikelite)
                     -## paths and files
                     -gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     -cdfFiles <- dir(gcmsPath, "CDF", full=TRUE)
                     -eluFiles <- dir(gcmsPath, "ELU", full=TRUE)
                     -
                     -## read data, peak detection results
                     -pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5))
                     -pd <- addAMDISPeaks(pd, eluFiles[1:2])
                     -
                     -ca <- clusterAlignment(pd, gap=.5, D=.05, df=30, metric=1, type=1,
                     -                       compress = FALSE)
                     -pa <- progressiveAlignment(pd, ca, gap=.6, D=.1, df=30, type=1, compress = FALSE)
                     +files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +                    sep = "/"),"CDF", full = TRUE)
                     +data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +## create settings object
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     + prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     + extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     + multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     + list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +data
                     +ca <- clusterAlignment(data, gap = 0.5, D = 0.05, df = 30, metric = 1,
                     +  type = 1, compress = FALSE)
                     +pa <- progressiveAlignment(data, ca, gap = 0.6, D = 0.1, df = 30,
                     + type = 1, compress = FALSE)
                      }
                      \references{

man/retFatMatrix.Rd

History View file @ eea95d1

@@ -31,19 +31,22 @@ different peaks.
                      \examples{
                      require(gcspikelite)
                     -# paths and files
                     -gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/")
                     -cdfFiles <- dir(gcmsPath,"CDF",full=TRUE)
                     -# read data, peak detection results
                     -pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550),
                     -                   rtrange=c(7.5,8.5))
                     -pd <- addXCMSPeaks(files=cdfFiles[1:2], object=pd,
                     -                   peakPicking=c('mF'), snthresh=3, fwhm=4,
                     -                   step=1, steps=2, mzdiff=0.5)
                     -ma <- multipleAlignment(pd = pd, group = c(1,1),
                     +files <- list.files(path = paste(find.package("gcspikelite"), "data",
                     +                    sep = "/"),"CDF", full = TRUE)
                     +data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +## create settings object
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     + prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     + extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100,
                     + multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     + list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1))
                     +data
                     +ma <- multipleAlignment(pd = data, group = c(1,1),
                                              filterMin = 1, metric = 2, type = 2)
                     -outList <- gatherInfo(pd, ma)
                     -mtxD <- retFatMatrix(object = pd, data = outList, minFilter = 1)
                     +outList <- gatherInfo(data, ma)
                     +mtxD <- retFatMatrix(object = data, data = outList, minFilter = 1)
                      }
                      \seealso{

man/rmaFitUnit.Rd

History View file @ eea95d1

@@ -4,8 +4,15 @@
                      \alias{rmaFitUnit}
                      \title{Fits a robust linear model (RLM) for one metabolite}
                      \usage{
                     -rmaFitUnit(u, maxit = 5, mzEffect = TRUE, cls = NULL,
                     -  fitSample = TRUE, fitOrCoef = c("coef", "fit"), TRANSFORM = log2)
                     +rmaFitUnit(
                     +  u,
                     +  maxit = 5,
                     +  mzEffect = TRUE,
                     +  cls = NULL,
                     +  fitSample = TRUE,
                     +  fitOrCoef = c("coef", "fit"),
                     +  TRANSFORM = log2
                     +)
                      }
                      \arguments{
                      \item{u}{a metabolite unit (list object with vectors \code{mz} and \code{rt}

man/show-multipleAlignment-method.Rd

History View file @ eea95d1

@@ -1,6 +1,5 @@
                      % Generated by roxygen2: do not edit by hand
                      % Please edit documentation in R/multipleAlignment.R
                     -\docType{methods}
                      \name{show,multipleAlignment-method}
                      \alias{show,multipleAlignment-method}
                      \title{Store the raw data and optionally, information regarding signal peaks for

vignettes/auto/flagme.el

History View file @ eea95d1

                     deleted file mode 100644
@@ -1,16 +0,0 @@
                     -(TeX-add-style-hook
                     - "flagme"
                     - (lambda ()
                     -   (TeX-add-to-alist 'LaTeX-provided-package-options
                     -                     '(("caption" "tableposition=top") ("inputenc" "utf8")))
                     -   (TeX-run-style-hooks
                     -    "latex2e"
                     -    "article"
                     -    "art10"
                     -    "amsmath"
                     -    "amscd"
                     -    "caption"
                     -    "ifthen"
                     -    "inputenc"))
                     - :latex)
+                    -

vignettes/flagme-knitr.Rnw

History View file @ eea95d1

                     new file mode 100644
@@ -0,0 +1,416 @@
                     +\documentclass{article}
+                    +
                     +\usepackage{amsmath}
                     +\usepackage{amscd}
                     +\usepackage[tableposition=top]{caption}
                     +\usepackage{ifthen}
                     +\usepackage[utf8]{inputenc}
                     +\topmargin 0in
                     +\headheight 0in
                     +\headsep 0in
                     +\oddsidemargin 0in
                     +\evensidemargin 0in
                     +\textwidth 176mm
                     +\textheight 215mm
+                    +
+                    +
                     +\begin{document}
+                    +
                     +%\VignetteIndexEntry{Using flagme -- Fragment-level analysis of GC-MS-based metabolomics data}
+                    +
                     +\title{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based
                     +  metabolomics data}
                     +\author{Mark Robinson \\ \texttt{[email protected]} \\ Riccardo
                     +  Romoli \\ \texttt{[email protected]}}
                     +\maketitle
+                    +
+                    +
                     +\section{Introduction}
                     +\noindent This document gives a brief introduction to the
                     +\texttt{flagme} package, which is designed to process, visualise and
                     +statistically analyze sets of GC-MS samples. The ideas discussed here
                     +were originally designed with GC-MS-based metabolomics in mind, but
                     +indeed some of the methods and visualizations could be useful for
                     +LC-MS data sets. The {\em fragment-level analysis} though, takes
                     +advantage of the rich fragmentation patterns observed from electron
                     +ionization (EI).
+                    +
                     +There are many aspects of data processing for GC-MS data. Generally,
                     +algorithms are run separately on each sample to detect features, or
                     +{\em peaks} (e.g. AMDIS). Due to retention time shifts from
                     +run-to-run, an alignment algorithm is employed to allow the matching
                     +of the same feature across multiple samples.  Alternatively, if known
                     +standards are introduced to the samples, retention {\em indices} can
                     +be computed for each peak and used for alignment. After peaks are
                     +matched across all samples, further processing steps are employed to
                     +create a matrix of abundances, leading into detecting differences in
                     +abundance.
+                    +
                     +Many of these data processing steps are prone to errors and they often
                     +tend to be black boxes. But, with effective exploratory data
                     +analysis, many of the pitfalls can be avoided and any problems can be
                     +fixed before proceeding to the downstream statistical analysis. The
                     +package provides various visualizations to ensure the methods applied
                     +are not black boxes.
+                    +
                     +The \texttt{flagme} package gives a complete suite of methods to go
                     +through all common stages of data processing. In addition, R is
                     +especially well suited to the downstream data analysis tasks since it
                     +is very rich in analysis tools and has excellent visualization
                     +capabilities. In addition, it is freely available
                     +(\texttt{www.r-project.org}), extensible and there is a growing
                     +community of users and developers. For routine analyses, graphical
                     +user interfaces could be designed.
+                    +
+                    +
                     +\section{Reading and visualizing GC-MS data}
                     +To run these examples, you must have the \texttt{gcspikelite} package
                     +installed.  This data package contains several GC-MS samples from a
                     +spike-in experiment we designed to interrogate data processing
                     +methods.  So, first, we load the packages:
+                    +
                     +<<libraries, echo=FALSE>>=
                     +require(gcspikelite)
                     +library(flagme)
                     +@
+                    +
+                    +
                     +To load the data and corresponding peak detection results, we simply
                     +create vectors of the file-names and create a \texttt{peaksDataset}
                     +object. Note that we can speed up the import time by setting the
                     +retention time range to a subset of the elution, as below:
+                    +
                     +<<rawdata>>=
                     +gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/")
                     +data(targets)
                     +cdfFiles <- paste(gcmsPath, targets$FileName, sep="/")
                     +eluFiles <- gsub("CDF", "ELU", cdfFiles)
                     +pd <- peaksDataset(cdfFiles, mz=seq(50,550), rtrange=c(7.5,8.5))
                     +pd <- addAMDISPeaks(pd, eluFiles)
                     +pd
                     +@
+                    +
                     +Here, we have added peaks from AMDIS, a well known and mature
                     +algorithm for deconvolution of GC-MS data. For GC-TOF-MS data, we have
                     +implemented a parser for the \texttt{ChromaTOF} output (see the
                     +analogous \texttt{addChromaTOFPeaks} function). The
                     +\texttt{addXCMSPeaks} function allows the use of all the XCMS peak-picking
                     +algorithms; with this approach it is also possible to process the
                     +raw data files from within R instead of using external software.
                     +%% Support for XMCS or MzMine may be added in the future. Ask the author
                     +%% if another detection result format is desired as the parsers are
                     +%% generally easy to design.
                     +In particular, the function reads the raw data using XCMS, groups each extracted ion
                     +according to their retention time using CAMERA and attaches them to an
                     +already created \texttt{peaksDataset} object:
+                    +
                     +<<addXCMS>>=
                     +pd.2 <- peaksDataset(cdfFiles[1:3], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +pd.2 <- addXCMSPeaks(cdfFiles[1:3], pd.2, settings = mfp, minintens = 100,
                     +  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +  list(fwhm = c(5, 10, 20), rt_abs = 2, mz_abs = 0.1)
                     +  )
                     +pd.2
                     +@
+                    +
                     +Support for running on a computer cluster will be added in the future.
+                    +
                     +Regardless of platform and peak detection algorithm, a useful
                     +visualization of a set of samples is the set of total ion currents
                     +(TIC), or extracted ion currents (XICs). To view TICs, you can call:
+                    +
                     +<<plotexample1, fig.width=9, fig.height=7>>=
                     +plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE,
                     +     max.near=8, how.near=0.5, col=rep(c("blue","red","black"), each=3))
                     +@
+                    +
                     +Note here the little {\em hashes} represent the detected peaks and are
                     +labelled with an integer index. One of the main challenges is to match
                     +these peak detections across several samples, given that they appear at
                     +slightly different times in different runs.
+                    +
                     +For XICs, you need to give the indices (of \texttt{pd@mz}, the grid of
                     +mass-to-charge values) that you want to plot through the
                     +\texttt{mzind} argument.  This could be a single ion
                     +(e.g. \texttt{mzind=24}) or could be a range of indices if multiple
                     +ions are of interest (e.g. \texttt{mzind=c(24,25,98,99)}).
+                    +
                     +There are several other features within the \texttt{plot} command on
                     +\texttt{peaksDataset} objects that can be useful. See \texttt{?plot}
                     +(and select the flagme version) for full details.
+                    +
                     +Another useful visualization, at least for individual samples, is a 2D
                     +heatmap of intensity. Such plots can be enlightening, especially when
                     +peak detection results are overlaid. For example (with detected
                     +fragment peaks from AMDIS shown in white):
+                    +
                     +<<plotexample2,fig.width=9,fig.height=7>>=
                     +r <- 1
                     +plotImage(pd, run=r, rtrange=c(7.5,8.5), main="")
                     +v <- which(pd@peaksdata[[r]] > 0, arr.ind=TRUE) # find detected peaks
                     +abline(v=pd@peaksrt[[r]])
                     +points(pd@peaksrt[[r]][v[,2]], pd@mz[v[,1]], pch=19, cex=.6, col="white")
                     +@
+                    +
+                    +
                     +\section{Pairwise alignment with dynamic programming algorithm}
                     +One of the first challenges of GC-MS data is the matching of detected
                     +peaks (i.e. metabolites) across several samples. Although gas
                     +chromatography is quite robust, there can be some drift in the elution
                     +time of the same analyte from run to run. We have devised a strategy,
                     +based on dynamic programming, that takes into account both the
                     +similarity in spectrum (at the apex of the called peak) and the
                     +similarity in retention time, without requiring the identity of each
                     +peak; this matching uses the data alone. If each sample gives a `peak
                     +list' of the detected peaks (such as that from AMDIS that we have
                     +attached to our \texttt{peaksDataset} object), the challenge is to
                     +introduce gaps into these lists such that they are best aligned. From
                     +this a matrix of retention times or a matrix of peak abundances can be
                     +extracted for further statistical analysis, visualization and
                     +interpretation. For this matching, we created a procedure analogous to
                     +a multiple {\em sequence} alignment.
+                    +
                     +To highlight the dynamic programming-based alignment strategy, we
                     +first illustrate a pairwise alignment of two peak lists. This example
                     +also illustrates the selection of parameters necessary for the
                     +alignment. From the data read in previously, let us consider the
                     +alignment of two samples, denoted \texttt{0709\_468} and
                     +\texttt{0709\_474}. First, a similarity matrix for two samples is
                     +calculated. This is calculated based on a scoring function and takes
                     +into account the similarity in retention time and in the similarity of
                     +the apex spectra, according to:
                     +\[
                     +S_{ij}(D) = \frac{\sum_{k=1}^K x_{ik} y_{jk}}{\sqrt{ \sum_{k=1}^K
                     +    x_{ik}^2 \cdot \sum_{k=1}^K y_{jk}^2 } } \cdot \exp \left( -
                     +  \frac{1}{2} \frac{(t_i-t_j)^2}{D^2} \right)
                     +\]
                     +\noindent where $i$ is the index of the peak in the first sample and
                     +$j$ is the index of the peak in the second sample, $\mathbf{x}_i$ and
                     +$\mathbf{y}_j$ are the spectra vectors and $t_i$ and $t_j$ are their
                     +respective retention times. As you can see, there are two components
                     +to the similarity: spectra similarity (left term) and similarity in
                     +retention time (right term). Of course, other metrics for spectra
                     +similarity are feasible. Ask the author if you want to see other
                     +metrics implemented. We have some non-optimized code for a few
                     +alternative metrics.
+                    +
                     +The peak alignment algorithm, much like sequence alignments, requires
                     +a \texttt{gap} parameter to be set, here a number between 0 and 1.  A
                     +high gap penalty discourages gaps when matching the two lists of peaks
                     +and a low gap penalty allows gaps at a very low {\em cost}.  We find
                     +that a gap penalty in the middle range (0.4-0.6) works well for GC-MS
                     +peak matching.  Another parameter, \texttt{D}, modulates the impact of
                     +the difference in retention time penalty. A large value for
                     +\texttt{D} essentially eliminates the effect. Generally, we set this
                     +parameter to be a bit larger than the average width of a peak,
                     +allowing a little flexibility for retention time shifts between
                     +samples. Keep in mind the \texttt{D} parameter should be set on the
                     +scale (i.e. seconds or minutes) of the \texttt{peaksrt} slot of the
                     +\texttt{peaksDataset} object. The next example shows the effect of
                     +the \texttt{gap} and \texttt{D} penalty on the matching of a small
                     +range of peaks.
+                    +
                     +<<pairwisealignexample, fig.width=7, fig.height=7>>=
                     +Ds <- c(0.1, 10, 0.1, 0.1)
                     +gaps <- c(0.5, 0.5, 0.1, 0.9)
                     +par(mfrow=c(2,2), mai=c(0.8466,0.4806,0.4806,0.1486))
                     +for(i in 1:4){
                     +  pa <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]],
                     +                       pd@peaksrt[[1]], pd@peaksrt[[2]], D=Ds[i],
                     +                       gap=gaps[i], metric=1, type=1, compress = FALSE)
                     +  plotAlignment(pa, xlim=c(0, 17), ylim=c(0, 16), matchCol="yellow",
                     +       main=paste("D=", Ds[i], " gap=", gaps[i], sep=""))
                     +}
                     +@
+                    +
                     +You might ask: is the flagme package useful without peak detection
                     +results? Possibly. There have been some developments in alignment
                     +(generally on LC-MS proteomics experiments) without peak/feature
                     +detection, such as Prince et al. 2006, where a very similar dynamic
                     +programming is used for a pairwise alignment. We have experimented
                     +with alignments without using the peaks, but do not have any
                     +convincing results. It does introduce a new set of challenges in terms
                     +of highlighting differentially abundant metabolites. However, in the
                     +\texttt{peaksAlignment} routine above (and those mentioned below), you
                     +can set \texttt{usePeaks=FALSE} in order to do {\em scan}-based
                     +alignments instead of {\em peak}-based alignments. In addition, the
                     +\texttt{flagme} package may be useful simply for its bare-bones
                     +dynamic programming algorithm.
+                    +
+                    +
                     +\subsection{Normalizing retention time score to drift estimates}
                     +In what is mentioned above for pairwise alignments, we are penalizing
                     +for differences in retention times that are non-zero. But, as you can
                     +see from the TICs, some differences in retention time are
                     +consistent. For example, all of the peaks from sample
                     +\texttt{0709\_485} elute at later times than peaks from sample
                     +\texttt{0709\_496}. We should be able to estimate this drift and
                     +normalize the time penalty to that estimate, instead of penalizing to
                     +zero. That is, we should replace $t_i-t_j$ with $t_i-t_j-\hat{d}_{ij}$
                     +where $\hat{d}_{ij}$ is the expected drift between peak $i$ of the
                     +first sample and peak $j$ of the second sample.
+                    +
                     +More details coming soon.
+                    +
+                    +
                     +\subsection{Imputing location of undetected peaks}
                     +One goal of the alignment leading into downstream data analyses is the
                     +generation of a table of abundances for each metabolite across all
                     +samples. As you can see from the TICs above, there are some low
                     +intensity peaks that fall below detection in some but not all
                     +samples. Our view is that instead of inserting arbitrary low constants
                     +(such as 0 or half the detection limit) or imputing the intensities
                     +post-hoc or having missing data in the data matrices, it is best to
                     +return to the area where the peak should be and give some kind
                     +of abundance. The alignments themselves are rich in information with
                     +respect to the location of undetected peaks. We feel this is a more
                     +conservative and statistically valid approach than introducing
                     +arbitrary values.
+                    +
                     +More details coming soon.
+                    +
+                    +
                     +\section{Multiple alignment of several experimental groups}
                     +Next, we discuss the multiple alignment of peaks across many
                     +samples. With replicates, we typically do an alignment within
                     +replicates, then combine these together into a summarized form. This
                     +cuts down on the computational cost. For example, consider 2 sets of
                     +samples, each with 5 replicates. Aligning first within replicates
                     +requires 10+10+1 total alignments whereas an all-pairwise alignment
                     +requires 45 pairwise alignments. In addition, this allows some
                     +flexibility in setting different gap and distance penalty parameters
                     +for the {\em within} alignment and {\em between} alignment. An
                     +example follows.
+                    +
                     +<<multiplealignment>>=
                     +print(targets)
                     +ma <- multipleAlignment(pd, group=targets$Group, wn.gap=0.5, wn.D=.05,
                     +                        bw.gap=.6, bw.D=0.05, usePeaks=TRUE, filterMin=1,
                     +                        df=50, verbose=FALSE, metric = 1, type = 1) # bug
                     +ma
                     +@
+                    +
                     +If you set \texttt{verbose=TRUE}, many nitty-gritty details of the
                     +alignment procedure are given.  Next, we can take the alignment
                     +results and overlay it onto the TICs, allowing a visual inspection.
+                    +
                     +<<multiplealignmentfig,fig.width=9,fig.height=7>>=
                     +plotChrom(pd, rtrange=c(7.5,8.5), runs=ma@betweenAlignment@runs,
                     +     mind=ma@betweenAlignment@ind, plotPeaks=TRUE,
                     +     plotPeakLabels=TRUE, max.near=8, how.near=.5,
                     +     col=rep(c("blue","red","black"), each=3))
                     +@
+                    +
+                    +
                     +% \section{Correlation Alignment algorithm}
                     +% Another approach, represented by the \texttt{correlationAlignment}
                     +% function, is to use a modified form of the Pearson correlation
                     +% algorithm. After the correlation between two samples is calculated, a
                     +% penalization coefficient, based on the retention time differences, is
                     +% applied to the result. It is also possible to set a retention time
                     +% range in which the penalization is 0, this because in gas
                     +% chromatography we can have a little deviation in the retention time of
                     +% the metabolite so, based on the experimental data, we can choose the
                     +% retention time window for the penalization coefficient being applied.
+                    +
                     +<<correlationAlignment, eval=FALSE>>=
                     +mp <- correlationAlignment(object=pd.2, thr=0.85, D=20, penality=0.2,
                     +                           normalize=TRUE, minFilter=1)
                     +mp
                     +@
+                    +
                     +% \noindent where \texttt{thr} represent correlation threshold from 0
                     +% (min) to 1 (max); \texttt{D} represent the retention time window in
                     +% seconds; \texttt{penality} represent the penality inflicted to a match
                     +% between two peaks when the retention time difference exceed the
                     +% parameter \texttt{D}; \texttt{normalize} is about the peak
                     +% normalization-to-100 before the correlation is calculated;
                     +% \texttt{minFilter} give the opportunity to exclude from the resulting
                     +% correlation matrix each feature that in represented in our samples
                     +% less time than this value. The value of minFilter must be smaller than
                     +% the number of samples.
+                    +
                     +% The correlation-based peak alignment for multiple GC-MS
                     +% peak lists uses a center-star technique to the alignment of the
                     +% peaks. The combination of the \texttt{D} and \texttt{penality} parameters
                     +% allow the users to force the algorithm to match the peaks close to the
                     +% reference. The \texttt{thr} parameter control the matching factor.
+                    +
+                    +
                     +\subsection{Gathering results}
                     +The alignment results can be extracted from the \texttt{multipleAlignment}
                     +object as:
                     +<<multiplealignmentres>>=
                     +ma@betweenAlignment@runs
                     +ma@betweenAlignment@ind
                     +@
+                    +
                     +\noindent This table would suggest that matched peak \texttt{8} (see
                     +numbers below the TICs in the figure above) corresponds to detected
                     +peaks \texttt{9, 12, 11} in runs \texttt{4, 5, 6} and so on, same as
                     +shown in the above plot.
+                    +
                     +In addition, you can gather a list of all the merged peaks with the
                     +\texttt{gatherInfo} function, giving elements for the retention times,
                     +the detected fragment ions and their intensities.  The example below
                     +also shows how to construct a table of retention times of the
                     +matched peaks (No attempt is made here to adjust retention times onto
                     +a common scale.  Instead, the peaks are matched to each other on their
                     +original scale).  For example:
+                    +
                     +<<alignmentres>>=
                     +outList <- gatherInfo(pd,ma)
                     +outList[[8]]
                     +rtmat <- matrix(unlist(lapply(outList,.subset,"rt"), use.names=FALSE),
                     +                nr=length(outList), byrow=TRUE)
                     +colnames(rtmat) <- names(outList[[1]]$rt); rownames(rtmat) <- 1:nrow(rtmat)
                     +round(rtmat, 3)
                     +@
+                    +
+                    +
                     +\section{Future improvements and extension}
                     +There are many procedures that we have implemented in our
                     +investigation of GC-MS data, but have not made part of the package just
                     +yet. Some of the most useful procedures will be released, such as:
+                    +
                     +\begin{enumerate}
                     +\item Parsers for other peak detection algorithms (e.g. % XCMS,
                     +  MzMine) and parsers for other alignment procedures
                     +  (e.g. SpectConnect) and perhaps retention indices procedures.
                     +\item More convenient access to the alignment information and
                     +  abundance table.
                     +\item Statistical analysis of differential metabolite abundance.
                     +\item Fragment-level analysis, an alternative method to summarize
                     +  abundance across all detected fragments of a metabolite peak.
                     +\end{enumerate}
+                    +
                     +\section{References}
                     +See the following for further details:
+                    +
                     +\begin{enumerate}
                     +\item Robinson MD. {\em Methods for the analysis of gas chromatography
                     +    - mass spectrometry data.} {\bf Ph.D. Thesis}. October 2008.
                     +  Department of Medical Biology (Walter and Eliza Hall Institute of
                     +  Medical Research), University of Melbourne.
                     +\item Robinson MD, De Souza DP, Keen WW, Saunders EC, McConville MJ,
                     +  Speed TP, Liki\'{c} VA. (2007) {\em A dynamic programming approach
                     +    for the alignment of signal peaks in multiple gas
                     +    chromatography-mass spectrometry experiments.} {\bf BMC
                     +    Bioinformatics}. 8:419.
                     +\item Prince JT, Marcotte EM (2006) {\em Chromatographic alignment of
                     +    ESI-LC-MS proteomics data sets by ordered bijective interpolated
                     +    warping}. {\bf Anal Chem}. 78(17):6140-52.
                     +\end{enumerate}
+                    +
                     +\section{This vignette was built with/at ...}
+                    +
                     +<<session>>=
                     +sessionInfo()
                     +date()
                     +@
+                    +
                     +\end{document}

vignettes/flagme.Rnw

History View file @ eea95d1

@@ -16,7 +16,7 @@
                      \begin{document}
                     -%\VignetteIndexEntry{Using flagme -- Fragment-level analysis of GC-MS-based metabolomics data}
                     +%\VignetteIndexEntry{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based metabolomics data}
                      \title{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based
                        metabolomics data}
@@ -105,9 +105,15 @@ according to their retention time using CAMERA and attaches them to an
                      already created \texttt{peaksDataset} object:
                      <<addXCMS>>=
                     -pd.2 <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5))
                     -pd.2 <- addXCMSPeaks(cdfFiles[1:3], pd.2, peakPicking=c('mF'),
                     -                     snthresh=3, fwhm=4, step=1, steps=2, mzdiff=0.5)
                     +pd.2 <- peaksDataset(cdfFiles[1:3], mz = seq(50, 550), rtrange = c(7.5, 8.5))
                     +cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40),
                     +  prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0,
                     +  extendLengthMSW = TRUE, mzCenterFun = "wMean")
                     +mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
                     +pd.2 <- addXCMSPeaks(cdfFiles[1:3], pd.2, settings = mfp, minintens = 100,
                     +  multipleMatchedFilter = FALSE, multipleMatchedFilterParam =
                     +  list(fwhm = c(5, 10, 20), rt_abs = 2, mz_abs = 0.1)
                     +  )
                      pd.2
+                     @
@@ -117,7 +123,7 @@ Regardless of platform and peak detection algorithm, a useful
                      visualization of a set of samples is the set of total ion currents
                      (TIC), or extracted ion currents (XICs). To view TICs, you can call:
                     -<<plotexample1, fig=TRUE, width=9, height=7>>=
                     +<<plotexample1, fig.width=9, fig.height=7>>=
                      plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE,
                           max.near=8, how.near=0.5, col=rep(c("blue","red","black"), each=3))
+                     @
@@ -142,7 +148,7 @@ heatmap of intensity. Such plots can be enlightening, especially when
                      peak detection results are overlaid. For example (with detected
                      fragment peaks from AMDIS shown in white):
                     -<<plotexample2,fig=TRUE,width=9,height=7>>=
                     +<<plotexample2,fig.width=9,fig.height=7>>=
                      r <- 1
                      plotImage(pd, run=r, rtrange=c(7.5,8.5), main="")
                      v <- which(pd@peaksdata[[r]] > 0, arr.ind=TRUE) # find detected peaks
@@ -208,7 +214,7 @@ scale (i.e. seconds or minutes) of the \texttt{peaksrt} slot of the
                      the \texttt{gap} and \texttt{D} penalty on the matching of a small
                      ranges of peaks.
                     -<<pairwisealignexample, fig=TRUE, width=7, height=7>>=
                     +<<pairwisealignexample, fig.width=7, fig.height=7>>=
                      Ds <- c(0.1, 10, 0.1, 0.1)
                      gaps <- c(0.5, 0.5, 0.1, 0.9)
                      par(mfrow=c(2,2), mai=c(0.8466,0.4806,0.4806,0.1486))
@@ -292,7 +298,7 @@ If you set \texttt{verbose=TRUE}, many nitty-gritty details of the
                      alignment procedure are given.  Next, we can take the alignment
                      results and overlay it onto the TICs, allowing a visual inspection.
                     -<<multiplealignmentfig,fig=TRUE,width=9,height=7>>=
                     +<<multiplealignmentfig,fig.width=9,fig.height=7>>=
                      plotChrom(pd, rtrange=c(7.5,8.5), runs=ma@betweenAlignment@runs,
                           mind=ma@betweenAlignment@ind, plotPeaks=TRUE,
                           plotPeakLabels=TRUE, max.near=8, how.near=.5,

vignettes/flagme.pdf

History View file @ eea95d1

299

305

Binary files a/vignettes/flagme.pdf and b/vignettes/flagme.pdf differ

vignettes/flagme.tex

History View file @ eea95d1

                     new file mode 100644
@@ -0,0 +1,965 @@
                     +\documentclass{article}\usepackage[]{graphicx}\usepackage[]{color}
                     +% maxwidth is the original width if it is less than linewidth
                     +% otherwise use linewidth (to make sure the graphics do not exceed the margin)
                     +\makeatletter
                     +\def\maxwidth{ %
                     +  \ifdim\Gin@nat@width>\linewidth
                     +    \linewidth
                     +  \else
                     +    \Gin@nat@width
                     +  \fi
                     +}
                     +\makeatother
+                    +
                     +\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
                     +\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
                     +\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
                     +\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
                     +\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
                     +\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
                     +\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
                     +\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
                     +\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
                     +\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
                     +\let\hlipl\hlkwb
+                    +
                     +\usepackage{framed}
                     +\makeatletter
                     +\newenvironment{kframe}{%
                     + \def\at@end@of@kframe{}%
                     + \ifinner\ifhmode%
                     +  \def\at@end@of@kframe{\end{minipage}}%
                     +  \begin{minipage}{\columnwidth}%
                     + \fi\fi%
                     + \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
                     + \colorbox{shadecolor}{##1}\hskip-\fboxsep
                     +     % There is no \\@totalrightmargin, so:
                     +     \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
                     + \MakeFramed {\advance\hsize-\width
                     +   \@totalleftmargin\z@ \linewidth\hsize
                     +   \@setminipage}}%
                     + {\par\unskip\endMakeFramed%
                     + \at@end@of@kframe}
                     +\makeatother
+                    +
                     +\definecolor{shadecolor}{rgb}{.97, .97, .97}
                     +\definecolor{messagecolor}{rgb}{0, 0, 0}
                     +\definecolor{warningcolor}{rgb}{1, 0, 1}
                     +\definecolor{errorcolor}{rgb}{1, 0, 0}
                     +\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
+                    +
                     +\usepackage{alltt}
+                    +
                     +\usepackage{amsmath}
                     +\usepackage{amscd}
                     +\usepackage[tableposition=top]{caption}
                     +\usepackage{ifthen}
                     +\usepackage[utf8]{inputenc}
                     +\topmargin 0in
                     +\headheight 0in
                     +\headsep 0in
                     +\oddsidemargin 0in
                     +\evensidemargin 0in
                     +\textwidth 176mm
                     +\textheight 215mm
                     +\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
                     +\begin{document}
+                    +
                     +%\VignetteIndexEntry{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based metabolomics data}
+                    +
                     +\title{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based
                     +  metabolomics data}
                     +\author{Mark Robinson \\ \texttt{[email protected]} \\ Riccardo
                     +  Romoli \\ \texttt{[email protected]}}
                     +\maketitle
+                    +
+                    +
                     +\section{Introduction}
                     +\noindent This document gives a brief introduction to the
                     +\texttt{flagme} package, which is designed to process, visualise and
                     +statistically analyze sets of GC-MS samples. The ideas discussed here
                     +were originally designed with GC-MS-based metabolomics in mind, but
                     +indeed some of the methods and visualizations could be useful for
                     +LC-MS data sets. The {\em fragment-level analysis} though, takes
                     +advantage of the rich fragmentation patterns observed from electron
                     +impact (EI) ionization.
+                    +
                     +There are many aspects of data processing for GC-MS data. Generally,
                     +algorithms are run separately on each sample to detect features, or
                     +{\em peaks} (e.g. AMDIS). Due to retention time shifts from
                     +run-to-run, an alignment algorithm is employed to allow the matching
                     +of the same feature across multiple samples.  Alternatively, if known
                     +standards are introduced to the samples, retention {\em indices} can
                     +be computed for each peak and used for alignment. After peaks are
                     +matched across all samples, further processing steps are employed to
                     +create a matrix of abundances, leading into detecting differences in
                     +abundance.
+                    +
                     +Many of these data processing steps are prone to errors and they often
                     +tend to be black boxes. But, with effective exploratory data
                     +analysis, many of the pitfalls can be avoided and any problems can be
                     +fixed before proceeding to the downstream statistical analysis. The
                     +package provides various visualizations to ensure the methods applied
                     +are not black boxes.
+                    +
                     +The \texttt{flagme} package gives a complete suite of methods to go
                     +through all common stages of data processing. In addition, R is
                     +especially well suited to the downstream data analysis tasks since it
                     +is very rich in analysis tools and has excellent visualization
                     +capabilities. In addition, it is freely available
                     +(\texttt{www.r-project.org}), extensible and there is a growing
                     +community of users and developers. For routine analyses, graphical
                     +user interfaces could be designed.
+                    +
+                    +
                     +\section{Reading and visualizing GC-MS data}
                     +To run these examples, you must have the \texttt{gcspikelite} package
                     +installed.  This data package contains several GC-MS samples from a
                     +spike-in experiment we designed to interrogate data processing
                     +methods.  So, first, we load the packages:
+                    +
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: gcspikelite}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: xcms}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: BiocParallel}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: MSnbase}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: BiocGenerics}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: parallel}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'BiocGenerics'}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:parallel':\\\#\# \\\#\# \ \ \ \ clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,\\\#\# \ \ \ \ clusterExport, clusterMap, parApply, parCapply, parLapply,\\\#\# \ \ \ \ parLapplyLB, parRapply, parSapply, parSapplyLB}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:stats':\\\#\# \\\#\# \ \ \ \ IQR, mad, sd, var, xtabs}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:base':\\\#\# \\\#\# \ \ \ \ anyDuplicated, append, as.data.frame, basename, cbind, colnames,\\\#\# \ \ \ \ dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,\\\#\# \ \ \ \ grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,\\\#\# \ \ \ \ order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,\\\#\# \ \ \ \ rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,\\\#\# \ \ \ \ union, unique, unsplit, which.max, which.min}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: Biobase}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Welcome to Bioconductor\\\#\# \\\#\# \ \ \ \ Vignettes contain introductory material; view with\\\#\# \ \ \ \ 'browseVignettes()'. To cite Bioconductor, see\\\#\# \ \ \ \ 'citation("{}Biobase"{})', and for packages 'citation("{}pkgname"{})'.}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: mzR}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: Rcpp}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: S4Vectors}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: stats4}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'S4Vectors'}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:base':\\\#\# \\\#\# \ \ \ \ expand.grid, I, unname}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: ProtGenerics}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'ProtGenerics'}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following object is masked from 'package:stats':\\\#\# \\\#\# \ \ \ \ smooth}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# This is MSnbase version 2.18.0 \\\#\# \ \ Visit https://lgatto.github.io/MSnbase/ to get started.}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'MSnbase'}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following object is masked from 'package:base':\\\#\# \\\#\# \ \ \ \ trimws}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# This is xcms version 3.14.1}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'xcms'}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following object is masked from 'package:stats':\\\#\# \\\#\# \ \ \ \ sigma}}
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: CAMERA}}\end{kframe}
                     +\end{knitrout}
+                    +
+                    +
                     +To load the data and corresponding peak detection results, we simply
                     +create vectors of the file-names and create a \texttt{peaksDataset}
                     +object. Note that we can speed up the import time by setting the
                     +retention time range to a subset of the elution, as below:
+                    +
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
                     +\begin{alltt}
                     +\hlstd{gcmsPath} \hlkwb{<-} \hlkwd{paste}\hlstd{(}\hlkwd{find.package}\hlstd{(}\hlstr{"gcspikelite"}\hlstd{),} \hlstr{"data"}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"/"}\hlstd{)}
                     +\hlkwd{data}\hlstd{(targets)}
                     +\hlstd{cdfFiles} \hlkwb{<-} \hlkwd{paste}\hlstd{(gcmsPath, targets}\hlopt{$}\hlstd{FileName,} \hlkwc{sep}\hlstd{=}\hlstr{"/"}\hlstd{)}
                     +\hlstd{eluFiles} \hlkwb{<-} \hlkwd{gsub}\hlstd{(}\hlstr{"CDF"}\hlstd{,} \hlstr{"ELU"}\hlstd{, cdfFiles)}
                     +\hlstd{pd} \hlkwb{<-} \hlkwd{peaksDataset}\hlstd{(cdfFiles,} \hlkwc{mz}\hlstd{=}\hlkwd{seq}\hlstd{(}\hlnum{50}\hlstd{,}\hlnum{550}\hlstd{),} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{))}
                     +\end{alltt}
                     +\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_468.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_474.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_475.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_485.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_493.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_496.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_470.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_471.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_479.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{alltt}
                     +\hlstd{pd} \hlkwb{<-} \hlkwd{addAMDISPeaks}\hlstd{(pd, eluFiles)}
                     +\end{alltt}
                     +\begin{verbatim}
                     +## Reading retention time range: 7.500133 8.499917
                     +## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_468.ELU ... Done.
                     +## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_474.ELU ... Done.
                     +## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_475.ELU ... Done.
                     +## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_485.ELU ... Done.
                     +## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_493.ELU ... Done.
                     +## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_496.ELU ... Done.
                     +## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_470.ELU ... Done.
                     +## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_471.ELU ... Done.
                     +## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_479.ELU ... Done.
                     +\end{verbatim}
                     +\begin{alltt}
                     +\hlstd{pd}
                     +\end{alltt}
                     +\begin{verbatim}
                     +## An object of class "peaksDataset"
                     +## 9 samples: 0709_468 0709_474 0709_475 0709_485 0709_493 0709_496 0709_470 0709_471 0709_479
                     +## 501 m/z bins - range: ( 50 550 )
                     +## scans: 175 175 175 175 175 174 175 175 175
                     +## peaks: 24 23 26 20 27 24 24 25 21
                     +\end{verbatim}
                     +\end{kframe}
                     +\end{knitrout}
+                    +
                     +Here, we have added peaks from AMDIS, a well known and mature
                     +algorithm for deconvolution of GC-MS data. For GC-TOF-MS data, we have
                     +implemented a parser for the \texttt{ChromaTOF} output (see the
                     +analogous \texttt{addChromaTOFPeaks} function). The
                     +\texttt{addXCMSPeaks} function allows the use of all the XCMS peak-picking
                     +algorithms; using this approach it is also possible to process the
                     +raw data files from within R instead of using external software.
                     +%% Support for XCMS or MzMine may be added in the future. Ask the author
                     +%% if another detection result format is desired as the parsers are
                     +%% generally easy to design.
                     +In particular, the function reads the raw data using XCMS, groups the extracted ions
                     +according to their retention time using CAMERA and attaches them to an
                     +already created \texttt{peaksDataset} object:
+                    +
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
                     +\begin{alltt}
                     +\hlstd{pd.2} \hlkwb{<-} \hlkwd{peaksDataset}\hlstd{(cdfFiles[}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{],} \hlkwc{mz} \hlstd{=} \hlkwd{seq}\hlstd{(}\hlnum{50}\hlstd{,} \hlnum{550}\hlstd{),} \hlkwc{rtrange} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,} \hlnum{8.5}\hlstd{))}
                     +\end{alltt}
                     +\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_468.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_474.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim}
                     +##  Reading  /usr/local/lib/R/site-library/gcspikelite/data/0709_475.CDF
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{alltt}
                     +\hlstd{cwt} \hlkwb{<-} \hlstd{xcms}\hlopt{::}\hlkwd{CentWaveParam}\hlstd{(}\hlkwc{snthresh} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{ppm} \hlstd{=} \hlnum{3000}\hlstd{,} \hlkwc{peakwidth} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,} \hlnum{40}\hlstd{),}
                     +  \hlkwc{prefilter} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,} \hlnum{100}\hlstd{),} \hlkwc{fitgauss} \hlstd{=} \hlnum{FALSE}\hlstd{,} \hlkwc{integrate} \hlstd{=} \hlnum{2}\hlstd{,} \hlkwc{noise} \hlstd{=} \hlnum{0}\hlstd{,}
                     +  \hlkwc{extendLengthMSW} \hlstd{=} \hlnum{TRUE}\hlstd{,} \hlkwc{mzCenterFun} \hlstd{=} \hlstr{"wMean"}\hlstd{)}
                     +\hlstd{mfp} \hlkwb{<-} \hlstd{xcms}\hlopt{::}\hlkwd{MatchedFilterParam}\hlstd{(}\hlkwc{fwhm} \hlstd{=} \hlnum{10}\hlstd{,} \hlkwc{snthresh} \hlstd{=} \hlnum{5}\hlstd{)}
                     +\hlstd{pd.2} \hlkwb{<-} \hlkwd{addXCMSPeaks}\hlstd{(cdfFiles[}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{], pd.2,} \hlkwc{settings} \hlstd{= mfp,} \hlkwc{minintens} \hlstd{=} \hlnum{100}\hlstd{,}
                     +  \hlkwc{multipleMatchedFilter} \hlstd{=} \hlnum{FALSE}\hlstd{,} \hlkwc{multipleMatchedFilterParam} \hlstd{=}
                     +  \hlkwd{list}\hlstd{(}\hlkwc{fwhm} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{5}\hlstd{,} \hlnum{10}\hlstd{,} \hlnum{20}\hlstd{),} \hlkwc{rt_abs} \hlstd{=} \hlnum{2}\hlstd{,} \hlkwc{mz_abs} \hlstd{=} \hlnum{0.1}\hlstd{)}
                     +  \hlstd{)}
                     +\end{alltt}
+                    +
+                    +
                     +{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}
+                    +
                     +{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in xcmsSet(x, method = "{}centWave"{}, prefilter = c(5, 100), scanrange = scanRange, : Chromatographic peak detection failed for all files! The first error was: Error in .local(object, ...): unused arguments (settings = new("{}MatchedFilterParam"{}, binSize = 0.1, impute = "{}none"{}, baseValue = numeric(0), distance = numeric(0), fwhm = 10, sigma = 4.24664515033124, max = 5, snthresh = 5, steps = 2, mzdiff = 0.6, index = FALSE), minintens = 100, multipleMatchedFilter = FALSE, multipleMatchedFilterParam = list(c(5, 10, 20), 2, 0.1))}}\begin{alltt}
                     +\hlstd{pd.2}
                     +\end{alltt}
                     +\begin{verbatim}
                     +## An object of class "peaksDataset"
                     +## 3 samples: 0709_468 0709_474 0709_475
                     +## 501 m/z bins - range: ( 50 550 )
                     +## scans: 175 175 175
                     +## peaks:
                     +\end{verbatim}
                     +\end{kframe}
                     +\end{knitrout}
+                    +
                     +Support for running on a computer cluster will be added in the future.
+                    +
                     +Regardless of platform and peak detection algorithm, a useful
                     +visualization of a set of samples is the set of total ion currents
                     +(TIC), or extracted ion currents (XICs). To view TICs, you can call:
+                    +
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
                     +\begin{alltt}
                     +\hlkwd{plotChrom}\hlstd{(pd,} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{),} \hlkwc{plotPeaks}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{plotPeakLabels}\hlstd{=}\hlnum{TRUE}\hlstd{,}
                     +     \hlkwc{max.near}\hlstd{=}\hlnum{8}\hlstd{,} \hlkwc{how.near}\hlstd{=}\hlnum{0.5}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{rep}\hlstd{(}\hlkwd{c}\hlstd{(}\hlstr{"blue"}\hlstd{,}\hlstr{"red"}\hlstd{,}\hlstr{"black"}\hlstd{),} \hlkwc{each}\hlstd{=}\hlnum{3}\hlstd{))}
                     +\end{alltt}
+                    +
+                    +
                     +{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in (function (classes, fdef, mtable) : unable to find an inherited method for function 'plotChrom' for signature '"{}peaksDataset"{}'}}\end{kframe}
                     +\end{knitrout}
+                    +
                     +Note here the little {\em hashes} represent the detected peaks and are
                     +labelled with an integer index. One of the main challenges is to match
                     +these peak detections across several samples, given that they appear at
                     +slightly different times in different runs.
+                    +
                     +For XICs, you need to give the indices (of \texttt{pd@mz}, the grid of
                     +mass-to-charge values) that you want to plot through the
                     +\texttt{mzind} argument.  This could be a single ion
                     +(e.g. \texttt{mzind=24}) or could be a range of indices if multiple
                     +ions are of interest (e.g. \texttt{mzind=c(24,25,98,99)}).
+                    +
                     +There are several other features within the \texttt{plot} command on
                     +\texttt{peaksDataset} objects that can be useful. See \texttt{?plot}
                     +(and select the flagme version) for full details.
+                    +
                     +Another useful visualization, at least for individual samples, is a 2D
                     +heatmap of intensity. Such plots can be enlightening, especially when
                     +peak detection results are overlaid. For example (with detected
                     +fragment peaks from AMDIS shown in white):
+                    +
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
                     +\begin{alltt}
                     +\hlstd{r} \hlkwb{<-} \hlnum{1}
                     +\hlkwd{plotImage}\hlstd{(pd,} \hlkwc{run}\hlstd{=r,} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{),} \hlkwc{main}\hlstd{=}\hlstr{""}\hlstd{)}
                     +\hlstd{v} \hlkwb{<-} \hlkwd{which}\hlstd{(pd}\hlopt{@}\hlkwc{peaksdata}\hlstd{[[r]]} \hlopt{>} \hlnum{0}\hlstd{,} \hlkwc{arr.ind}\hlstd{=}\hlnum{TRUE}\hlstd{)} \hlcom{# find detected peaks}
                     +\hlkwd{abline}\hlstd{(}\hlkwc{v}\hlstd{=pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[r]])}
                     +\hlkwd{points}\hlstd{(pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[r]][v[,}\hlnum{2}\hlstd{]], pd}\hlopt{@}\hlkwc{mz}\hlstd{[v[,}\hlnum{1}\hlstd{]],} \hlkwc{pch}\hlstd{=}\hlnum{19}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{.6}\hlstd{,} \hlkwc{col}\hlstd{=}\hlstr{"white"}\hlstd{)}
                     +\end{alltt}
                     +\end{kframe}
                     +\includegraphics[width=\maxwidth]{figure/plotexample2-1}
                     +\end{knitrout}
+                    +
+                    +
                     +\section{Pairwise alignment with dynamic programming algorithm}
                     +One of the first challenges of GC-MS data is the matching of detected
                     +peaks (i.e. metabolites) across several samples. Although gas
                     +chromatography is quite robust, there can be some drift in the elution
                     +time of the same analyte from run to run. We have devised a strategy,
                     +based on dynamic programming, that takes into account both the
                     +similarity in spectrum (at the apex of the called peak) and the
                     +similarity in retention time, without requiring the identity of each
                     +peak; this matching uses the data alone. If each sample gives a `peak
                     +list' of the detected peaks (such as that from AMDIS that we have
                     +attached to our \texttt{peaksDataset} object), the challenge is to
                     +introduce gaps into these lists such that they are best aligned. From
                     +this a matrix of retention times or a matrix of peak abundances can be
                     +extracted for further statistical analysis, visualization and
                     +interpretation. For this matching, we created a procedure analogous to
                     +a multiple {\em sequence} alignment.
+                    +
                     +To highlight the dynamic programming-based alignment strategy, we
                     +first illustrate a pairwise alignment of two peak lists. This example
                     +also illustrates the selection of parameters necessary for the
                     +alignment. From the data read in previously, let us consider the
                     +alignment of two samples, denoted \texttt{0709\_468} and
                     +\texttt{0709\_474}. First, a similarity matrix for two samples is
                     +calculated. This is calculated based on a scoring function and takes
                     +into account the similarity in retention time and in the similarity of
                     +the apex spectra, according to:
                     +\[
                     +S_{ij}(D) = \frac{\sum_{k=1}^K x_{ik} y_{jk}}{\sqrt{ \sum_{k=1}^K
                     +    x_{ik}^2 \cdot \sum_{k=1}^K y_{jk}^2 } } \cdot \exp \left( -
                     +  \frac{1}{2} \frac{(t_i-t_j)^2}{D^2} \right)
                     +\]
                     +\noindent where $i$ is the index of the peak in the first sample and
                     +$j$ is the index of the peak in the second sample, $\mathbf{x}_i$ and
                     +$\mathbf{y}_j$ are the spectra vectors and $t_i$ and $t_j$ are their
                     +respective retention times. As you can see, there are two components
                     +to the similarity: spectra similarity (left term) and similarity in
                     +retention time (right term). Of course, other metrics for spectra
                     +similarity are feasible. Ask the author if you want to see other
                     +metrics implemented. We have some non-optimized code for a few
                     +alternative metrics.
+                    +
                     +The peak alignment algorithm, much like sequence alignments, requires
                     +a \texttt{gap} parameter to be set, here a number between 0 and 1.  A
                     +high gap penalty discourages gaps when matching the two lists of peaks
                     +and a low gap penalty allows gaps at a very low {\em cost}.  We find
                     +that a gap penalty in the middle range (0.4-0.6) works well for GC-MS
                     +peak matching.  Another parameter, \texttt{D}, modulates the impact of
                     +the difference in retention time penalty. A large value for
                     +\texttt{D} essentially eliminates the effect. Generally, we set this
                     +parameter to be a bit larger than the average width of a peak,
                     +allowing a little flexibility for retention time shifts between
                     +samples. Keep in mind the \texttt{D} parameter should be set on the
                     +scale (i.e. seconds or minutes) of the \texttt{peaksrt} slot of the
                     +\texttt{peaksDataset} object. The next example shows the effect of
                     +the \texttt{gap} and \texttt{D} penalty on the matching of a small
                     +range of peaks.
+                    +
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
                     +\begin{alltt}
                     +\hlstd{Ds} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlnum{0.1}\hlstd{,} \hlnum{10}\hlstd{,} \hlnum{0.1}\hlstd{,} \hlnum{0.1}\hlstd{)}
                     +\hlstd{gaps} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlnum{0.5}\hlstd{,} \hlnum{0.5}\hlstd{,} \hlnum{0.1}\hlstd{,} \hlnum{0.9}\hlstd{)}
                     +\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{2}\hlstd{),} \hlkwc{mai}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0.8466}\hlstd{,}\hlnum{0.4806}\hlstd{,}\hlnum{0.4806}\hlstd{,}\hlnum{0.1486}\hlstd{))}
                     +\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlnum{4}\hlstd{)\{}
                     +  \hlstd{pa} \hlkwb{<-} \hlkwd{peaksAlignment}\hlstd{(pd}\hlopt{@}\hlkwc{peaksdata}\hlstd{[[}\hlnum{1}\hlstd{]], pd}\hlopt{@}\hlkwc{peaksdata}\hlstd{[[}\hlnum{2}\hlstd{]],}
                     +                       \hlstd{pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[}\hlnum{1}\hlstd{]], pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[}\hlnum{2}\hlstd{]],} \hlkwc{D}\hlstd{=Ds[i],}
                     +                       \hlkwc{gap}\hlstd{=gaps[i],} \hlkwc{metric}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{type}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{compress} \hlstd{=} \hlnum{FALSE}\hlstd{)}
                     +  \hlkwd{plotAlignment}\hlstd{(pa,} \hlkwc{xlim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,} \hlnum{17}\hlstd{),} \hlkwc{ylim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,} \hlnum{16}\hlstd{),} \hlkwc{matchCol}\hlstd{=}\hlstr{"yellow"}\hlstd{,}
                     +       \hlkwc{main}\hlstd{=}\hlkwd{paste}\hlstd{(}\hlstr{"D="}\hlstd{, Ds[i],} \hlstr{" gap="}\hlstd{, gaps[i],} \hlkwc{sep}\hlstd{=}\hlstr{""}\hlstd{))}
                     +\hlstd{\}}
                     +\end{alltt}
                     +\begin{verbatim}
                     +## [peaksAlignment] Comparing 24 peaks to 23 peaks -- gap= 0.5 D= 0.001 , metric= 1 , type= 1
                     +## [peaksAlignment]  21 matched.  Similarity= 0.7983038
                     +\end{verbatim}
+                    +
+                    +
                     +{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in plotAlignment(pa, xlim = c(0, 17), ylim = c(0, 16), matchCol = "{}yellow"{}, : could not find function "{}plotAlignment"{}}}\end{kframe}
                     +\end{knitrout}
+                    +
                     +You might ask: is the flagme package useful without peak detection
                     +results? Possibly. There have been some developments in alignment
                     +(generally on LC-MS proteomics experiments) without peak/feature
                     +detection, such as Prince et al. 2006, where a very similar dynamic
                     +programming approach is used for a pairwise alignment. We have experimented
                     +with alignments without using the peaks, but do not have any
                     +convincing results. It does introduce a new set of challenges in terms
                     +of highlighting differentially abundant metabolites. However, in the
                     +\texttt{peaksAlignment} routine above (and those mentioned below), you
                     +can set \texttt{usePeaks=FALSE} in order to do {\em scan}-based
                     +alignments instead of {\em peak}-based alignments. In addition, the
                     +\texttt{flagme} package may be useful simply for its bare-bones
                     +dynamic programming algorithm.
+                    +
+                    +
                     +\subsection{Normalizing retention time score to drift estimates}
                     +In what is mentioned above for pairwise alignments, we are penalizing
                     +for differences in retention times that are non-zero. But, as you can
                     +see from the TICs, some differences in retention time are
                     +consistent. For example, all of the peaks from sample
                     +\texttt{0709\_485} elute at later times than peaks from sample
                     +\texttt{0709\_496}. We should be able to estimate this drift and
                     +normalize the time penalty to that estimate, instead of penalizing to
                     +zero. That is, we should replace $t_i-t_j$ with $t_i-t_j-\hat{d}_{ij}$
                     +where $\hat{d}_{ij}$ is the expected drift between peak $i$ of the
                     +first sample and peak $j$ of the second sample.
+                    +
                     +More details coming soon.
+                    +
+                    +
                     +\subsection{Imputing location of undetected peaks}
                     +One goal of the alignment leading into downstream data analyses is the
                     +generation of a table of abundances for each metabolite across all
                     +samples. As you can see from the TICs above, there are some low
                     +intensity peaks that fall below detection in some but not all
                     +samples. Our view is that instead of inserting arbitrary low constants
                     +(such as 0 or half the detection limit) or imputing the intensities
                     +post-hoc or having missing data in the data matrices, it is best to
                     +return to the area where the peak should be and give some kind
                     +of abundance. The alignments themselves are rich in information with
                     +respect to the location of undetected peaks. We feel this is a more
                     +conservative and statistically valid approach than introducing
                     +arbitrary values.
+                    +
                     +More details coming soon.
+                    +
+                    +
                     +\section{Multiple alignment of several experimental groups}
                     +Next, we discuss the multiple alignment of peaks across many
                     +samples. With replicates, we typically do an alignment within
                     +replicates, then combine these together into a summarized form. This
                     +cuts down on the computational cost. For example, consider 2 sets of
                     +samples, each with 5 replicates. Aligning first within replicates
                     +requires 10+10+1 total alignments whereas an all-pairwise alignment
                     +requires 45 pairwise alignments. In addition, this allows some
                     +flexibility in setting different gap and distance penalty parameters
                     +for the {\em within} alignment and {\em between} alignment. An
                     +example follows.
+                    +
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
                     +\begin{alltt}
                     +\hlkwd{print}\hlstd{(targets)}
                     +\end{alltt}
                     +\begin{verbatim}
                     +##       FileName Group
                     +## 1 0709_468.CDF   mmA
                     +## 2 0709_474.CDF   mmA
                     +## 3 0709_475.CDF   mmA
                     +## 4 0709_485.CDF   mmC
                     +## 5 0709_493.CDF   mmC
                     +## 6 0709_496.CDF   mmC
                     +## 7 0709_470.CDF   mmD
                     +## 8 0709_471.CDF   mmD
                     +## 9 0709_479.CDF   mmD
                     +\end{verbatim}
                     +\begin{alltt}
                     +\hlstd{ma} \hlkwb{<-} \hlkwd{multipleAlignment}\hlstd{(pd,} \hlkwc{group}\hlstd{=targets}\hlopt{$}\hlstd{Group,} \hlkwc{wn.gap}\hlstd{=}\hlnum{0.5}\hlstd{,} \hlkwc{wn.D}\hlstd{=}\hlnum{.05}\hlstd{,}
                     +                        \hlkwc{bw.gap}\hlstd{=}\hlnum{.6}\hlstd{,} \hlkwc{bw.D}\hlstd{=}\hlnum{0.05}\hlstd{,} \hlkwc{usePeaks}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{filterMin}\hlstd{=}\hlnum{1}\hlstd{,}
                     +                        \hlkwc{df}\hlstd{=}\hlnum{50}\hlstd{,} \hlkwc{verbose}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{metric} \hlstd{=} \hlnum{1}\hlstd{,} \hlkwc{type} \hlstd{=} \hlnum{1}\hlstd{)} \hlcom{# bug}
                     +\end{alltt}
                     +\begin{verbatim}
                     +## [clusterAlignment] Aligning 0709_468 to 0709_474
                     +## [peaksAlignment] Comparing 24 peaks to 23 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  22 matched.  Similarity= 0.8625793
                     +## [clusterAlignment] Aligning 0709_468 to 0709_475
                     +## [peaksAlignment] Comparing 24 peaks to 26 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  15 matched.  Similarity= 0.8
                     +## [clusterAlignment] Aligning 0709_474 to 0709_475
                     +## [peaksAlignment] Comparing 23 peaks to 26 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  20 matched.  Similarity= 0.899699
                     +## [progressiveAlignment] Doing merge -1 -3
                     +## [progressiveAlignment] left.runs: 1 , right.runs: 3
                     +## [progressiveAlignment] Doing merge -2 1
                     +## [progressiveAlignment] left.runs: 2 , right.runs: 1 3
                     +## [progressiveAlignment] (dot=50) going to 23 :
                     +##            used  (Mb) gc trigger  (Mb) max used  (Mb)
                     +## Ncells  7846781 419.1   12592810 672.6 12592810 672.6
                     +## Vcells 14553341 111.1   38545179 294.1 38545179 294.1
                     +## [clusterAlignment] Aligning 0709_485 to 0709_493
                     +## [peaksAlignment] Comparing 20 peaks to 27 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  20 matched.  Similarity= 0.9354748
                     +## [clusterAlignment] Aligning 0709_485 to 0709_496
                     +## [peaksAlignment] Comparing 20 peaks to 24 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  20 matched.  Similarity= 0.9359244
                     +## [clusterAlignment] Aligning 0709_493 to 0709_496
                     +## [peaksAlignment] Comparing 27 peaks to 24 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  22 matched.  Similarity= 0.8515771
                     +## [progressiveAlignment] Doing merge -5 -6
                     +## [progressiveAlignment] left.runs: 5 , right.runs: 6
                     +## [progressiveAlignment] Doing merge -4 1
                     +## [progressiveAlignment] left.runs: 4 , right.runs: 5 6
                     +## [progressiveAlignment] (dot=50) going to 20 :
                     +##            used  (Mb) gc trigger  (Mb) max used  (Mb)
                     +## Ncells  7846949 419.1   12592810 672.6 12592810 672.6
                     +## Vcells 14554064 111.1   38545179 294.1 38545179 294.1
                     +## [clusterAlignment] Aligning 0709_470 to 0709_471
                     +## [peaksAlignment] Comparing 24 peaks to 25 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  22 matched.  Similarity= 0.8879722
                     +## [clusterAlignment] Aligning 0709_470 to 0709_479
                     +## [peaksAlignment] Comparing 24 peaks to 21 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  21 matched.  Similarity= 0.8564714
                     +## [clusterAlignment] Aligning 0709_471 to 0709_479
                     +## [peaksAlignment] Comparing 25 peaks to 21 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  19 matched.  Similarity= 0.8258931
                     +## [progressiveAlignment] Doing merge -8 -9
                     +## [progressiveAlignment] left.runs: 8 , right.runs: 9
                     +## [progressiveAlignment] Doing merge -7 1
                     +## [progressiveAlignment] left.runs: 7 , right.runs: 8 9
                     +## [progressiveAlignment] (dot=50) going to 24 :
                     +##            used  (Mb) gc trigger  (Mb) max used  (Mb)
                     +## Ncells  7847143 419.1   12592810 672.6 12592810 672.6
                     +## Vcells 14555317 111.1   38545179 294.1 38545179 294.1
                     +## [clusterAlignment] Aligning to
                     +## [peaksAlignment] Comparing 36 peaks to 29 peaks -- gap= 0.6 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  25 matched.  Similarity= 0.9094807
                     +## [clusterAlignment] Aligning to
                     +## [peaksAlignment] Comparing 36 peaks to 29 peaks -- gap= 0.6 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  29 matched.  Similarity= 0.8798354
                     +## [clusterAlignment] Aligning to
                     +## [peaksAlignment] Comparing 29 peaks to 29 peaks -- gap= 0.6 D= 5e-04 , metric= 1 , type= 1
                     +## [peaksAlignment]  29 matched.  Similarity= 0.9655151
                     +## [progressiveAlignment] Doing merge -1 -3
                     +## [progressiveAlignment] left.runs: 1 , right.runs: 3
                     +## [progressiveAlignment] Doing merge -2 1
                     +## [progressiveAlignment] left.runs: 2 , right.runs: 1 3
                     +## [progressiveAlignment] (dot=50) going to 29 :
                     +##            used  (Mb) gc trigger  (Mb) max used  (Mb)
                     +## Ncells  7848679 419.2   12592810 672.6 12592810 672.6
                     +## Vcells 14607964 111.5   38545179 294.1 38545179 294.1
                     +\end{verbatim}
                     +\begin{alltt}
                     +\hlstd{ma}
                     +\end{alltt}
                     +\begin{verbatim}
                     +## An object of class "multipleAlignment"
                     +## 3 groups: 3 3 3 samples, respectively.
                     +## 36 merged peaks
                     +\end{verbatim}
                     +\end{kframe}
                     +\end{knitrout}
+                    +
                     +If you set \texttt{verbose=TRUE}, many nitty-gritty details of the
                     +alignment procedure are given.  Next, we can take the alignment
                     +results and overlay them onto the TICs, allowing a visual inspection.
+                    +
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
                     +\begin{alltt}
                     +\hlkwd{plotChrom}\hlstd{(pd,} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{),} \hlkwc{runs}\hlstd{=ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{runs}\hlstd{,}
                     +     \hlkwc{mind}\hlstd{=ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{ind}\hlstd{,} \hlkwc{plotPeaks}\hlstd{=}\hlnum{TRUE}\hlstd{,}
                     +     \hlkwc{plotPeakLabels}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{max.near}\hlstd{=}\hlnum{8}\hlstd{,} \hlkwc{how.near}\hlstd{=}\hlnum{.5}\hlstd{,}
                     +     \hlkwc{col}\hlstd{=}\hlkwd{rep}\hlstd{(}\hlkwd{c}\hlstd{(}\hlstr{"blue"}\hlstd{,}\hlstr{"red"}\hlstd{,}\hlstr{"black"}\hlstd{),} \hlkwc{each}\hlstd{=}\hlnum{3}\hlstd{))}
                     +\end{alltt}
+                    +
+                    +
                     +{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in (function (classes, fdef, mtable) : unable to find an inherited method for function 'plotChrom' for signature '"{}peaksDataset"{}'}}\end{kframe}
                     +\end{knitrout}
+                    +
+                    +
                     +% \section{Correlation Alignment algorithm}
                     +% Another approach, represented by the \texttt{correlationAlignment}
                     +% function, is to use a modified form of the Pearson correlation
                     +% algorithm. After the correlation between two samples is calculated, a
                     +% penalization coefficient, based on the retention time differences, is
                     +% applied to the result. It is also possible to set a retention time
                     +% range in which the penalization is 0. This is useful because, in gas
                     +% chromatography, the retention time of a metabolite can deviate slightly;
                     +% based on the experimental data, we can therefore choose the retention
                     +% time window outside of which the penalization coefficient is applied.
+                    +
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
                     +\begin{alltt}
                     +\hlstd{mp} \hlkwb{<-} \hlkwd{correlationAlignment}\hlstd{(}\hlkwc{object}\hlstd{=pd.2,} \hlkwc{thr}\hlstd{=}\hlnum{0.85}\hlstd{,} \hlkwc{D}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{penality}\hlstd{=}\hlnum{0.2}\hlstd{,}
                     +                           \hlkwc{normalize}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{minFilter}\hlstd{=}\hlnum{1}\hlstd{)}
                     +\hlstd{mp}
                     +\end{alltt}
                     +\end{kframe}
                     +\end{knitrout}
+                    +
                     +% \noindent where \texttt{thr} represents the correlation threshold from 0
                     +% (min) to 1 (max); \texttt{D} represents the retention time window in
                     +% seconds; \texttt{penality} represents the penalty inflicted on a match
                     +% between two peaks when the retention time difference exceeds the
                     +% parameter \texttt{D}; \texttt{normalize} is about the peak
                     +% normalization-to-100 before the correlation is calculated;
                     +% \texttt{minFilter} gives the opportunity to exclude from the resulting
                     +% correlation matrix each feature that is represented in our samples
                     +% fewer times than this value. The value of minFilter must be smaller than
                     +% the number of samples.
+                    +
                     +% The correlation-based peak alignment for multiple GC-MS
                     +% peak lists uses a center-star technique for the alignment of the
                     +% peaks. The combination of the \texttt{D} and \texttt{penality} parameters
                     +% allows the user to force the algorithm to match the peaks close to the
                     +% reference. The \texttt{thr} parameter controls the matching factor.
+                    +
+                    +
                     +\subsection{Gathering results}
                     +The alignment results can be extracted from the \texttt{multipleAlignment}
                     +object as:
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
                     +\begin{alltt}
                     +\hlstd{ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{runs}
                     +\end{alltt}
                     +\begin{verbatim}
                     +## [1] 4 5 6 2 1 3 7 8 9
                     +\end{verbatim}
                     +\begin{alltt}
                     +\hlstd{ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{ind}
                     +\end{alltt}
                     +\begin{verbatim}
                     +##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
                     +##  [1,]    1    1    1    1    1    1    1    1    1
                     +##  [2,]   NA    2   NA   NA    2   NA    2    2   NA
                     +##  [3,]   NA    3    2    2    3    2    3    3   NA
                     +##  [4,]    2    4    3    3    4    3    4    4    2
                     +##  [5,]    3    5    4    4    5    4    5    5    3
                     +##  [6,]    4    6    5    5    6    5    6    6    4
                     +##  [7,]    5    7    6    6    7    6   NA   NA   NA
                     +##  [8,]    6    8    7    7    8    7    7    7    5
                     +##  [9,]    7    9    8   NA   NA    8    8    8    6
                     +## [10,]   NA   NA   NA   NA   NA    9   NA   NA   NA
                     +## [11,]   NA   NA   NA   NA   NA   10   NA   NA   NA
                     +## [12,]   NA   10   NA   NA   NA   11   NA   NA   NA
                     +## [13,]    8   11    9   NA   NA   12   NA   NA   NA
                     +## [14,]   NA   12   NA   NA   NA   13   NA   NA   NA
                     +## [15,]   NA   13   NA   NA   NA   14   NA   NA    7
                     +## [16,]    9   14   NA   NA   NA   15   NA    9    8
                     +## [17,]   10   15   10    8    9   16   NA   10    9
                     +## [18,]   11   16   11    9   10   17    9   11   10
                     +## [19,]   NA   17   12   NA   NA   18   10   12   NA
                     +## [20,]   NA   18   13   NA   NA   19   11   13   NA
                     +## [21,]   12   NA   14   NA   11   20   12   14   NA
                     +## [22,]   NA   NA   NA   10   12   21   13   15   NA
                     +## [23,]   NA   NA   NA   11   13   22   14   16   11
                     +## [24,]   13   19   15   12   NA   NA   15   17   12
                     +## [25,]   NA   NA   NA   13   14   NA   16   NA   NA
                     +## [26,]   NA   NA   NA   14   15   NA   17   18   13
                     +## [27,]   NA   NA   NA   15   16   NA   NA   19   14
                     +## [28,]   NA   20   16   16   17   NA   18   20   15
                     +## [29,]   NA   21   17   17   18   NA   19   21   16
                     +## [30,]   14   22   18   18   19   NA   20   22   17
                     +## [31,]   15   23   19   19   20   NA   21   NA   NA
                     +## [32,]   16   NA   20   20   21   NA   22   23   18
                     +## [33,]   17   24   21   21   22   23   NA   NA   19
                     +## [34,]   18   25   22   22   23   24   23   24   20
                     +## [35,]   19   26   23   NA   NA   25   NA   NA   NA
                     +## [36,]   20   27   24   23   24   26   24   25   21
                     +\end{verbatim}
                     +\end{kframe}
                     +\end{knitrout}
+                    +
                     +\noindent This table would suggest that matched peak \texttt{8} (see
                     +numbers below the TICs in the figure above) corresponds to detected
                     +peaks \texttt{6, 8, 7} in runs \texttt{4, 5, 6} and so on, same as
                     +shown in the above plot.
+                    +
                     +In addition, you can gather a list of all the merged peaks with the
                     +\texttt{gatherInfo} function, giving elements for the retention times,
                     +the detected fragment ions and their intensities.  The example below
                     +also shows how to construct a table of retention times of the
                     +matched peaks (No attempt is made here to adjust retention times onto
                     +a common scale.  Instead, the peaks are matched to each other on their
                     +original scale).  For example:
+                    +
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
                     +\begin{alltt}
                     +\hlstd{outList} \hlkwb{<-} \hlkwd{gatherInfo}\hlstd{(pd,ma)}
                     +\hlstd{outList[[}\hlnum{8}\hlstd{]]}
                     +\end{alltt}
                     +\begin{verbatim}
                     +## $rt
                     +##    mmC.4    mmC.5    mmC.6    mmA.2    mmA.1    mmA.3    mmD.7    mmD.8
                     +## 7.728317 7.740700 7.711417 7.713550 7.708567 7.711600 7.702967 7.701717
                     +##    mmD.9
                     +## 7.713933
                     +##
                     +## $mz
                     +##  [1]  52  59  66  70  72  73  74  75  79  89 104 116 133 147 148 188 204
                     +##
                     +## $data
                     +##       mmC.4 mmC.5 mmC.6 mmA.2 mmA.1 mmA.3 mmD.7 mmD.8 mmD.9
                     +##  [1,]     0     0     0     0     0     0 26248     0     0
                     +##  [2,]     0     0     0  5113  4425  4994  4855  4557  4728
                     +##  [3,]     0     0     0  3926  5146  4876  4831  3354  4783
                     +##  [4,]     0     0     0 11568     0     0 10637     0     0
                     +##  [5,]     0     0     0  3680  3910  4492  4051  3427  3907
                     +##  [6,]     0     0     0 61816 65680 66768 65912 52848 61560
                     +##  [7,]     0     0     0  6705  6185  7400  6642  6235  7088
                     +##  [8,]     0     0 24160 26376 23328     0 28016 26304 27184
                     +##  [9,]     0     0     0     0     0     0 38712     0     0
                     +## [10,]     0     0     0  5617  5347  5702  5173  3946  5659
                     +## [11,]     0     0     0 13173 13808 13207 12852  9816 12492
                     +## [12,]     0     0     0  5417  5525  5912  5577  4504  5201
                     +## [13,]     0     0     0  3539  3730  2910  3599  2436  3893
                     +## [14,]     0     0 17904 21864 20016 22280 21904 17896 22400
                     +## [15,]     0     0     0  4413  3430  3890  3006  3335  3851
                     +## [16,]     0     0  7636 14433 14751 13765 14731 10680 14061
                     +## [17,]     0     0     0  6878  6667  7018  6935  5149  6830
                     +\end{verbatim}
                     +\begin{alltt}
                     +\hlstd{rtmat} \hlkwb{<-} \hlkwd{matrix}\hlstd{(}\hlkwd{unlist}\hlstd{(}\hlkwd{lapply}\hlstd{(outList,.subset,}\hlstr{"rt"}\hlstd{),} \hlkwc{use.names}\hlstd{=}\hlnum{FALSE}\hlstd{),}
                     +                \hlkwc{nr}\hlstd{=}\hlkwd{length}\hlstd{(outList),} \hlkwc{byrow}\hlstd{=}\hlnum{TRUE}\hlstd{)}
                     +\hlkwd{colnames}\hlstd{(rtmat)} \hlkwb{<-} \hlkwd{names}\hlstd{(outList[[}\hlnum{1}\hlstd{]]}\hlopt{$}\hlstd{rt);} \hlkwd{rownames}\hlstd{(rtmat)} \hlkwb{<-} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(rtmat)}
                     +\hlkwd{round}\hlstd{(rtmat,} \hlnum{3}\hlstd{)}
                     +\end{alltt}
                     +\begin{verbatim}
                     +##    mmC.4 mmC.5 mmC.6 mmA.2 mmA.1 mmA.3 mmD.7 mmD.8 mmD.9
                     +## 1  7.534 7.512 7.506 7.531 7.526 7.540 7.520 7.519 7.531
                     +## 2     NA 7.535    NA    NA 7.549    NA 7.543 7.547    NA
                     +## 3     NA 7.558 7.551 7.559 7.566 7.557 7.560 7.565    NA
                     +## 4  7.580 7.575 7.569 7.576 7.583 7.574 7.577 7.582 7.577
                     +## 5  7.597 7.586 7.586 7.588 7.600 7.592 7.594 7.599 7.594
                     +## 6  7.614 7.615 7.614 7.616 7.617 7.614 7.617 7.610 7.617
                     +## 7  7.717 7.695 7.694 7.691 7.663 7.649    NA    NA    NA
                     +## 8  7.728 7.741 7.711 7.714 7.709 7.712 7.703 7.702 7.714
                     +## 9  7.803 7.804 7.803    NA    NA 7.803 7.806 7.805 7.805
                     +## 10    NA    NA    NA    NA    NA 7.826    NA    NA    NA
                     +## 11    NA    NA    NA    NA    NA 7.975    NA    NA    NA
                     +## 12    NA 7.809    NA    NA    NA 7.997    NA    NA    NA
                     +## 13 7.825 7.849 7.951    NA    NA 8.009    NA    NA    NA
                     +## 14    NA 7.946    NA    NA    NA 8.077    NA    NA    NA
                     +## 15    NA 7.958    NA    NA    NA 8.095    NA    NA 7.817
                     +## 16 7.946 7.969    NA    NA    NA 8.112    NA 7.816 7.823
                     +## 17 7.974 7.986 7.980 7.736 7.783 8.249    NA 7.907 7.874
                     +## 18 8.008 8.009 8.003 7.799 7.800 8.283 7.812 7.936 7.943
                     +## 19    NA 8.049 8.043    NA    NA 8.312 7.966 7.965    NA
                     +## 20    NA 8.061 8.060    NA    NA 8.335 7.989 7.993    NA
                     +## 21 8.077    NA 8.100    NA 7.823 8.357 8.012 8.010    NA
                     +## 22    NA    NA    NA 7.828 7.880 8.375 8.069 8.068    NA
                     +## 23    NA    NA    NA 7.942 7.943 8.403 8.086 8.085 7.977
                     +## 24 8.111 8.107 8.111 7.976    NA    NA 8.109 8.108 8.000
                     +## 25    NA    NA    NA 7.999 7.966    NA 8.172    NA    NA
                     +## 26    NA    NA    NA 8.079 7.994    NA 8.246 8.245 8.080
                     +## 27    NA    NA    NA 8.114 8.011    NA    NA 8.262 8.091
                     +## 28    NA 8.204 8.237 8.182 8.109    NA 8.280 8.330 8.114
                     +## 29    NA 8.244 8.254 8.251 8.246    NA 8.326 8.342 8.251
                     +## 30 8.254 8.301 8.294 8.285 8.263    NA 8.360 8.359 8.263
                     +## 31 8.266 8.324 8.323 8.337 8.332    NA 8.377    NA    NA
                     +## 32 8.334    NA 8.329 8.359 8.360    NA 8.395 8.393 8.337
                     +## 33 8.363 8.352 8.352 8.399 8.394 8.420    NA    NA 8.400
                     +## 34 8.403 8.392 8.386 8.434 8.434 8.437 8.435 8.433 8.440
                     +## 35 8.437 8.432 8.432    NA    NA 8.443    NA    NA    NA
                     +## 36 8.477 8.461 8.460 8.474 8.469 8.472 8.469 8.468 8.474
                     +\end{verbatim}
                     +\end{kframe}
                     +\end{knitrout}
+                    +
+                    +
                     +\section{Future improvements and extensions}
                     +There are many procedures that we have implemented in our
                     +investigation of GC-MS data but have not yet made part of the package.
                     +The most useful of these will be released in future versions, such as:
+                    +
                     +\begin{enumerate}
                     +\item Parsers for other peak detection algorithms (e.g. % XCMS,
                     +  MzMine) and parsers for other alignment procedures
                     +  (e.g. SpectConnect) and perhaps retention indices procedures.
                     +\item More convenient access to the alignment information and
                     +  abundance table.
                     +\item Statistical analysis of differential metabolite abundance.
                     +\item Fragment-level analysis, an alternative method to summarize
                     +  abundance across all detected fragments of a metabolite peak.
                     +\end{enumerate}
+                    +
                     +\section{References}
                     +See the following for further details:
+                    +
                     +\begin{enumerate}
                     +\item Robinson MD. {\em Methods for the analysis of gas chromatography
                     +    - mass spectrometry data.} {\bf Ph.D. Thesis}. October 2008.
                     +  Department of Medical Biology (Walter and Eliza Hall Institute of
                     +  Medical Research), University of Melbourne.
                     +\item Robinson MD, De Souza DP, Keen WW, Saunders EC, McConville MJ,
                     +  Speed TP, Liki\'{c} VA. (2007) {\em A dynamic programming approach
                     +    for the alignment of signal peaks in multiple gas
                     +    chromatography-mass spectrometry experiments.} {\bf BMC
                     +    Bioinformatics}. 8:419.
                     +\item Prince JT, Marcotte EM. (2006) {\em Chromatographic alignment of
                     +    ESI-LC-MS proteomics data sets by ordered bijective interpolated
                     +    warping.} {\bf Anal Chem}. 78(17):6140-52.
                     +\end{enumerate}
+                    +
                     +\section{This vignette was built with/at ...}
+                    +
                     +\begin{knitrout}
                     +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
                     +\begin{alltt}
                     +\hlkwd{sessionInfo}\hlstd{()}
                     +\end{alltt}
                     +\begin{verbatim}
                     +## R version 4.1.1 (2021-08-10)
                     +## Platform: x86_64-pc-linux-gnu (64-bit)
                     +## Running under: Debian GNU/Linux 10 (buster)
                     +##
                     +## Matrix products: default
                     +## BLAS:   /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
                     +## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.3.5.so
                     +##
                     +## locale:
                     +##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C
                     +##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
                     +##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8
                     +##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C
                     +##  [9] LC_ADDRESS=C               LC_TELEPHONE=C
                     +## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
                     +##
                     +## attached base packages:
                     +## [1] stats4    parallel  stats     graphics  grDevices utils     datasets
                     +## [8] methods   base
                     +##
                     +## other attached packages:
                     +##  [1] flagme_1.48.0       CAMERA_1.48.0       xcms_3.14.1
                     +##  [4] MSnbase_2.18.0      ProtGenerics_1.24.0 S4Vectors_0.30.2
                     +##  [7] mzR_2.26.1          Rcpp_1.0.7          Biobase_2.52.0
                     +## [10] BiocGenerics_0.38.0 BiocParallel_1.26.2 gcspikelite_1.30.0
                     +##
                     +## loaded via a namespace (and not attached):
                     +##   [1] colorspace_2.0-2            ellipsis_0.3.2
                     +##   [3] htmlTable_2.3.0             XVector_0.32.0
                     +##   [5] GenomicRanges_1.44.0        base64enc_0.1-3
                     +##   [7] clue_0.3-60                 rstudioapi_0.13
                     +##   [9] affyio_1.62.0               fansi_0.5.0
                     +##  [11] codetools_0.2-18            splines_4.1.1
                     +##  [13] ncdf4_1.17                  doParallel_1.0.16
                     +##  [15] impute_1.66.0               robustbase_0.93-9
                     +##  [17] knitr_1.36                  Formula_1.2-4
                     +##  [19] cluster_2.1.2               vsn_3.60.0
                     +##  [21] png_0.1-7                   graph_1.70.0
                     +##  [23] BiocManager_1.30.16         compiler_4.1.1
                     +##  [25] backports_1.2.1             assertthat_0.2.1
                     +##  [27] Matrix_1.3-4                fastmap_1.1.0
                     +##  [29] limma_3.48.3                htmltools_0.5.2
                     +##  [31] tools_4.1.1                 igraph_1.2.7
                     +##  [33] gtable_0.3.0                glue_1.4.2
                     +##  [35] GenomeInfoDbData_1.2.6      affy_1.70.0
                     +##  [37] RANN_2.6.1                  dplyr_1.0.7
                     +##  [39] MALDIquant_1.20             vctrs_0.3.8
                     +##  [41] preprocessCore_1.54.0       iterators_1.0.13
                     +##  [43] xfun_0.26                   stringr_1.4.0
                     +##  [45] lifecycle_1.0.1             gtools_3.9.2
                     +##  [47] XML_3.99-0.8                DEoptimR_1.0-9
                     +##  [49] zlibbioc_1.38.0             MASS_7.3-54
                     +##  [51] scales_1.1.1                pcaMethods_1.84.0
                     +##  [53] MatrixGenerics_1.4.3        SummarizedExperiment_1.22.0
                     +##  [55] RBGL_1.68.0                 MassSpecWavelet_1.58.0
                     +##  [57] SparseM_1.81                RColorBrewer_1.1-2
                     +##  [59] gridExtra_2.3               ggplot2_3.3.5
                     +##  [61] rpart_4.1-15                latticeExtra_0.6-29
                     +##  [63] stringi_1.7.5               highr_0.9
                     +##  [65] foreach_1.5.1               checkmate_2.0.0
                     +##  [67] caTools_1.18.2              GenomeInfoDb_1.28.4
                     +##  [69] rlang_0.4.11                pkgconfig_2.0.3
                     +##  [71] matrixStats_0.61.0          bitops_1.0-7
                     +##  [73] mzID_1.30.0                 evaluate_0.14
                     +##  [75] lattice_0.20-45             purrr_0.3.4
                     +##  [77] htmlwidgets_1.5.4           tidyselect_1.1.1
                     +##  [79] plyr_1.8.6                  magrittr_2.0.1
                     +##  [81] R6_2.5.1                    IRanges_2.26.0
                     +##  [83] gplots_3.1.1                generics_0.1.0
                     +##  [85] Hmisc_4.6-0                 DelayedArray_0.18.0
                     +##  [87] DBI_1.1.1                   pillar_1.6.3
                     +##  [89] foreign_0.8-81              MsCoreUtils_1.4.0
                     +##  [91] survival_3.2-13             RCurl_1.98-1.5
                     +##  [93] nnet_7.3-16                 tibble_3.1.5
                     +##  [95] crayon_1.4.1                KernSmooth_2.23-20
                     +##  [97] utf8_1.2.2                  jpeg_0.1-9
                     +##  [99] grid_4.1.1                  data.table_1.14.2
                     +## [101] digest_0.6.28               munsell_0.5.0
                     +\end{verbatim}
                     +\begin{alltt}
                     +\hlkwd{date}\hlstd{()}
                     +\end{alltt}
                     +\begin{verbatim}
                     +## [1] "Mon Oct 18 15:30:58 2021"
                     +\end{verbatim}
                     +\end{kframe}
                     +\end{knitrout}
+                    +
                     +\end{document}

...	...	@@ -176,10 +176,11 @@ betweenAlignment <- function(pD, cAList, pAList, impList, filterMin = 1,
176	176	#' @export
177	177	#' @noRd
178	178	setMethod("show","betweenAlignment",
179		- function(object){
	179	+ function(object) {
180	180	cat("An object of class \"", class(object), "\"\n", sep = "")
181	181	cat(length(object@mergedPeaksDataset@peaksrt), "groups:",
182		- sapply(object@mergedPeaksDataset@peaksrt, length), "merged peaks\n"
	182	+ sapply(object@mergedPeaksDataset@peaksrt, length),
	183	+ "merged peaks\n"
183	184	)
184	185	}
185	186	)