... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
Package: flagme |
2 |
-Version: 1.33.5 |
|
2 |
+Version: 1.48.1 |
|
3 | 3 |
Date: 2015/04/06 |
4 | 4 |
Title: Analysis of Metabolomics GC/MS Data |
5 | 5 |
Author: Mark Robinson <[email protected]>, Riccardo Romoli <[email protected]> |
... | ... |
@@ -12,7 +12,7 @@ License: LGPL (>= 2) |
12 | 12 |
Collate: 0classes.R clusterAlignment.R init.R multipleAlignment.R |
13 | 13 |
peaksAlignment.R progressiveAlignment.R betweenAlignment.R dp.R |
14 | 14 |
metrics.R parse.R peaksDataset.R gatherInfo.R plotFragments.R |
15 |
- rmaFitUnit.R addXCMSPeaks.R retFatMatrix.R exportSpectra.R |
|
15 |
+ rmaFitUnit.R addXCMSPeaks.R retFatMatrix.R exportSpectra.R importSpectra.R |
|
16 | 16 |
biocViews: DifferentialExpression, MassSpectrometry |
17 |
-RoxygenNote: 6.1.1 |
|
17 |
+RoxygenNote: 7.1.2 |
|
18 | 18 |
Encoding: UTF-8 |
... | ... |
@@ -7,11 +7,15 @@ export(betweenAlignment) |
7 | 7 |
export(calcTimeDiffs) |
8 | 8 |
export(clusterAlignment) |
9 | 9 |
export(corPrt) |
10 |
+export(distToLib) |
|
10 | 11 |
export(dp) |
11 | 12 |
export(dynRT) |
12 | 13 |
export(exportSpectra) |
13 | 14 |
export(gatherInfo) |
15 |
+export(headToTailPlot) |
|
16 |
+export(importSpec) |
|
14 | 17 |
export(imputePeaks) |
18 |
+export(matchSpec) |
|
15 | 19 |
export(multipleAlignment) |
16 | 20 |
export(ndpRT) |
17 | 21 |
export(normDotProduct) |
... | ... |
@@ -36,6 +40,7 @@ importClassesFrom(SparseM,matrix.csc) |
36 | 40 |
importFrom(CAMERA,annotate) |
37 | 41 |
importFrom(CAMERA,getpspectra) |
38 | 42 |
importFrom(MASS,rlm) |
43 |
+importFrom(SparseM,as.matrix) |
|
39 | 44 |
importFrom(SparseM,as.matrix.csc) |
40 | 45 |
importFrom(gplots,colorpanel) |
41 | 46 |
importFrom(graphics,axis) |
... | ... |
@@ -1,184 +1,287 @@ |
1 | 1 |
#' Add xcms/CAMERA peak detection results |
2 |
-#' |
|
2 |
+#' |
|
3 | 3 |
#' Reads the raw data using xcms, groups each extracted ion according to its
4 | 4 |
#' retention time using CAMERA and attaches them to an already created |
5 | 5 |
#' \code{peaksDataset} object |
6 |
-#' |
|
6 |
+#' |
|
7 | 7 |
#' Repeated calls to xcmsSet and annotate are used to perform peak-picking and
8 | 8 |
#' deconvolution. The peak detection results are added to the original |
9 | 9 |
#' \code{peaksDataset} object. Two peak detection algorithms are available:
10 | 10 |
#' continuous wavelet transform (peakPicking=c('cwt')) and the matched filter |
11 | 11 |
#' approach (peakPicking=c('mF')) described by Smith et al (2006). For further |
12 | 12 |
#' information consult the xcms package manual. |
13 |
-#' |
|
14 |
-#' @param files character vector of same length as \code{object@rawdata} (user |
|
15 |
-#' ensures the order matches) |
|
16 |
-#' @param object a \code{peaksDataset} object. |
|
17 |
-#' @param peakPicking Methods to use for peak detection. See details. |
|
18 |
-#' @param perfwhm percentage of full width half maximum. See |
|
19 |
-#' CAMERA::groupFWHM() for more details |
|
20 |
-#' @param quick logical. See CAMERA::annotate() for more details |
|
21 |
-#' @param ... arguments passed on to \code{xcmsSet} and \code{annotate} |
|
13 |
+#' @title addXCMSPeaks |
|
14 |
+#' @param files list of chromatogram files |
|
15 |
+#' @param object a \code{peaksDataset} object
|
16 |
+#' @param settings list. It contains the settings for the peak-picking
|
17 |
+#' @param rtrange vector; retention time range |
|
18 |
+#' @param mzrange vector, mz range |
|
19 |
+#' @param perfwhm determines the maximal retention time difference of features in
|
20 |
+#' one pseudospectrum. |
|
21 |
+#' @param minintens minimum intensity for an ion to be included in a pseudospectrum
|
22 |
+#' @param minfeat minimum number of ions required to create a pseudospectrum
|
23 |
+#' @param multipleMatchedFilter logical. Try to remove redundant peaks, in
|
24 |
+#' this case where there are any peaks within an absolute m/z value of 0.2 and |
|
25 |
+#' within 3 s for any one sample in the xcmsSet (the largest peak is kept) |
|
26 |
+#' @param multipleMatchedFilterParam list. It contains the settings for the
|
27 |
+#' peak-picking: mz_abs represents the m/z window; rt_abs represents the retention time window
|
28 |
+#' @importFrom xcms xcmsRaw xcmsSet |
|
29 |
+#' @importFrom CAMERA annotate getpspectra |
|
30 |
+#' @importFrom stats aggregate |
|
31 |
+#' @export addXCMSPeaks |
|
22 | 32 |
#' @return \code{peaksDataset} object |
23 | 33 |
#' @author Riccardo Romoli \email{riccardo.romoli@@unifi.it} |
24 | 34 |
#' @seealso \code{\link{peaksDataset}} \code{\link{findPeaks.matchedFilter}} |
25 | 35 |
#' \code{\link{findPeaks.centWave}} \code{\link{xcmsRaw-class}} |
26 | 36 |
#' @keywords manip |
27 | 37 |
#' @examples |
28 |
-#' |
|
29 |
-#' # need access to CDF (raw data) |
|
30 |
-#' require(gcspikelite) |
|
31 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
32 |
-#' |
|
33 |
-#' # full paths to file names |
|
34 |
-#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE) |
|
35 |
-#' |
|
36 |
-#' # create a 'peaksDataset' object and add XCMS peaks to it |
|
37 |
-#' pd <- peaksDataset(cdfFiles[1], mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
38 |
-#' pd <- addXCMSPeaks(cdfFiles[1], pd, peakPicking=c('mF'), |
|
39 |
-#' snthresh=3, fwhm=4, step=1, steps=2, mzdiff=0.5) |
|
38 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
39 |
+#' sep = "/"),"CDF", full = TRUE) |
|
40 |
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
41 |
+#' ## create settings object |
|
42 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
43 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
44 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
45 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
46 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
47 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
48 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
49 |
+#' data |
|
40 | 50 |
#' |
41 | 51 |
#' @importFrom xcms xcmsRaw xcmsSet |
42 | 52 |
#' @importFrom CAMERA annotate getpspectra |
43 | 53 |
#' @importFrom stats aggregate |
44 | 54 |
#' @export addXCMSPeaks |
45 |
-addXCMSPeaks <- function (files, object, peakPicking = c("cwt", "mF"), perfwhm = 0.75, quick = TRUE, ...) |
|
46 |
-{ |
|
55 |
+addXCMSPeaks <- function(files, object, settings, rtrange = NULL, |
|
56 |
+ mzrange = NULL, perfwhm = 0.75, minintens = 100, |
|
57 |
+ minfeat = 6, multipleMatchedFilter = FALSE, |
|
58 |
+ multipleMatchedFilterParam = list( |
|
59 |
+ fwhm = c(5, 10, 20), mz_abs = 0.1, rt_abs = 3) |
|
60 |
+ ) { |
|
61 |
+ ## Rmpi tends to give many warnings that are not relevant to end |
|
62 |
+ ## users: this is an attempt to suppress this output |
|
63 |
+ owarn <- options("warn") |
|
64 |
+ on.exit(options(warn = owarn$warn)) |
|
47 | 65 |
options(warn = -1) |
48 |
- cdfFiles <- as.character(files) |
|
49 |
- if (length(cdfFiles) != length(object@rawdata)) |
|
50 |
- stop("Number of files must be the same as the number of runs (and must match).") |
|
51 |
- xs <- lapply(cdfFiles, function(x, y) { |
|
52 |
- f <- which(cdfFiles %in% x) |
|
53 |
- xr <- xcmsRaw(x) |
|
54 |
- rtrange <- c(min(object@rawrt[[f]]), max(object@rawrt[[f]])) * |
|
55 |
- 60 |
|
56 |
- scanRange <- c(max(1, which(xr@scantime > rtrange[1])[1], |
|
57 |
- na.rm = TRUE), min(length(xr@scantime), which(xr@scantime > |
|
58 |
- rtrange[2])[1] - 1, na.rm = TRUE)) |
|
59 |
- if (peakPicking == "cwt") { |
|
60 |
- s <- xcmsSet(x, method = "centWave", prefilter = c(5, |
|
61 |
- 100), scanrange = scanRange, integrate = 1, mzdiff = -0.001, |
|
62 |
- fitgauss = TRUE, ...) |
|
66 |
+ |
|
67 |
+ ## if an rtrange is given, we first find out which scans correspond |
|
68 |
+ ## to this and then use the scanRange argument of xcmsSet |
|
69 |
+ if (!is.null(rtrange)) { |
|
70 |
+ if (length(rtrange) != 2) { |
|
71 |
+ stop("Improper rtrange given!") |
|
63 | 72 |
} |
64 |
- if (peakPicking == "mF") { |
|
65 |
- s <- xcmsSet(x, method = "matchedFilter", scanrange = scanRange, |
|
66 |
- max = 500, ...) |
|
67 |
- } |
|
68 |
- idx <- which(s@peaks[, "mz"] > min(object@mz) & s@peaks[, |
|
69 |
- "mz"] < max(object@mz)) |
|
70 |
- s@peaks <- s@peaks[idx, ] |
|
71 |
- if(quick == TRUE) |
|
72 |
- { |
|
73 |
- a <- annotate(s, perfwhm = perfwhm, quick = quick) |
|
74 |
- } |
|
75 |
- if(quick == FALSE) |
|
76 |
- { |
|
77 |
- a <- annotate(s, perfwhm = perfwhm, cor_eic_th = 0.8, |
|
78 |
- pval = 0.05, graphMethod = "hcs", |
|
79 |
- calcIso = FALSE, calcCiS = TRUE, |
|
80 |
- calcCaS = FALSE) |
|
73 |
+ |
|
74 |
+ rtrange <- rtrange * 60 ## convert from minutes to seconds |
|
75 |
+ xr <- xcms::xcmsRaw(files[1]) |
|
76 |
+ scanRange <- c(max(1, which(xr@scantime > rtrange[1])[1], na.rm = TRUE), |
|
77 |
+ min(length(xr@scantime), |
|
78 |
+ which(xr@scantime > rtrange[2])[1] - 1, na.rm = TRUE) |
|
79 |
+ ) |
|
80 |
+ allSettings <- c(list(files = files, scanrange = scanRange), settings) |
|
81 |
+ } |
|
82 |
+ else { |
|
83 |
+ allSettings <- c(list(files = files), settings) |
|
84 |
+ } |
|
85 |
+ |
|
86 |
+ ## peak-picking |
|
87 |
+ cat("Start peakPicking \n") |
|
88 |
+ row <- MSnbase::readMSData(files, centroided. = TRUE, mode = "onDisk", |
|
89 |
+ msLevel. = 1) |
|
90 |
+ if (class(settings)[1] == "MatchedFilterParam") { |
|
91 |
+ if (multipleMatchedFilter == TRUE) { |
|
92 |
+ settings@fwhm <- multipleMatchedFilterParam$fwhm[1] |
|
93 |
+ set1a <- xcms::findChromPeaks(row, param = settings ) |
|
94 |
+ settings@fwhm <- multipleMatchedFilterParam$fwhm[2] |
|
95 |
+ set1b <- xcms::findChromPeaks(row, param = settings ) |
|
96 |
+ settings@fwhm <- multipleMatchedFilterParam$fwhm[3] |
|
97 |
+ set1c <- xcms::findChromPeaks(row, param = settings ) |
|
98 |
+ set1 <- set1c |
|
99 |
+ xcms::chromPeaks(set1) <- rbind( |
|
100 |
+ xcms::chromPeaks(set1a), xcms::chromPeaks(set1b), |
|
101 |
+ xcms::chromPeaks(set1c) |
|
102 |
+ ) |
|
103 |
+ xcms::chromPeaks(set1) <- xcms::chromPeaks(set1)[ |
|
104 |
+ order(xcms::chromPeaks(set1)[, "sample"], decreasing = FALSE), ] |
|
105 |
+ s <- de_Duper(set1, |
|
106 |
+ mz_abs = multipleMatchedFilterParam$mz_abs, |
|
107 |
+ rt_abs = multipleMatchedFilterParam$rt_abs) |
|
81 | 108 |
} |
82 |
- return(a) |
|
83 |
- }, y = peakPicking) |
|
84 |
- if (peakPicking == "cwt") { |
|
85 |
- area <- c("intb") |
|
86 | 109 |
} |
87 |
- if (peakPicking == "mF") { |
|
88 |
- area <- c("intf") |
|
110 |
+ xset <- xcms::findChromPeaks(row, param = settings) |
|
111 |
+ cat("peakPicking Done \n") |
|
112 |
+ pdp <- xcms::PeakDensityParam(sampleGroups = seq(along = files), bw = 20, |
|
113 |
+ minFraction = 0.5) |
|
114 |
+ xset <- xcms::groupChromPeaks(xset, pdp) |
|
115 |
+ xset <- xcms::fillChromPeaks(xset, param = xcms::ChromPeakAreaParam()) |
|
116 |
+ xset <- as(xset, "xcmsSet") |
|
117 |
+ if (!is.null(mzrange)) { |
|
118 |
+ idx <- (xset@peaks[, "mz"] > mzrange[1]) & |
|
119 |
+ (xset@peaks[, "mz"] < mzrange[2] |
|
120 |
+ ) |
|
121 |
+ xset@peaks <- xset@peaks[idx, ] |
|
122 |
+ } |
|
123 |
+ ## deconvolution; list of all the xset |
|
124 |
+ ap <- split(xset, factor(xcms::sampnames(xset), |
|
125 |
+ levels = xcms::sampnames(xset))) |
|
126 |
+ apd <- lapply(ap, function(x) { |
|
127 |
+ xa <- CAMERA::xsAnnotate(x, sample = 1) |
|
128 |
+ xa <- CAMERA::groupFWHM(xa, perfwhm = perfwhm) |
|
89 | 129 |
} |
90 |
- data <- lapply(seq(along = cdfFiles), function(x) { |
|
91 |
- filt <- sapply(xs[[x]]@pspectra, function(r) { |
|
92 |
- length(r) |
|
93 |
- }) |
|
94 |
- spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >= |
|
95 |
- 6)] |
|
96 |
- mzrange <- object@mz |
|
97 |
- abu <- data.frame(matrix(0, nrow = length(mzrange), ncol = length(spec.idx))) |
|
98 |
- rownames(abu) <- mzrange |
|
99 |
- colnames(abu) <- spec.idx |
|
100 |
- mz <- data.frame(mz = mzrange) |
|
101 |
- abu <- sapply(spec.idx, function(z) { |
|
102 |
- spec <- getpspectra(xs[[x]], z)[, c("mz", area)] |
|
103 |
- spec[, "mz"] <- round(spec[, "mz"]) |
|
104 |
- if (max(table(spec[, 1])) > 1) { |
|
105 |
- spec.noDouble <- cbind(aggregate(spec[, 2], list(spec[, |
|
106 |
- 1]), FUN = sum)) |
|
107 |
- colnames(spec.noDouble) <- c("mz", area) |
|
108 |
- spec <- spec.noDouble |
|
130 |
+ ) |
|
131 |
+ ## filter pseudo spectra |
|
132 |
+ intensity <- "maxo" |
|
133 |
+ res <- lapply(apd, function(x) { |
|
134 |
+ allpks <- x@groupInfo |
|
135 |
+ minI <- minintens# * max(allpks[, intensity]) |
|
136 |
+ tooSmall <- which(allpks[, intensity] < minI) |
|
137 |
+ pspectra <- lapply(x@pspectra, function(x) { |
|
138 |
+ x[!x %in% tooSmall]}) |
|
139 |
+ npeaks <- sapply(pspectra, length) |
|
140 |
+ pspectra <- pspectra[npeaks >= minfeat] |
|
141 |
+ ## list of unique pspec, double masses removed |
|
142 |
+ listpspec <- lapply(pspectra, function(x) { |
|
143 |
+ aa <- cbind(mz = round(allpks[x, "mz"], digits = 0), |
|
144 |
+ allpks[x, c(intensity, "rt", "rtmin", "rtmax")] |
|
145 |
+ ) |
|
146 |
+ double <- duplicated(aa[, 1]) |
|
147 |
+ bb <- cbind(aggregate(aa[, 2], list(aa[, 1]), |
|
148 |
+ FUN = sum), aa[!double, 3:5]) |
|
149 |
+ setNames(bb, c(colnames(aa))) |
|
150 |
+ } |
|
151 |
+ ) |
|
152 |
+ ## get mzrange from data |
|
153 |
+ mz.max <- max(sapply(listpspec, function(x) { |
|
154 |
+ max(x[, "mz"])})) |
|
155 |
+ mz.min <- min(sapply(listpspec, function(x) { |
|
156 |
+ min(x[, "mz"])})) |
|
157 |
+ mz.range <- data.frame(mz = c(mz.min:mz.max)) |
|
158 |
+ ## merge pspec with mzrange |
|
159 |
+ listpspec.merged <- lapply(listpspec, function(x) { |
|
160 |
+ merge(x, mz.range, by = "mz", all = TRUE) |
|
109 | 161 |
} |
110 |
- else { |
|
111 |
- spec |
|
162 |
+ ) |
|
112 | 163 |
} |
113 |
- abu$z <- merge(spec, mz, by = "mz", all = TRUE)[, |
|
114 |
- area] |
|
115 |
- }) |
|
116 |
- colnames(abu) <- spec.idx |
|
117 |
- abu[is.na(abu)] <- c(0) |
|
118 |
- return(abu) |
|
119 |
- }) |
|
120 |
- apex.rt <- lapply(seq(along = cdfFiles), function(x) { |
|
121 |
- filt <- sapply(xs[[x]]@pspectra, function(r) { |
|
122 |
- length(r) |
|
123 |
- }) |
|
124 |
- spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >= |
|
125 |
- 6)] |
|
126 |
- apex.rt <- sapply(spec.idx, function(z) { |
|
127 |
- spec.rt <- getpspectra(xs[[x]], z)[, c("rt")] |
|
128 |
- rt <- round(mean(spec.rt)/60, digits = 3) |
|
129 |
- }) |
|
130 |
- return(apex.rt) |
|
131 |
- }) |
|
132 |
- spectra.ind <- lapply(seq(along = cdfFiles), function(x) { |
|
133 |
- filt <- sapply(xs[[x]]@pspectra, function(r) { |
|
134 |
- length(r) |
|
135 |
- }) |
|
136 |
- spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >= |
|
137 |
- 6)] |
|
138 |
- }) |
|
139 |
- ind.start <- lapply(seq(along = cdfFiles), function(x) { |
|
140 |
- filt <- sapply(xs[[x]]@pspectra, function(r) { |
|
141 |
- length(r) |
|
142 |
- }) |
|
143 |
- spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >= |
|
144 |
- 6)] |
|
145 |
- rt.start <- sapply(spec.idx, function(z) { |
|
146 |
- spec.rt <- getpspectra(xs[[x]], z)[, c("rtmin")] |
|
147 |
- rt <- round(mean(spec.rt), digits = 3) |
|
148 |
- }) |
|
149 |
- return(rt.start) |
|
150 |
- }) |
|
151 |
- ind.stop <- lapply(seq(along = cdfFiles), function(x) { |
|
152 |
- filt <- sapply(xs[[x]]@pspectra, function(r) { |
|
153 |
- length(r) |
|
154 |
- }) |
|
155 |
- spec.idx <- c(1:length(xs[[x]]@pspectra))[which(filt >= |
|
156 |
- 6)] |
|
157 |
- rt.stop <- sapply(spec.idx, function(z) { |
|
158 |
- spec.rt <- getpspectra(xs[[x]], z)[, c("rtmax")] |
|
159 |
- rt <- round(mean(spec.rt), digits = 3) |
|
160 |
- }) |
|
161 |
- return(rt.stop) |
|
162 |
- }) |
|
163 |
- object@files |
|
164 |
- object@mz |
|
164 |
+ ) |
|
165 |
+ ## merge again with a common mz range among all samples
|
166 |
+ max.mz <- max(sapply(res, function(x) { |
|
167 |
+ max(sapply(x, "[[", "mz"))})) |
|
168 |
+ min.mz <- min(sapply(res, function(x) { |
|
169 |
+ min(sapply(x, "[[", "mz"))})) |
|
170 |
+ mz.range.all <- data.frame(mz = c(min.mz:max.mz)) |
|
171 |
+ res.mz.mrg <- lapply(res, function(x) { |
|
172 |
+ lapply(x, function(y) { |
|
173 |
+ merge(y, mz.range.all, by = "mz", all = TRUE) |
|
174 |
+ } |
|
175 |
+ ) |
|
176 |
+ } |
|
177 |
+ ) |
|
178 |
+ ## prepare the S4 slots |
|
179 |
+ spec.ind <- lapply(res.mz.mrg, function(x) { |
|
180 |
+ 1:length(x) |
|
181 |
+ } |
|
182 |
+ ) |
|
183 |
+ apex.rt <- lapply(res.mz.mrg, function(x) { |
|
184 |
+ rt <- lapply(x, "[[", "rt") |
|
185 |
+ round(sapply(rt, function(x) { |
|
186 |
+ mean(x, na.rm = TRUE) / 60 |
|
187 |
+ } |
|
188 |
+ ), digits = 3) |
|
189 |
+ } |
|
190 |
+ ) |
|
191 |
+ start.rt <- lapply(res.mz.mrg, function(x) { |
|
192 |
+ rt <- lapply(x, "[[", "rtmin") |
|
193 |
+ round(sapply(rt, function(x) { |
|
194 |
+ mean(x, na.rm = TRUE) / 60 |
|
195 |
+ } |
|
196 |
+ ), digits = 3) |
|
197 |
+ } |
|
198 |
+ ) |
|
199 |
+ stop.rt <- lapply(res.mz.mrg, function(x) { |
|
200 |
+ rt <- lapply(x, "[[", "rtmax") |
|
201 |
+ round(sapply(rt, function(x) { |
|
202 |
+ mean(x, na.rm = TRUE) / 60 |
|
203 |
+ } |
|
204 |
+ ), digits = 3) |
|
205 |
+ } |
|
206 |
+ ) |
|
207 |
+ data <- lapply(res.mz.mrg, function(x) { |
|
208 |
+ a <- lapply(x, "[[", intensity) |
|
209 |
+ aa <- do.call(cbind, a) |
|
210 |
+ colnames(aa) <- c(1:ncol(aa)) |
|
211 |
+ aa[is.na(aa)] <- c(0) |
|
212 |
+ return(aa) |
|
213 |
+ } |
|
214 |
+ ) |
|
215 |
+ |
|
165 | 216 |
for (i in 1:length(files)) { |
166 | 217 |
ord <- order(apex.rt[[i]]) |
167 | 218 |
data[[i]] <- data[[i]][, ord] |
168 | 219 |
apex.rt[[i]] <- apex.rt[[i]][ord] |
169 |
- spectra.ind[[i]] <- spectra.ind[[i]][ord] |
|
170 |
- ind.start[[i]] <- ind.start[[i]][ord] |
|
171 |
- ind.stop[[i]] <- ind.stop[[i]][ord] |
|
220 |
+ spec.ind[[i]] <- spec.ind[[i]][ord] |
|
221 |
+ start.rt[[i]] <- start.rt[[i]][ord] |
|
222 |
+ stop.rt[[i]] <- stop.rt[[i]][ord] |
|
172 | 223 |
} |
173 |
- options(warn = 0) |
|
174 |
- nm <- lapply(files, function(u) { |
|
175 |
- sp <- strsplit(u, split = "/")[[1]] |
|
176 |
- sp[length(sp)] |
|
177 |
- }) |
|
178 |
- nm <- sub(".CDF$", "", nm) |
|
179 |
- names(data) <- names(apex.rt) <- names(spectra.ind) <- names(ind.start) <- names(ind.stop) <- nm |
|
180 |
- new("peaksDataset", files = object@files, peaksdata = data, |
|
181 |
- peaksrt = apex.rt, peaksind = spectra.ind, peaksind.start = ind.start, |
|
182 |
- peaksind.end = ind.stop, rawdata = object@rawdata, rawrt = object@rawrt, |
|
183 |
- mz = object@mz) |
|
224 |
+ new("peaksDataset", |
|
225 |
+ files = files, |
|
226 |
+ peaksdata = data, |
|
227 |
+ peaksrt = apex.rt, |
|
228 |
+ peaksind = spec.ind, |
|
229 |
+ peaksind.start = start.rt, |
|
230 |
+ peaksind.end = stop.rt, |
|
231 |
+ rawdata = object@rawdata, |
|
232 |
+ rawrt = object@rawrt, |
|
233 |
+ mz = c(min.mz:max.mz) |
|
234 |
+ ) |
|
184 | 235 |
} |
236 |
+ |
|
237 |
+ |
|
238 |
+##' Duplicate peak removal function |
|
239 |
+##' |
|
240 |
+##' Remove redundant peaks, in this case where there are any peaks within an |
|
241 |
+##' absolute m/z value of 0.2 and within 3 s for any one sample in the xcmsSet |
|
242 |
+##' (the largest peak is kept) |
|
243 |
+##' @title deDuper |
|
244 |
+##' @param object xcms object |
|
245 |
+##' @param mz_abs mz range |
|
246 |
+##' @param rt_abs rt range |
|
247 |
+##' @return an object of xcms class |
|
248 |
+##' @author r |
|
249 |
+de_Duper <- function(object, mz_abs = 0.1, rt_abs = 2) { |
|
250 |
+ mzdiff <- 0 |
|
251 |
+ peaks_mat <- xcms::chromPeaks(object) |
|
252 |
+ mz_min <- peaks_mat[, "mz"] - mz_abs |
|
253 |
+ mz_max <- peaks_mat[, "mz"] + mz_abs |
|
254 |
+ rt_min <- peaks_mat[, "rt"] - rt_abs |
|
255 |
+ rt_max <- peaks_mat[, "rt"] + rt_abs |
|
256 |
+ |
|
257 |
+ peaks_mat_out <- NULL |
|
258 |
+ |
|
259 |
+ samples <- unique(peaks_mat[, "sample"]) |
|
260 |
+ |
|
261 |
+ cat("\n", "Duplicate peak removal; % complete: ") |
|
262 |
+ percplus <- -1 |
|
263 |
+ |
|
264 |
+ for (i in 1:length(samples)) { |
|
265 |
+ perc <- round(i / length(samples) * 100) |
|
266 |
+ if (perc %% 10 == 0 && perc != percplus) { |
|
267 |
+ cat(perc, " ") |
|
268 |
+ } |
|
269 |
+ percplus <- perc |
|
270 |
+ |
|
271 |
+ peaks_mat_i <- peaks_mat[which(peaks_mat[, "sample"] == samples[i]), , |
|
272 |
+ drop = FALSE] |
|
273 |
+ mz_min_i <- mz_min[which(peaks_mat[, "sample"] == samples[i])] |
|
274 |
+ mz_max_i <- mz_max[which(peaks_mat[, "sample"] == samples[i])] |
|
275 |
+ rt_min_i <- rt_min[which(peaks_mat[, "sample"] == samples[i])] |
|
276 |
+ rt_max_i <- rt_max[which(peaks_mat[, "sample"] == samples[i])] |
|
277 |
+ uorder_i <- order(peaks_mat_i[, "into"], decreasing = TRUE) |
|
278 |
+ uindex_i <- xcms::rectUnique(cbind(mzmin = mz_min_i, mzmax = mz_max_i, |
|
279 |
+ rtmin = rt_min_i, rtmax = rt_max_i), |
|
280 |
+ uorder_i, mzdiff) |
|
281 |
+ peaks_mat_i <- peaks_mat_i[uindex_i, , drop = FALSE] |
|
282 |
+ peaks_mat_out <- rbind(peaks_mat_out, peaks_mat_i) |
|
283 |
+ } |
|
284 |
+ cat("\n") |
|
285 |
+ xcms::chromPeaks(object) <- peaks_mat_out |
|
286 |
+ return(object) |
|
287 |
+} |
|
185 | 288 |
\ No newline at end of file |
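A minimal, self-contained sketch (invented peak values, not package data) of the de-duplication idea that de_Duper() applies per sample: peaks whose m/z and retention-time windows overlap are collapsed onto the most intense one via xcms::rectUnique().

pk <- cbind(mz   = c(100.05, 100.10, 250.00),
            rt   = c(300, 301, 450),
            into = c(5e4, 8e4, 2e4))
mz_abs <- 0.2; rt_abs <- 3
keep <- xcms::rectUnique(
    cbind(mzmin = pk[, "mz"] - mz_abs, mzmax = pk[, "mz"] + mz_abs,
          rtmin = pk[, "rt"] - rt_abs, rtmax = pk[, "rt"] + rt_abs),
    order(pk[, "into"], decreasing = TRUE))
pk[keep, ]  # the two overlapping peaks near m/z 100 collapse to the more intense one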
... | ... |
@@ -176,10 +176,11 @@ betweenAlignment <- function(pD, cAList, pAList, impList, filterMin = 1, |
176 | 176 |
#' @export |
177 | 177 |
#' @noRd |
178 | 178 |
setMethod("show","betweenAlignment", |
179 |
- function(object){ |
|
179 |
+ function(object) { |
|
180 | 180 |
cat("An object of class \"", class(object), "\"\n", sep = "") |
181 | 181 |
cat(length(object@mergedPeaksDataset@peaksrt), "groups:", |
182 |
- sapply(object@mergedPeaksDataset@peaksrt, length), "merged peaks\n" |
|
182 |
+ sapply(object@mergedPeaksDataset@peaksrt, length), |
|
183 |
+ "merged peaks\n" |
|
183 | 184 |
) |
184 | 185 |
} |
185 | 186 |
) |
... | ... |
@@ -66,13 +66,13 @@ setMethod("decompress","clusterAlignment", |
66 | 66 |
#' @export clusterAlignment |
67 | 67 |
clusterAlignment <- function(pD, runs = 1:length(pD@rawdata), |
68 | 68 |
timedf = NULL, usePeaks = TRUE, |
69 |
- verbose = TRUE, ...){ |
|
69 |
+ verbose = TRUE, ...) { |
|
70 | 70 |
n <- length(runs) |
71 |
- if(usePeaks) |
|
71 |
+ if (usePeaks) |
|
72 | 72 |
nr <- length(pD@peaksdata) |
73 | 73 |
else |
74 | 74 |
nr <- length(pD@rawdata) |
75 |
- alignments <- vector("list", n*(n-1)/2) |
|
75 |
+ alignments <- vector("list", n * (n - 1) / 2) |
|
76 | 76 |
aligned <- matrix(-1, nr, nr) |
77 | 77 |
colnames(aligned) <- names(pD@rawdata) |
78 | 78 |
rownames(aligned) <- names(pD@rawdata) |
... | ... |
@@ -80,48 +80,43 @@ clusterAlignment <- function(pD, runs = 1:length(pD@rawdata), |
80 | 80 |
colnames(dist) <- names(pD@rawdata)[runs] |
81 | 81 |
rownames(dist) <- names(pD@rawdata)[runs] |
82 | 82 |
count <- 0 |
83 |
- for(i in 1:(n-1)) |
|
84 |
- { |
|
83 |
+ for (i in 1:(n - 1)) {
|
85 | 84 |
run.i <- runs[i] |
86 |
- for(j in (i+1):n) |
|
87 |
- { |
|
85 |
+ for (j in (i + 1):n) { |
|
88 | 86 |
run.j <- runs[j] |
89 |
- count <- count+1 |
|
90 |
- if(verbose) |
|
91 |
- { |
|
87 |
+ count <- count + 1 |
|
88 |
+ if (verbose) {
|
92 | 89 |
cat("[clusterAlignment] Aligning", |
93 | 90 |
names(pD@rawdata)[run.i], "to", |
94 | 91 |
names(pD@rawdata)[run.j], "\n") |
95 | 92 |
} |
96 |
- if(usePeaks) |
|
97 |
- { |
|
98 |
- alignments[[count]] <- |
|
99 |
- peaksAlignment(pD@peaksdata[[run.i]], |
|
100 |
- pD@peaksdata[[run.j]], |
|
101 |
- pD@peaksrt[[run.i]], |
|
102 |
- pD@peaksrt[[run.j]], |
|
103 |
- usePeaks = usePeaks, |
|
104 |
- timedf = timedf[[count]], |
|
93 |
+ if (usePeaks) { |
|
94 |
+ alignments[[count]] <- |
|
95 |
+ peaksAlignment(pD@peaksdata[[run.i]], |
|
96 |
+ pD@peaksdata[[run.j]], |
|
97 |
+ pD@peaksrt[[run.i]], |
|
98 |
+ pD@peaksrt[[run.j]], |
|
99 |
+ usePeaks = usePeaks, |
|
100 |
+ timedf = timedf[[count]], |
|
105 | 101 |
verbose = verbose, ...) |
106 |
- } |
|
107 |
- else |
|
108 |
- { |
|
102 |
+ } else { |
|
109 | 103 |
alignments[[count]] <- |
110 | 104 |
peaksAlignment(pD@rawdata[[run.i]], |
111 | 105 |
pD@rawdata[[run.j]], |
112 | 106 |
pD@rawrt[[run.i]], |
113 | 107 |
pD@rawrt[[run.j]], |
114 |
- usePeaks=usePeaks, timedf=NULL, |
|
115 |
- verbose=verbose, ...) |
|
108 |
+ usePeaks = usePeaks, timedf = NULL, |
|
109 |
+ verbose = verbose, ...) |
|
116 | 110 |
} |
117 |
- aligned[runs[i],runs[j]] <- aligned[runs[j],runs[i]] <- count |
|
118 |
- dist[j,i] <- dist[i,j] <- alignments[[count]]@dist |
|
119 |
- } |
|
120 |
- } |
|
111 |
+ aligned[runs[i], runs[j]] <- aligned[runs[j], runs[i]] <- count |
|
112 |
+ dist[j, i] <- dist[i, j] <- alignments[[count]]@dist |
|
113 |
+ } |
|
114 |
+ } |
|
121 | 115 |
merge <- hclust(as.dist(dist), method = "average")$merge |
122 | 116 |
merge.copy <- merge |
123 |
- for(i in 1:length(runs)) |
|
124 |
- {merge[which(merge.copy == (-i))] <- (-runs[i])} |
|
117 |
+ for (i in 1:length(runs)) { |
|
118 |
+ merge[which(merge.copy == (-i))] <- (-runs[i]) |
|
119 |
+ } |
|
125 | 120 |
new("clusterAlignment", runs = runs, aligned = aligned, |
126 | 121 |
gap = alignments[[1]]@gap, D = alignments[[1]]@D, dist = dist, |
127 | 122 |
alignments = alignments, merge = merge) |
... | ... |
@@ -170,38 +165,36 @@ function(object) { |
170 | 165 |
##' @keywords classes |
171 | 166 |
##' @examples |
172 | 167 |
##' |
173 |
-##' require(gcspikelite) |
|
174 |
-##' |
|
175 |
-##' ## paths and files |
|
176 |
-##' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
177 |
-##' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE) |
|
178 |
-##' eluFiles <- dir(gcmsPath, "ELU", full=TRUE) |
|
179 |
-##' |
|
180 |
-##' ## read data |
|
181 |
-##' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
182 |
-##' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
183 |
-##' snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5) |
|
184 |
-##' ca <- clusterAlignment(pd, metric = 1, D = 50, type = 1, gap = 0.5) |
|
168 |
+#' require(gcspikelite) |
|
169 |
+#' |
|
170 |
+#' # paths and files |
|
171 |
+#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
172 |
+#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE) |
|
173 |
+#' eluFiles <- dir(gcmsPath, "ELU", full=TRUE) |
|
174 |
+#' |
|
175 |
+#' # read data, peak detection results |
|
176 |
+#' pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
177 |
+#' pd <- addAMDISPeaks(pd, eluFiles[1:2]) |
|
178 |
+#' |
|
179 |
+#' ca <- clusterAlignment(pd, gap=0.5, D=0.05, df=30, metric=1, type=1) |
|
185 | 180 |
##' plotClustAlignment(ca, run = 1) |
186 | 181 |
##' plotClustAlignment(ca, run = 2) |
187 |
-##' plotClustAlignment(ca, run = 3) |
|
182 |
+##' plotClustAlignment(ca, run = 3) |
|
188 | 183 |
##' |
189 | 184 |
##' @importFrom graphics plot |
190 | 185 |
##' @export |
191 | 186 |
setMethod("plotClustAlignment", "clusterAlignment", |
192 |
- function(object, alignment = 1, ...) |
|
193 |
- { |
|
187 |
+ function(object, alignment = 1, ...) { |
|
194 | 188 |
rn <- rownames(object@aligned) |
195 |
- for(i in alignment){ |
|
196 |
- ind <- which(object@aligned == i, arr.ind = TRUE)[2,] |
|
189 |
+ for (i in alignment) { |
|
190 |
+ ind <- which(object@aligned == i, arr.ind = TRUE)[2, ] |
|
197 | 191 |
plot(## object@alignments[[i]], |
198 | 192 |
object@alignments[[i]]@v$match, |
199 | 193 |
main = paste("D=", object@D, " gap=", object@gap, |
200 | 194 |
sep = ""), |
201 |
- xlab = paste("Peaks ",rn[ind[1]], sep = " - "), |
|
202 |
- ylab = paste("Peaks ",rn[ind[2]], sep = " - "), |
|
195 |
+ xlab = paste("Peaks ", rn[ind[1]], sep = " - "), |
|
196 |
+ ylab = paste("Peaks ", rn[ind[2]], sep = " - "), |
|
203 | 197 |
...) |
204 | 198 |
} |
205 | 199 |
} |
206 | 200 |
) |
207 |
- |
... | ... |
@@ -43,36 +43,29 @@ |
43 | 43 |
#' |
44 | 44 |
#' @useDynLib flagme |
45 | 45 |
#' @export dp |
46 |
-dp<-function(M,gap=.5,big=10000000000,verbose=FALSE) { |
|
46 |
+dp <- function(M, gap = .5, big = 10000000000, verbose = FALSE) {
|
47 | 47 |
|
48 | 48 |
# setup score matrix |
49 |
- bigr<-c(0,big*(1:ncol(M))) |
|
50 |
- bigc<-c(big*(1:nrow(M))) |
|
51 |
- D<-rbind(bigr,cbind(bigc,M)); #D[1,1]<-0 |
|
52 |
- #bigr<-c(0,gap*(1:ncol(M))) |
|
53 |
- #bigc<-c(gap*(1:nrow(M))) |
|
54 |
- #D<-rbind(bigr,cbind(bigc,M)); #D[1,1]<-0 |
|
49 |
+ bigr <- c(0, big * (1:ncol(M))) |
|
50 |
+ bigc <- c(big * (1:nrow(M))) |
|
51 |
+ D <- rbind(bigr, cbind(bigc, M)); # D[1, 1] <-0 |
|
52 |
+ # bigr <- c(0, gap * (1:ncol(M))) |
|
53 |
+ # bigc <- c(gap * (1:nrow(M))) |
|
54 |
+ # D <- rbind(bigr, cbind(bigc, M)); # D[1, 1] <-0 |
|
55 | 55 |
|
56 | 56 |
# setup traceback |
57 |
- phi<-matrix(0,nrow=nrow(D),ncol=ncol(D)) |
|
58 |
- phi[1,]<-2; phi[,1]<-1; phi[1,1]<-3 |
|
59 |
- |
|
60 |
- match<-matrix(-1,nrow=nrow(M)+ncol(M),ncol=2) # matrix to store matches |
|
57 |
+ phi <- matrix(0, nrow = nrow(D), ncol = ncol(D)) |
|
58 |
+ phi[1, ] <- 2; phi[, 1] <- 1; phi[1, 1] <- 3 |
|
59 |
+ match <- matrix(-1, nrow = nrow(M) + ncol(M), ncol = 2) # matrix to store matches |
|
61 | 60 |
|
62 |
- out<-.C("dp", D=as.double(D), M=as.double(M), gap=as.double(gap), phi=as.integer(phi), |
|
63 |
- nr=as.integer(nrow(M)), ncol=as.integer(ncol(M)), |
|
64 |
- match=as.integer(match), nmatch=as.integer(0), |
|
65 |
- PACKAGE="flagme" |
|
66 |
- ) |
|
61 |
+ out <- .C("dp", D = as.double(D), M = as.double(M), gap = as.double(gap), |
|
62 |
+ phi = as.integer(phi), nr = as.integer(nrow(M)), ncol = as.integer(ncol(M)), |
|
63 |
+ match = as.integer(match), nmatch = as.integer(0), PACKAGE = "flagme") |
|
67 | 64 |
#list(D=matrix(out$D,ncol=ncol(D)),phi=matrix(out$phi,ncol=ncol(D)),match=1+matrix(out$match,ncol=2)[out$nmatch:1,]) |
68 |
- list(match=1+matrix(out$match,ncol=2)[out$nmatch:1,]) |
|
65 |
+ list(match = 1 + matrix(out$match, ncol = 2)[out$nmatch:1, ]) |
|
69 | 66 |
} |
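As a rough illustration (toy matrix, not real peak data): dp() can be run on any small distance matrix, and the match element of the result is a two-column matrix of aligned row/column indices along the optimal path.

set.seed(1)
M <- matrix(runif(12), nrow = 3, ncol = 4)  # rows = peaks of run 1, cols = run 2
al <- dp(M, gap = 0.5)
al$match  # 1-based pairs of matched peaks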
70 | 67 |
|
71 | 68 |
|
72 |
- |
|
73 |
- |
|
74 |
- |
|
75 |
- |
|
76 | 69 |
#' dynRT |
77 | 70 |
#' |
78 | 71 |
#' Dynamic Retention Time Based Alignment algorithm, given a similarity matrix |
... | ... |
@@ -87,77 +80,72 @@ dp<-function(M,gap=.5,big=10000000000,verbose=FALSE) { |
87 | 80 |
#' @examples |
88 | 81 |
#' |
89 | 82 |
#' require(gcspikelite) |
90 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
91 |
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
92 |
-#' ## read data, peak detection results |
|
93 |
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), |
|
94 |
-#' rtrange=c(7.5,10.5)) |
|
95 |
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, |
|
96 |
-#' peakPicking=c('mF'),snthresh=3, fwhm=10, step=0.1, steps=2, |
|
97 |
-#' mzdiff=0.5, sleep=0) |
|
83 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
84 |
+#' sep = "/"),"CDF", full = TRUE) |
|
85 |
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
86 |
+#' ## create settings object |
|
87 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
88 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
89 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
90 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
91 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
92 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
93 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
94 |
+#' data |
|
98 | 95 |
#' ## review peak picking |
99 |
-#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
96 |
+#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2)) |
|
100 | 97 |
#' ## similarity |
101 |
-#' r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]], pd@peaksrt[[1]], |
|
102 |
-#' pd@peaksrt[[2]], D=50) |
|
98 |
+#' r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]], data@peaksrt[[1]], |
|
99 |
+#' data@peaksrt[[2]], D = 50) |
|
103 | 100 |
#' ## dynamic retention time based alignment algorithm |
104 |
-#' v <- dynRT(S=r) |
|
101 |
+#' v <- dynRT(S = r) |
|
105 | 102 |
#' |
106 | 103 |
#' @export dynRT |
107 |
-dynRT <- function(S){ |
|
108 |
- options(warn=-1) |
|
104 |
+dynRT <- function(S) { |
|
105 |
+ options(warn = -1) |
|
109 | 106 |
## S similarity matrix |
110 | 107 |
## move through S to find the highest score
111 |
- S <- round(S, digits=3) |
|
112 |
- id <- max(table(S))-1 |
|
108 |
+ S <- round(S, digits = 3) |
|
109 |
+ id <- max(table(S)) - 1 |
|
113 | 110 |
## filling <- which(table(S) > 200)# weak |
114 | 111 |
filling <- which(table(S) > id) |
115 | 112 |
filling.names <- as.numeric(names(filling)) |
116 |
- if(filling.names > 1) |
|
117 |
- { |
|
113 |
+ if (filling.names > 1) { |
|
118 | 114 |
filling.names <- 1 |
119 | 115 |
} |
120 | 116 |
trace <- c() |
121 | 117 |
## this boolean workaround allows using both the ndp
122 | 118 |
## function and the matching function from MR and RR |
123 |
- if(filling.names == 1) |
|
124 |
- { |
|
125 |
- for(i in 1:nrow(S)){ |
|
126 |
- trace[i] <- which(S[i,] == min(S[i,])) # MR |
|
119 |
+ if (filling.names == 1) { |
|
120 |
+ for (i in 1:nrow(S)) { |
|
121 |
+ trace[i] <- which(S[i, ] == min(S[i, ])) # MR |
|
127 | 122 |
} |
128 | 123 |
} |
129 |
- if(filling.names == 0) |
|
130 |
- { |
|
131 |
- for(i in 1:nrow(S)){ |
|
132 |
- trace[i] <- which(S[i,] == max(S[i,])) # RR |
|
124 |
+ if (filling.names == 0) { |
|
125 |
+ for (i in 1:nrow(S)) { |
|
126 |
+ trace[i] <- which(S[i, ] == max(S[i, ])) # RR |
|
133 | 127 |
} |
134 | 128 |
} |
135 |
- trace.mtx <- matrix(NA, nrow=nrow(S), ncol=2) |
|
136 |
- trace.mtx[,1] <- 1:nrow(S) |
|
129 |
+ trace.mtx <- matrix(NA, nrow = nrow(S), ncol = 2) |
|
130 |
+ trace.mtx[, 1] <- 1:nrow(S) |
|
137 | 131 |
trace[which(trace == 1)] <- NA |
138 |
- trace.mtx[,2] <- trace |
|
132 |
+ trace.mtx[, 2] <- trace |
|
139 | 133 |
## find the compounds matched more than once and make them unique
140 |
- idx <- which(table(trace.mtx[,2]) > 1) # colonne della matrice S |
|
134 |
+ idx <- which(table(trace.mtx[, 2]) > 1) # columns of the S matrix
|
141 | 135 |
names.idx <- as.numeric(names(idx)) # the duplicated matches
142 | 136 |
position <- c() # the duplicates with the highest match score
143 |
- |
|
144 |
- if(filling.names == 1) |
|
145 |
- { |
|
146 |
- for(k in 1:length(names.idx)){ |
|
147 |
- position[k] <- which(S[,names.idx[k]] == min(S[,names.idx[k]])) |
|
148 |
- } |
|
137 |
+ if (filling.names == 1) { |
|
138 |
+ for (k in 1:length(names.idx)) { |
|
139 |
+ position[k] <- which(S[, names.idx[k]] == min(S[, names.idx[k]])) |
|
140 |
+ } |
|
149 | 141 |
} |
150 |
- if(filling.names == 0) |
|
151 |
- { |
|
152 |
- for(k in 1:length(names.idx)){ |
|
153 |
- position[k] <- which(S[,names.idx[k]] == max(S[,names.idx[k]])) |
|
142 |
+ if (filling.names == 0) { |
|
143 |
+ for (k in 1:length(names.idx)) { |
|
144 |
+ position[k] <- which(S[, names.idx[k]] == max(S[, names.idx[k]])) |
|
154 | 145 |
} |
155 | 146 |
} |
156 |
- |
|
157 | 147 |
tutti <- which(trace %in% names.idx) |
158 |
- trace.mtx[,2][tutti[! tutti %in% position]] <- NA |
|
159 |
- |
|
160 |
- return(list(match=trace.mtx)) |
|
161 |
- |
|
162 |
- options(warn=0) |
|
148 |
+ trace.mtx[, 2][tutti[! tutti %in% position]] <- NA |
|
149 |
+ return(list(match = trace.mtx)) |
|
150 |
+ options(warn = 0) |
|
163 | 151 |
} |
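A sketch of how the returned match matrix is meant to be read (invented similarity values): each row of S, i.e. each peak of the first run, is paired with its best-scoring column in the second run, essentially the row-wise argmax below; duplicated or unmatched assignments are then set to NA by dynRT().

S <- matrix(c(0.9, 0.1, 0.2,
              0.2, 0.8, 0.1,
              0.1, 0.2, 0.7), nrow = 3, byrow = TRUE)
cbind(run1 = seq_len(nrow(S)), run2 = apply(S, 1, which.max))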
... | ... |
@@ -1,9 +1,9 @@ |
1 | 1 |
#' exportSpectra |
2 |
-#' |
|
2 |
+#' |
|
3 | 3 |
#' Write the mass spectrum into a .msp file to be used in NIST search.
4 |
-#' |
|
4 |
+#' |
|
5 | 5 |
#' Write the mass spectrum into a .msp file to be used in NIST search.
6 |
-#' |
|
6 |
+#' |
|
7 | 7 |
#' @param object an object of class "peaksDataset" |
8 | 8 |
#' @param outList an object created using the gatherInfo() function |
9 | 9 |
#' @param spectra numeric. The number of the mass spectra to be printed. It |
... | ... |
@@ -13,7 +13,7 @@ |
13 | 13 |
#' @return a .msp file |
14 | 14 |
#' @author riccardo.romoli@@unifi.com |
15 | 15 |
#' @export exportSpectra |
16 |
-exportSpectra <- function (object, outList, spectra, normalize = TRUE) |
|
16 |
+exportSpectra <- function (object, outList, spectra, normalize = TRUE) |
|
17 | 17 |
{ |
18 | 18 |
spectra <- as.numeric(spectra) |
19 | 19 |
mz <- outList[[spectra]]$mz |
... | ... |
@@ -21,7 +21,7 @@ exportSpectra <- function (object, outList, spectra, normalize = TRUE) |
21 | 21 |
if(normalize) |
22 | 22 |
{ |
23 | 23 |
for(i in 1:ncol(abu)){ |
24 |
- if(is.na(sum(abu[, i]))) |
|
24 |
+ if(is.na(sum(abu[, i]))) |
|
25 | 25 |
next |
26 | 26 |
abu[, i] <- 100 * abu[, i]/abu[which.max(abu[, i]), i] |
27 | 27 |
} |
... | ... |
@@ -33,11 +33,10 @@ exportSpectra <- function (object, outList, spectra, normalize = TRUE) |
33 | 33 |
idx <- 1 |
34 | 34 |
spec <- paste(mz, abu[,idx]) |
35 | 35 |
msp <- rbind(paste("NAME: Variable", spectra), |
36 |
- paste("COMMENT:", round(object@peaksrt[[idx]][spectra], digits = 2), "min"), |
|
37 |
- paste("FORMULA:"), paste("MW:"), paste("CAS:"), paste("SYNONYM:"), |
|
36 |
+ paste("COMMENT:", round(object@peaksrt[[idx]][spectra], digits = 2), "min"), |
|
37 |
+ paste("FORMULA:"), paste("MW:"), paste("CAS:"), paste("SYNONYM:"), |
|
38 | 38 |
paste("Num Peaks:", length(spec)), matrix(unlist(spec), nrow = length(unlist(spec)), ncol = 1)) |
39 | 39 |
|
40 | 40 |
write(msp, file = paste0(spectra,".msp"), sep = "\n") |
41 |
- |
|
42 |
-} |
|
43 | 41 |
|
42 |
+} |
44 | 43 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,195 @@ |
1 |
+##' Read the mass spectra from an external msp file |
|
2 |
+##' |
|
3 |
+##' Read the mass spectra from an external file in msp format. The format is |
|
4 |
+##' used in NIST search library database. |
|
5 |
+##' @title importSpec |
|
6 |
+##' @param file a .msp file from NIST search library database |
|
7 |
+##' @return a list containing the mass spectra
|
8 |
+##' @author [email protected] |
|
9 |
+##' @export |
|
10 |
+importSpec <- function(file){ |
|
11 |
+ ## read msp lib |
|
12 |
+ lib <- scan(file, what = "", sep = "\n", quiet = TRUE) |
|
13 |
+ ## separate each mass spec |
|
14 |
+ starts <- which(regexpr("[Nn][Aa][Mm][Ee]:", lib) == 1) |
|
15 |
+ ends <- c(starts[-1] - 1, length(lib)) |
|
16 |
+ ## loop to extract the mass spec into a list |
|
17 |
+ list.spec <- lapply(1:length(starts), function(z){ |
|
18 |
+ ## meta data |
|
19 |
+# browser() |
|
20 |
+ comp <- lib[starts[z]:ends[z]] |
|
21 |
+ numPeaks.idx <- which(regexpr("[Nn][Uu][Mm] [Pp][Ee][Aa][Kk][Ss]:", comp) == 1) |
|
22 |
+ metaData <- comp[1:(numPeaks.idx - 1)]
|
23 |
+ md <- strsplit(metaData, split = ": ") |
|
24 |
+ md1 <- sapply(md, "[[", 1) |
|
25 |
+ md2 <- sapply(md, "[", 2) |
|
26 |
+ metaData.list <- setNames(as.list(md2), md1) |
|
27 |
+ ## mass spec |
|
28 |
+ nlines <- length(comp) |
|
29 |
+ npeaks <- as.numeric(strsplit(comp[numPeaks.idx], ":")[[1]][2]) |
|
30 |
+ peaks.idx <- (numPeaks.idx + 1):nlines |
|
31 |
+ pks <- gsub("^ +", "", unlist(strsplit(comp[peaks.idx], ";"))) |
|
32 |
+ ## il separatore potrebbe essere anche (), va trovata una soluzione |
|
33 |
+ pks.2 <- gsub("^ +", "", unlist(strsplit(comp[peaks.idx], "[()]"))) |
|
34 |
+    gsub("\"", "", pks.2)
|
35 |
+ |
|
36 |
+ pks <- pks[pks != ""] |
|
37 |
+ if (length(pks) != npeaks) |
|
38 |
+ { |
|
39 |
+ stop("Not the right number of peaks in compound", metaData.list$NAME) |
|
40 |
+ } |
|
41 |
+ ## error due to the presence of tab \t instead of a blank space |
|
42 |
+ if(length(grep(pattern = "\t", pks)) > 0) |
|
43 |
+ { |
|
44 |
+ pklst <- strsplit(pks, "\t") |
|
45 |
+ pklst <- lapply(pklst, function(x) x[x != ""]) |
|
46 |
+ } |
|
47 |
+ else if(length(grep(pattern = "\\s", pks)) > 0) |
|
48 |
+ { |
|
49 |
+ pklst <- strsplit(pks, " ") |
|
50 |
+ pklst <- lapply(pklst, function(x) x[x != ""]) |
|
51 |
+ } |
|
52 |
+ else |
|
53 |
+ { |
|
54 |
+ cat("Some formatting errors in the msp library \n") |
|
55 |
+ } |
|
56 |
+ |
|
57 |
+ mz <- as.numeric(sapply(pklst, "[[", 1)) |
|
58 |
+ mz <- round(mz) |
|
59 |
+ int <- as.numeric(sapply(pklst, "[[", 2)) # error |
|
60 |
+ ## |
|
61 |
+ finaltab <- matrix(c(mz, int), ncol = 2) |
|
62 |
+ |
|
63 |
+ if (any(table(mz) > 1)) |
|
64 |
+ { |
|
65 |
+ warning("Duplicate mass in compound ", metaData.list$NAME, |
|
66 |
+ " (CAS ", metaData.list$NAME, ")... summing up intensities") |
|
67 |
+ finaltab <- aggregate(finaltab[,2], |
|
68 |
+ by = list(finaltab[,1]), |
|
69 |
+ FUN = sum) |
|
70 |
+ } |
|
71 |
+ colnames(finaltab) <- c("mz", "intensity") |
|
72 |
+ c(metaData.list, list(spec = finaltab)) |
|
73 |
+ } |
|
74 |
+ ) |
|
75 |
+ return(list.spec) |
|
76 |
+} |
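A hedged, self-contained usage sketch: the single-entry .msp file below is written on the fly purely for illustration (real NIST exports carry more metadata fields); each parsed entry is a list of the metadata plus a two-column spec matrix.

msp <- c("NAME: toy compound", "FORMULA: C6H6", "Num Peaks: 3",
         "50 999; 51 120; 78 500;")
tf <- tempfile(fileext = ".msp")
writeLines(msp, tf)
lib <- importSpec(tf)
lib[[1]]$NAME  # "toy compound"
lib[[1]]$spec  # mz/intensity matrix parsed from the peak line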
|
77 |
+ |
|
78 |
+ |
|
79 |
+##' Calculate the distance between a reference mass spectrum and a sample spectrum
|
80 |
+##' |
|
81 |
+##' Calculate the distance between a reference mass spectrum and one from the |
|
82 |
+##' sample |
|
83 |
+##' @title matchSpec |
|
84 |
+##' @param spec1 reference mass spectrum |
|
85 |
+##' @param outList the return of \code{\link{gatherInfo}} |
|
86 |
+##' @param whichSpec the entry number of outList |
|
87 |
+##' @return the distance between the reference mass spectrum and the others |
|
88 |
+##' @author Riccardo Romoli |
|
89 |
+##' @export |
|
90 |
+matchSpec <- function(spec1, outList, whichSpec){ |
|
91 |
+ ## if(whichSpec == 143) browser() |
|
92 |
+ ## first get the average spec from the gatherList |
|
93 |
+ averageInt <- |
|
94 |
+ apply(outList[[whichSpec]]$data, MARGIN = 1, FUN = mean, na.rm = TRUE) |
|
95 |
+ ## outList[[whichSpec]]$data[,1:49] # but why 49?!?
|
96 |
+ ## normalize the intensity |
|
97 |
+ normInt <- sapply(averageInt, function(x){ |
|
98 |
+ i <- which.max(averageInt) |
|
99 |
+ (100*x)/averageInt[i] |
|
100 |
+ } |
|
101 |
+ ) |
|
102 |
+ ## combine mz and normalized intensity |
|
103 |
+ pspec <- matrix(c(outList[[whichSpec]]$mz, normInt), ncol = 2) |
|
104 |
+ colnames(pspec) <- c("mz", "intensity") |
|
105 |
+ libSpec <- spec1$spec |
|
106 |
+ ## merge the spectra to the same mz range and remove the NA |
|
107 |
+ mztomerge <- data.frame(mz = min(c(min(libSpec[,"mz"], na.rm = TRUE), |
|
108 |
+ min(pspec[,"mz"], na.rm = TRUE))) : |
|
109 |
+ max(c(max(libSpec[,"mz"], na.rm = TRUE), |
|
110 |
+ max(pspec[,"mz"], na.rm = TRUE))) |
|
111 |
+ ) |
|
112 |
+ pspec.merged <- merge(pspec, mztomerge, by = "mz", all = TRUE) |
|
113 |
+ libSpec.merged <- merge(libSpec, mztomerge, by = "mz", all = TRUE) |
|
114 |
+ pspec.merged[is.na(pspec.merged)] <- 0 |
|
115 |
+ libSpec.merged[is.na(libSpec.merged)] <- 0 |
|
116 |
+ ## calculate the distance among the spectra |
|
117 |
+ distance <- normDotProduct(as.matrix(pspec.merged[,"intensity"]), |
|
118 |
+ as.matrix(libSpec.merged[,"intensity"])) |
|
119 |
+ return(distance) |
|
120 |
+} |
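A toy sketch of the comparison matchSpec() boils down to once both spectra sit on a common m/z axis (invented intensities): normDotProduct() returns 0 for identical spectra and values approaching 1 for dissimilar ones.

int.sample  <- matrix(c(100, 40, 10, 0), ncol = 1)  # intensities at m/z 50:53
int.library <- matrix(c( 90, 50,  0, 5), ncol = 1)
normDotProduct(int.sample, int.library)  # small value = similar spectra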
|
121 |
+ |
|
122 |
+ |
|
123 |
+##' The function calculates the distance between each mass spectrum in the msp file
|
124 |
+##' and the aligned mass spectra from each sample
|
125 |
+##' |
|
126 |
+##' Returns the distance matrix
|
127 |
+##' @title distToLib |
|
128 |
+##' @param mspLib a .msp file from NIST |
|
129 |
+##' @param outList an object from gatherInfo() |
|
130 |
+##' @return the distance matrix between the mass spec and the aligned spec |
|
131 |
+##' @author Riccardo Romoli |
|
132 |
+##' @export |
|
133 |
+distToLib <- function(mspLib, outList ){ |
|
134 |
+ mspDist <- lapply(1:length(mspLib), |
|
135 |
+ function(x) |
|
136 |
+ { |
|
137 |
+ ## trace |
|
138 |
+ cat(x, "\n") |
|
139 |
+ sapply(1:length(outList), |
|
140 |
+ function(y) |
|
141 |
+ { |
|
142 |
+ ## cat("y is = ", y, "\n") # for debug only |
|
143 |
+ matchSpec(mspLib[[x]], outList, |
|
144 |
+ whichSpec = y) |
|
145 |
+ } |
|
146 |
+ ) |
|
147 |
+ }) |
|
148 |
+ mtx.dist <- do.call(rbind, mspDist) |
|
149 |
+ rownames(mtx.dist) <- sapply(mspLib, "[[", 1) |
|
150 |
+ colnames(mtx.dist) <- sapply(1:length(outList), |
|
151 |
+ function(x) |
|
152 |
+ { |
|
153 |
+ paste0("outListFold", x) |
|
154 |
+ } |
|
155 |
+ ) |
|
156 |
+ return(mtx.dist) |
|
157 |
+} |
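A follow-up sketch with invented distances: given the matrix distToLib() returns (library compounds in rows, aligned peak groups in columns), the most plausible hit per column is simply the row with the smallest distance.

dist.mtx <- matrix(c(0.05, 0.80, 0.90,
                     0.75, 0.10, 0.60), nrow = 3,
                   dimnames = list(c("cmpdA", "cmpdB", "cmpdC"),
                                   c("outListFold1", "outListFold2")))
apply(dist.mtx, 2, function(x) names(which.min(x)))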
|
158 |
+ |
|
159 |
+ |
|
160 |
+##' The head-to-tail-plot for the mass spectra |
|
161 |
+##' |
|
162 |
+##' Head-to-tail-plot to visually compare the mass spectra |
|
163 |
+##' @title Head to tail plot |
|
164 |
+##' @param specFromLib the mass spectra obtained from the .msp file |
|
165 |
+##' @param specFromList the mass spectra obtained from \code{\link{gatherInfo}} |
|
166 |
+##' @return the plot |
|
167 |
+##' @author Riccardo Romoli |
|
168 |
+##' @export |
|
169 |
+headToTailPlot <- function(specFromLib, specFromList){ |
|
170 |
+ libSpec <- specFromLib$spec |
|
171 |
+ pspec <- specFromList |
|
172 |
+ ## get average and normalized intensity of the mass spec |
|
173 |
+ averageInt <- apply(pspec$data, MARGIN = 1, FUN = mean, na.rm = TRUE) |
|
174 |
+ ## normalize the intensity |
|
175 |
+ normInt <- sapply(averageInt, function(x){ |
|
176 |
+ i <- which.max(averageInt) |
|
177 |
+ (1000*x)/averageInt[i] |
|
178 |
+ } |
|
179 |
+ ) |
|
180 |
+ pspec.av <- data.frame(mz = pspec$mz, intensity = normInt) |
|
181 |
+ ## merge the spectra to the same mz range and remove the NA |
|
182 |
+ mztomerge <- data.frame(mz = min(c(min(libSpec[,"mz"], na.rm = TRUE), |
|
183 |
+ min(pspec.av[,"mz"], na.rm = TRUE))) : |
|
184 |
+ max(c(max(libSpec[,"mz"], na.rm = TRUE), |
|
185 |
+ max(pspec.av[,"mz"], na.rm = TRUE))) |
|
186 |
+ ) |
|
187 |
+ pspec.merged <- merge(pspec.av, mztomerge, by = "mz", all = TRUE) |
|
188 |
+ libSpec.merged <- merge(libSpec, mztomerge, by = "mz", all = TRUE) |
|
189 |
+ pspec.merged[is.na(pspec.merged)] <- 0 |
|
190 |
+ libSpec.merged[is.na(libSpec.merged)] <- 0 |
|
191 |
+ ## now the plot |
|
192 |
+ plot(pspec.merged, type = "h", ylim = c(-1000, 1000), main = "Head to Tail Plot") |
|
193 |
+ points(libSpec.merged[,"mz"], y = -libSpec.merged[,"intensity"]*10, col = 2, |
|
194 |
+ type = "h") |
|
195 |
+} |
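A self-contained toy sketch (synthetic spectra, not package data) of the two arguments headToTailPlot() expects: the first mimics one entry returned by importSpec(), the second one element of a gatherInfo() list.

libEntry <- list(NAME = "toy compound",
                 spec = cbind(mz = 50:54, intensity = c(999, 500, 120, 60, 10)))
aligned  <- list(mz = 50:54, rt = c(7.8, 7.8),
                 data = cbind(c(980, 450, 100, 80, 5),
                              c(1000, 480, 90, 70, 8)))
headToTailPlot(libEntry, aligned)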
... | ... |
@@ -118,7 +118,7 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)) |
118 | 118 |
#' Retention Time Penalized Normalized Dot Product |
119 | 119 |
#' |
120 | 120 |
#' This function calculates the similarity of all pairs of peaks from 2 |
121 |
-#' samples, using the spectra similarity and the rretention time differencies |
|
121 |
+#' samples, using the spectra similarity and the retention time differences
|
122 | 122 |
#' |
123 | 123 |
#' Computes the normalized dot product between every pair of peak vectors in |
124 | 124 |
#' the retention time window (\code{D}) and returns a similarity matrix.
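A worked sketch of the Gaussian retention-time weight applied by ndpRT() (times given here directly in the same unit as D, leaving aside the unit conversion done inside the function): peaks eluting close together keep essentially all of their spectral score, while a retention-time difference equal to D scales it by exp(-1).

rtPen <- function(t1, t2, D) exp(-((t1 - t2)^2) / D^2)
rtPen(301, 302, D = 50)  # ~1.00, almost no penalty
rtPen(300, 350, D = 50)  # exp(-1) ~ 0.37
rtPen(300, 450, D = 50)  # ~1e-4, the match is effectively suppressed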
... | ... |
@@ -136,65 +136,64 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)) |
136 | 136 |
#' |
137 | 137 |
#' ## Not Run |
138 | 138 |
#' require(gcspikelite) |
139 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
140 |
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
141 |
-#' |
|
142 |
-#' # read data, peak detection results |
|
143 |
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
144 |
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
145 |
-#' snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
146 |
-#' sleep=0) |
|
139 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
140 |
+#' sep = "/"),"CDF", full = TRUE) |
|
141 |
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
142 |
+#' ## create settings object |
|
143 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
144 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
145 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
146 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
147 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
148 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
149 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
150 |
+#' data |
|
147 | 151 |
#' ## review peak picking |
148 |
-#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
152 |
+#' plotChrom(data, rtrange = c(7.5, 10.5), runs = c(1:2)) |
|
149 | 153 |
#' |
150 |
-#' r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
151 |
-#' pd@peaksrt[[1]], pd@peaksrt[[2]], D=50) |
|
154 |
+#' r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]], |
|
155 |
+#' data@peaksrt[[1]], data@peaksrt[[2]], D = 50) |
|
152 | 156 |
#' ## End (Not Run) |
153 | 157 |
#' |
154 | 158 |
#' @export ndpRT |
155 |
-ndpRT <- function(s1, s2, t1, t2, D){ |
|
156 |
- |
|
159 |
+ndpRT <- function(s1, s2, t1, t2, D) { |
|
157 | 160 |
Normalize <- function(j){ |
158 |
- n <- apply(j, 2, function(k){ |
|
161 |
+ n <- apply(j, 2, function(k) { |
|
159 | 162 |
m <- k[which.max(k)] |
160 |
- norm <- k/m*100 |
|
163 |
+ norm <- k / m * 100 |
|
161 | 164 |
}) |
162 | 165 |
return(n) |
163 | 166 |
} |
164 |
- |
|
165 |
- scoring <- function(s1, s2, t1, t2, D){ |
|
166 |
- angle <- function(s1, s2){ |
|
167 |
- theta <- acos(sum(s1*s2) / (sqrt(sum(s1 * s1)) * sqrt(sum(s2 * s2)))) |
|
168 |
- theta <- 1-theta |
|
169 |
- if(theta < 0) |
|
170 |
- { |
|
167 |
+ scoring <- function(s1, s2, t1, t2, D) { |
|
168 |
+ angle <- function(s1, s2) { |
|
169 |
+ theta <- acos( |
|
170 |
+ sum(s1 * s2) / (sqrt(sum(s1 * s1)) * sqrt(sum(s2 * s2))) |
|
171 |
+ ) |
|
172 |
+ theta <- 1 - theta |
|
173 |
+ if(theta < 0) { |
|
171 | 174 |
theta <- 0 |
172 |
- } |
|
173 |
- return(theta) |
|
175 |
+ } |
|
176 |
+ return(theta) |
|
174 | 177 |
} |
175 |
- |
|
176 |
- rtPen <- function(t1, t2, D){ |
|
178 |
+ rtPen <- function(t1, t2, D) { |
|
177 | 179 |
## D expressed in seconds
178 |
- t1 <- t1/60 # trasformo in secondi |
|
179 |
- t2 <- t2/60 # trasformo in secondi |
|
180 |
- srt <- exp(-(((t1-t2)^2) / D^2)) # da articolo MR, modificato |
|
180 |
+ t1 <- t1 / 60 # convert to seconds
|
181 |
+ t2 <- t2 / 60 # convert to seconds
|
182 |
+ srt <- exp(- (((t1 - t2)^2) / D^2)) # from MR's article, modified
|
181 | 183 |
# was 2*D^2
182 | 184 |
return(srt) |
183 | 185 |
} |
184 |
- |
|
185 | 186 |
score <- angle(s1, s2) * rtPen(t1, t2, D) |
186 | 187 |
return(score) |
187 | 188 |
} |
188 |
- |
|
189 | 189 |
s1 <- Normalize(s1) |
190 | 190 |
s2 <- Normalize(s2) |
191 |
- |
|
192 |
- res <- matrix(0, nrow=ncol(s1), ncol=ncol(s2)) |
|
193 |
- for(i in 1:ncol(s1)){ |
|
194 |
- for(j in 1:ncol(s2)){ |
|
195 |
- res[i,j] <- scoring(s1[,i], s2[,j], t1[i], t2[j], D=D) |
|
191 |
+ res <- matrix(0, nrow = ncol(s1), ncol = ncol(s2)) |
|
192 |
+ for (i in 1:ncol(s1)) { |
|
193 |
+ for (j in 1:ncol(s2)) { |
|
194 |
+ res[i, j] <- scoring(s1[, i], s2[, j], t1[i], t2[j], D = D) |
|
196 | 195 |
} |
197 |
- } |
|
196 |
+ } |
|
198 | 197 |
return(res) |
199 | 198 |
} |
200 | 199 |
|
... | ... |
@@ -226,55 +225,58 @@ ndpRT <- function(s1, s2, t1, t2, D){ |
226 | 225 |
#' |
227 | 226 |
#' ## Not Run |
228 | 227 |
#' require(gcspikelite) |
229 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
230 |
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
231 |
-#' ## read data, peak detection results |
|
232 |
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
233 |
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
234 |
-#' snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
235 |
-#' sleep=0) |
|
228 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
229 |
+#' sep = "/"),"CDF", full = TRUE) |
|
230 |
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
231 |
+#' ## create settings object |
|
232 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
233 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
234 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
235 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
236 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
237 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
238 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
239 |
+#' data |
|
236 | 240 |
#' ## review peak picking |
237 |
-#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
241 |
+#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2)) |
|
238 | 242 |
#' |
239 |
-#' r <- corPrt(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
240 |
-#' pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, penality=0.2) |
|
243 |
+#' r <- corPrt(data@peaksdata[[1]], data@peaksdata[[2]], |
|
244 |
+#' data@peaksrt[[1]], data@peaksrt[[2]], D = 50, penality = 0.2) |
|
241 | 245 |
#' ## End (Not Run) |
242 | 246 |
#' |
243 | 247 |
#' @importFrom stats complete.cases |
244 | 248 |
#' @export corPrt |
245 |
-corPrt <- function(d1, d2, t1, t2, D, penality=0.2){ |
|
249 |
+corPrt <- function(d1, d2, t1, t2, D, penality = 0.2) { |
|
246 | 250 |
D <- as.numeric(D) # time window in seconds
247 | 251 |
pn <- as.numeric(penality) # penalty applied outside the time window
248 |
- pearson <- function(x,y){ |
|
252 |
+ pearson <- function(x,y) { |
|
249 | 253 |
size <- length(x) |
250 |
- cfun <- .C("pearson", size=as.integer(size), x=as.double(x), |
|
251 |
- y=as.double(y), result=double(1), PACKAGE='flagme') |
|
254 |
+ cfun <- .C("pearson", size = as.integer(size), x = as.double(x), |
|
255 |
+ y = as.double(y), result = double(1), PACKAGE = 'flagme') |
|
252 | 256 |
return(cfun[["result"]]) |
253 | 257 |
} |
254 |
- Normalize <- function(j){ |
|
255 |
- n <- apply(j, 2, function(k){ |
|
258 |
+ Normalize <- function(j) { |
|
259 |
+ n <- apply(j, 2, function(k) { |
|
256 | 260 |
m <- k[which.max(k)] |
257 |
- norm <- k/m*100 |
|
261 |
+ norm <- k / m * 100 |
|
258 | 262 |
}) |
259 | 263 |
} |
260 | 264 |
Rank <- function(u) { |
261 |
- if (length(u) == 0L) |
|
265 |
+ if (length(u) == 0L) |
|
262 | 266 |
u |
263 | 267 |
else if (is.matrix(u)) { |
264 |
- if (nrow(u) > 1L) |
|
265 |
- apply(u, 2L, rank, na.last="keep") |
|
268 |
+ if (nrow(u) > 1L) |
|
269 |
+ apply(u, 2L, rank, na.last = "keep") |
|
266 | 270 |
else row(u) |
267 | 271 |
} |
268 |
- else rank(u, na.last="keep") |
|
272 |
+ else rank(u, na.last = "keep") |
|
269 | 273 |
} |
270 |
- # |
|
271 | 274 |
x <- Normalize(d1) |
272 | 275 |
y <- Normalize(d2) |
273 |
- |
|
274 | 276 |
## method <- c("pearson", "kendall", "spearman") |
275 | 277 |
ncx <- ncol(x) |
276 | 278 |
ncy <- ncol(y) |
277 |
- r <- matrix(0, nrow=ncx, ncol=ncy) |
|
279 |
+ r <- matrix(0, nrow = ncx, ncol = ncy) |
|
278 | 280 |
for (i in seq_len(ncx)) { |
279 | 281 |
for (j in seq_len(ncy)) { |
280 | 282 |
x2 <- x[, i] |
... | ... |
@@ -283,24 +285,20 @@ corPrt <- function(d1, d2, t1, t2, D, penality=0.2){ |
283 | 285 |
x2 <- rank(x2[ok]) |
284 | 286 |
y2 <- rank(y2[ok]) |
285 | 287 |
## insert rt penality in seconds |
286 |
- rtDiff <- t1[i]*60 - t2[j]*60 # retention time in seconds |
|
288 |
+ rtDiff <- t1[i] * 60 - t2[j] * 60 # retention time in seconds |
|
287 | 289 |
rtDiff <- abs(rtDiff) |
288 | 290 |
r[i, j] <- if (any(ok)) |
289 |
- if(rtDiff <= D) |
|
291 |
+ if (rtDiff <= D) |
|
290 | 292 |
pearson(x2, y2) |
291 | 293 |
else |
292 | 294 |
pearson(x2, y2) - pn |
293 | 295 |
else 0 |
294 | 296 |
} |
295 | 297 |
} |
296 |
- |
|
297 |
- r <- apply(r, MARGIN=c(1,2), function(x){ |
|
298 |
- if(x < 0.2) |
|
299 |
- { |
|
298 |
+ r <- apply(r, MARGIN = c(1, 2), function(x) { |
|
299 |
+ if (x < 0.2) { |
|
300 | 300 |
x <- 0 |
301 |
- } |
|
302 |
- else |
|
303 |
- { |
|
301 |
+ } else { |
|
304 | 302 |
x <- x |
305 | 303 |
} |
306 | 304 |
}) |
... | ... |
@@ -138,36 +138,39 @@ setMethod("show","peaksAlignment", |
138 | 138 |
#' ## see clusterAlignment, it calls peaksAlignment |
139 | 139 |
#' |
140 | 140 |
#' ## Not Run: |
141 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
142 |
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
143 |
-#' |
|
144 |
-#' # read data, peak detection results |
|
145 |
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
146 |
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
147 |
-#' snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
148 |
-#' sleep=0) |
|
149 |
-#' ## review peak picking |
|
150 |
-#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
141 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
142 |
+#' sep = "/"),"CDF", full = TRUE) |
|
143 |
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
144 |
+#' ## create settings object |
|
145 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
146 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
147 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
148 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
149 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
150 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
151 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
152 |
+#' data |
|
153 |
+#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2)) |
|
151 | 154 |
#' |
152 | 155 |
#' ## align two chromatogram |
153 |
-#' pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
154 |
-#' pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, |
|
155 |
-#' metric=3, compress=FALSE, type=2, penality=0.2) |
|
156 |
+#' pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]], |
|
157 |
+#' data@peaksrt[[1]], data@peaksrt[[2]], D = 50, |
|
158 |
+#' metric = 3, compress = FALSE, type = 2, penality = 0.2) |
|
156 | 159 |
#' |
157 | 160 |
#' plotAlignment(pA) |
158 | 161 |
#' pA@v$match |
159 | 162 |
#' |
160 | 163 |
#' par(mfrow=c(2,1)) |
161 |
-#' plot(pd@peaksdata[[1]][,15], type='h', main=paste(pd@peaksrt[[1]][[15]])) |
|
162 |
-#' plot(pd@peaksdata[[2]][,17], type='h', |
|
163 |
-#' main=paste(pd@peaksrt[[2]][[17]])) |
|
164 |
+#' plot(data@peaksdata[[1]][,15], type = 'h', main = paste(data@peaksrt[[1]][[15]])) |
|
165 |
+#' plot(data@peaksdata[[2]][,17], type = 'h', |
|
166 |
+#' main = paste(data@peaksrt[[2]][[17]])) |
|
164 | 167 |
#' ## End (Not Run) |
165 | 168 |
#' |
166 | 169 |
#' @export |
167 |
-peaksAlignment <- function(d1, d2, t1, t2, gap=0.5, D=50, |
|
168 |
- timedf=NULL, df=30, verbose=TRUE, |
|
169 |
- usePeaks=TRUE, compress=TRUE, metric=2, |
|
170 |
- type=2, penality=0.2){ |
|
170 |
+peaksAlignment <- function(d1, d2, t1, t2, gap = 0.5, D = 50, |
|
171 |
+ timedf = NULL, df = 30, verbose = TRUE, |
|
172 |
+ usePeaks = TRUE, compress = TRUE, metric = 2, |
|
173 |
+ type = 2, penality = 0.2) { |
|
171 | 174 |
|
172 | 175 |
## r <- switch(metric, |
173 | 176 |
## normDotProduct(d1,d2,t1,t2,D=D, |
... | ... |
@@ -190,48 +193,47 @@ peaksAlignment <- function(d1, d2, t1, t2, gap=0.5, D=50, |
190 | 193 |
## D <- D/100 |
191 | 194 |
## } |
192 | 195 |
r <- switch(metric, |
193 |
- normDotProduct(d1, d2, t1, t2, D=D, |
|
194 |
- df=df+abs(ncol(d1)-ncol(d2)), |
|
195 |
- timedf=timedf, verbose=verbose), |
|
196 |
- ndpRT(s1=d1, s2=d2, t1, t2, D=D), |
|
197 |
- corPrt(d1, d2, t1, t2, D=D, penality=penality) |
|
196 |
+ normDotProduct(d1, d2, t1, t2, D = D, |
|
197 |
+ df = df + abs(ncol(d1) - ncol(d2)), |
|
198 |
+ timedf = timedf, verbose = verbose), |
|
199 |
+ ndpRT(s1 = d1, s2 = d2, t1, t2, D = D), |
|
200 |
+ corPrt(d1, d2, t1, t2, D = D, penality = penality) |
|
198 | 201 |
) |
199 | 202 |
|
200 | 203 |
r[is.nan(r)] <- 1 ## remove NaN |
201 | 204 |
|
202 |
- if(type == 1) |
|
203 |
- { |
|
205 |
+ if (type == 1) { |
|
204 | 206 |
if(verbose) |
205 | 207 |
cat("[peaksAlignment] Comparing", ncol(d1), "peaks to", |
206 | 208 |
ncol(d2), "peaks -- gap=", gap, "D=", D, ', metric=', |
207 | 209 |
metric, ', type=', type, "\n") |
208 |
- v <- dp(r, gap=gap, verbose=verbose) # dynamic programming |
|
210 |
+ v <- dp(r, gap = gap, verbose = verbose) # dynamic programming |
|
209 | 211 |
} |
210 | 212 |
|
211 |
- if(type == 2) |
|
212 |
- { |
|
213 |
+ if (type == 2) { |
|
213 | 214 |
if(verbose) |
214 | 215 |
cat("[peaksAlignment] Comparing", ncol(d1), "peaks to", |
215 | 216 |
ncol(d2), "peaks -- D=", D, "seconds,", 'metric=', |
216 | 217 |
metric, ', type=', type, '\n') |
217 |
- v <- dynRT(S=r) # RR, modified to be used with normDotProduct() |
|
218 |
+ v <- dynRT(S = r) # RR, modified to be used with normDotProduct() |
|
218 | 219 |
} |
219 | 220 |
|
220 |
- v$match <- v$match[!is.na(v$match[,2]),] # remove non-matched peaks |
|
221 |
+ v$match <- v$match[!is.na(v$match[,2]), ] # remove non-matched peaks |
|
221 | 222 |
|
222 | 223 |
sim <- 0 |
223 |
- for(i in 1:nrow(v$match)){ |
|
224 |
+ for(i in 1:nrow(v$match)) { |
|
224 | 225 |
sim <- sim + r[v$match[i, 1], v$match[i, 2]]# |
225 | 226 |
} |
226 |
- sim <- sim/nrow(v$match) |
|
227 |
- if(verbose) |
|
228 |
- cat("[peaksAlignment] ", nrow(v$match), "matched. Similarity=", |
|
229 |
- sim, "\n") |
|
230 |
- object <- new("peaksAlignment", v=v, r=r, dist=sim, |
|
231 |
- compressed=FALSE, gap=gap, D=D) |
|
232 |
- if(compress){ |
|
227 |
+ sim <- sim / nrow(v$match) |
|
228 |
+ if(verbose) { |
|
229 |
+ cat("[peaksAlignment] ", nrow(v$match), "matched. Similarity=", |
|
230 |
+ sim, "\n") |
|
231 |
+ } |
|
232 |
+ object <- new("peaksAlignment", v = v, r = r, dist = sim, |
|
233 |
+ compressed = FALSE, gap = gap, D = D) |
|
234 |
+ if(compress) { |
|
233 | 235 |
compress(object) |
234 |
- }else{ |
|
236 |
+ } else { |
|
235 | 237 |
object |
236 | 238 |
} |
237 | 239 |
} |
... | ... |
@@ -267,23 +269,24 @@ peaksAlignment <- function(d1, d2, t1, t2, gap=0.5, D=50, |
267 | 269 |
#' @examples |
268 | 270 |
#' |
269 | 271 |
#' require(gcspikelite) |
270 |
-#' |
|
271 |
-#' ## paths and files |
|
272 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
273 |
-#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE) |
|
274 |
-#' eluFiles <- dir(gcmsPath, "ELU", full=TRUE) |
|
275 |
-#' |
|
276 |
-#' ## read data |
|
277 |
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
278 |
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
279 |
-#' snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5) |
|
280 |
-#' |
|
272 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
273 |
+#' sep = "/"),"CDF", full = TRUE) |
|
274 |
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
275 |
+#' ## create settings object |
|
276 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
277 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
278 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
279 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
280 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
281 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
282 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
283 |
+#' data |
|
281 | 284 |
#' ## image plot |
282 |
-#' plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE) |
|
285 |
+#' plotChrom(data, rtrange = c(7.5, 8.5), plotPeaks = TRUE, plotPeakLabels = TRUE) 
|
283 | 286 |
#' |
284 | 287 |
#' ## align two chromatogram |
285 |
-#' pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
286 |
-#' pd@peaksrt[[1]], pd@peaksrt[[2]], D = 50, |
|
288 |
+#' pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]], |
|
289 |
+#' data@peaksrt[[1]], data@peaksrt[[2]], D = 50, |
|
287 | 290 |
#' compress = FALSE, type = 1, metric = 1, |
288 | 291 |
#' gap = 0.5) |
289 | 292 |
#' plotAlignment(pA) |
... | ... |
@@ -16,53 +16,52 @@ |
16 | 16 |
#' @author riccardo.romoli@@unifi.it |
17 | 17 |
#' @examples |
18 | 18 |
#' |
19 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
20 |
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
21 |
-#' # read data, peak detection results |
|
22 |
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
23 |
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
24 |
-#' snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
25 |
-#' sleep=0) |
|
19 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
20 |
+#' sep = "/"),"CDF", full = TRUE) |
|
21 |
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
22 |
+#' ## create settings object |
|
23 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
24 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
25 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
26 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
27 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
28 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
29 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
30 |
+#' data |
|
26 | 31 |
#' ## align two chromatogram |
27 |
-#' pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
28 |
-#' pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, |
|
29 |
-#' metric=3, compress=FALSE, type=2, penality=0.2) |
|
32 |
+#' pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]], |
|
33 |
+#' data@peaksrt[[1]], data@peaksrt[[2]], D = 50, |
|
34 |
+#' metric = 3, compress = FALSE, type = 2, penality = 0.2) |
|
30 | 35 |
#' pA@v$match |
31 | 36 |
#' ## plot the mass spectra |
32 | 37 |
#' par(mfrow=c(2,1)) |
33 |
-#' plotFrags(object=pd, sample=1, specID=10) |
|
34 |
-#' plotFrags(object=pd, sample=2, specID=12) |
|
38 |
+#' plotFrags(object=data, sample=1, specID=10) |
|
39 |
+#' plotFrags(object=data, sample=2, specID=12) |
|
35 | 40 |
#' |
36 | 41 |
#' @export plotFrags |
37 |
-plotFrags <- function(object, sample, specID, normalize = TRUE, ...){ |
|
42 |
+plotFrags <- function(object, sample, specID, normalize = TRUE, ...) { |
|
38 | 43 |
sp <- as.numeric(specID) |
39 |
- ## sample <- object@files[sample] |
|
40 |
- ## i <- grep(pattern = sample, object@files) |
|
41 |
- if(length(sp) > 0){ |
|
42 |
- y <- object@peaksdata[[sample]][,sp] |
|
43 |
- if(normalize){ |
|
44 |
- y <- sapply(y, function(x){ |
|
44 |
+ if (length(sp) > 0) { |
|
45 |
+ y <- object@peaksdata[[sample]][, sp] |
|
46 |
+ if (normalize) { |
|
47 |
+ y <- sapply(y, function(x) { |
|
45 | 48 |
i <- which.max(y) |
46 |
- (100*x)/y[i]} |
|
47 |
- )} |
|
48 |
- plot(y, |
|
49 |
- type = 'h', |
|
50 |
- main = paste('Sample', names(object@peaksdata[sample]), 'RT', object@peaksrt[[sample]][sp], 'min'), |
|
51 |
- xlab = 'm/z', |
|
49 |
+ (100 * x) / y[i]} |
|
50 |
+ ) |
|
51 |
+ } |
|
52 |
+ plot(y, type = "h", main = paste("Sample", |
|
53 |
+ names(object@peaksdata[sample]), "RT", |
|
54 |
+ object@peaksrt[[sample]][sp], "min"), xlab = "m/z", |
|
52 | 55 |
ylab = |
53 |
- if(normalize) |
|
54 |
- { |
|
55 |
- 'Rel. Abundance' |
|
56 |
- } |
|
57 |
- else |
|
58 |
- { |
|
59 |
- 'Abs. Abundance' |
|
56 |
+ if (normalize) { |
|
57 |
+ "Rel. Abundance" |
|
58 |
+ } else { |
|
59 |
+ "Abs. Abundance" |
|
60 | 60 |
}, ...) |
61 |
- } |
|
62 |
- else |
|
63 |
- { |
|
64 |
- stop(paste('The spectrum is not present in the sample', object@files[sample], '\n')) |
|
65 |
- } |
|
61 |
+ } else { |
|
62 |
+ stop(paste("The spectrum is not present in the sample", |
|
63 |
+ object@files[sample], '\n')) |
|
64 |
+ } |
|
66 | 65 |
} |
67 | 66 |
|
68 | 67 |
|
... | ... |
@@ -89,41 +88,42 @@ plotFrags <- function(object, sample, specID, normalize = TRUE, ...){ |
89 | 88 |
#' @keywords gatherInfo() plot() |
90 | 89 |
#' @examples |
91 | 90 |
#' |
92 |
-#' ## Rd workflow |
|
93 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/") |
|
94 |
-#' cdfFiles <- dir(gcmsPath,"CDF", full = TRUE) |
|
95 |
-#' |
|
96 |
-#' # read data, peak detection results |
|
97 |
-#' pd <- peaksDataset(cdfFiles[1:4], mz = seq(50,550), rtrange = c(7.5,10.5)) |
|
98 |
-#' pd <- addXCMSPeaks(files = cdfFiles[1:4], object = pd, peakPicking = c('mF'), |
|
99 |
-#' snthresh = 2, fwhm = 8, step = 0.5, steps = 2, mzdiff = 0.5, |
|
100 |
-#' sleep = 0) |
|
91 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
92 |
+#' sep = "/"),"CDF", full = TRUE) |
|
93 |
+#' data <- peaksDataset(files[1:4], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
94 |
+#' ## create settings object |
|
95 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
96 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
97 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
98 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
99 |
+#' data <- addXCMSPeaks(files[1:4], data, settings = mfp, minintens = 100, |
|
100 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
101 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
102 |
+#' data |
|
101 | 103 |
#' ## multiple alignment |
102 |
-#' ma <- multipleAlignment(pd, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05, bw.gap = 0.6, |
|
103 |
-#' bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50, |
|
104 |
-#' verbose = TRUE, metric = 2, type = 2) |
|
104 |
+#' ma <- multipleAlignment(data, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05, |
|
105 |
+#' bw.gap = 0.6, bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50, |
|
106 |
+#' verbose = TRUE, metric = 2, type = 2) |
|
105 | 107 |
#' |
106 | 108 |
#' ## gather apex intensities |
107 |
-#' gip <- gatherInfo(pd, ma) |
|
109 |
+#' gip <- gatherInfo(data, ma) |
|
108 | 110 |
#' gip[[33]] |
109 |
-#' plotAlignedFrags(object = pd, outList = gip, specID = 33) |
|
111 |
+#' plotAlignedFrags(object = data, outList = gip, specID = 33) |
|
110 | 112 |
#' |
111 | 113 |
#' @export plotAlignedFrags |
112 |
-plotAlignedFrags <- function(object, outList, specID, fullRange = TRUE, normalize = TRUE, ...) |
|
113 |
-{ |
|
114 |
-specID <- as.numeric(specID) |
|
115 |
-mz <- outList[[specID]]$mz |
|
116 |
-abundance <- outList[[specID]]$data |
|
117 |
- |
|
118 |
-if(normalize) |
|
119 |
-{ |
|
120 |
- for(i in 1:ncol(abundance)) |
|
121 |
- { |
|
122 |
- if(is.na(sum(abundance[,i]))) |
|
123 |
- next |
|
124 |
- abundance[,i] <- 100 * abundance[,i] / abundance[which.max(abundance[,i]), i] |
|
125 |
- } |
|
126 |
-} |
|
114 |
+plotAlignedFrags <- function(object, outList, specID, fullRange = TRUE, |
|
115 |
+ normalize = TRUE, ...) { |
|
116 |
+ specID <- as.numeric(specID) |
|
117 |
+ mz <- outList[[specID]]$mz |
|
118 |
+ abundance <- outList[[specID]]$data |
|
119 |
+ if (normalize) { |
|
120 |
+ for(i in 1:ncol(abundance)) { |
|
121 |
+ if(is.na(sum(abundance[,i]))) |
|
122 |
+ next |
|
123 |
+ abundance[,i] <- 100 * abundance[,i] / |
|
124 |
+ abundance[which.max(abundance[,i]), i] |
|
125 |
+ } |
|
126 |
+ } |
|
127 | 127 |
|
128 | 128 |
## set the plot grid |
129 | 129 |
specnum <- table(apply(abundance, 2, sum)) # count number of massSpec different from NA |
... | ... |
@@ -1,27 +1,34 @@ |
1 |
-##' Compress method for progressiveAlignment |
|
1 |
+##' Decompress method for progressiveAlignment |
|
2 | 2 |
##' |
3 |
-##' Compress method for progressiveAlignment |
|
3 |
+##' Decompress method for progressiveAlignment |
|
4 | 4 |
##' @title Compress method for progressiveAlignment |
5 |
-##' @param object dummy |
|
6 |
-##' @param verbose dummy |
|
5 |
+##' @param object progressiveAlignment object |
|
6 |
+##' @param verbose logical |
|
7 | 7 |
##' @param ... dummy |
8 | 8 |
##' @author MR |
9 |
+##' @importFrom SparseM as.matrix |
|
10 |
+##' @import methods |
|
11 |
+##' @importFrom methods setMethod new |
|
9 | 12 |
##' @keywords internal |
10 | 13 |
setMethod("decompress", "progressiveAlignment", |
11 |
- function(object, verbose=TRUE, ...){ |
|
12 |
- if(object@merges[[1]]$compressed == FALSE) { |
|
13 |
- if(verbose) |
|
14 |
- cat("[decompress.progressiveAlignment] Already decompressed.\n") |
|
14 |
+ function(object, verbose = TRUE, ...) { |
|
15 |
+ if (object@merges[[1]]$compressed == FALSE) { |
|
16 |
+ if (verbose) |
|
17 |
+ cat("[decompress.progressiveAlignment] |
|
18 |
+ "Already decompressed.\n") 
|
15 | 19 |
return(object) |
16 | 20 |
} |
17 |
- for(i in 1:length(object@merges)) { |
|
18 |
- if(object@merges[[i]]$compressed) { |
|
19 |
- object@merges[[i]]$r <- 1-as.matrix(object@merges[[i]]$r) |
|
21 |
+ for (i in 1:length(object@merges)) { |
|
22 |
+ if (object@merges[[i]]$compressed) { |
|
23 |
+ object@merges[[i]]$r <- |
|
24 |
+ 1 - as.matrix(object@merges[[i]]$r) |
|
20 | 25 |
object@merges[[i]]$compressed <- FALSE |
21 | 26 |
} |
22 | 27 |
} |
23 | 28 |
new("progressiveAlignment", object) |
24 |
- }) |
|
29 |
+ } |
|
30 |
+ ) |
|
31 |
+ |
|
25 | 32 |
|
26 | 33 |
##' Decompress method for progressiveAlignment |
27 | 34 |
##' |
... | ... |
@@ -30,24 +37,29 @@ setMethod("decompress", "progressiveAlignment", |
30 | 37 |
##' @param object dummy |
31 | 38 |
##' @param verbose dummy |
32 | 39 |
##' @param ... dummy |
33 |
-##' @keywords internal |
|
34 | 40 |
##' @author MR |
35 | 41 |
##' @importFrom SparseM as.matrix.csc |
36 |
-setMethod("compress","progressiveAlignment", |
|
37 |
- function(object,verbose=TRUE,...) { |
|
38 |
- if(object@merges[[1]]$compressed) { |
|
39 |
- if(verbose) |
|
40 |
- cat("[compress.progressiveAlignment] Already compressed.\n") |
|
42 |
+##' @import methods |
|
43 |
+##' @importFrom methods setMethod new |
|
44 |
+##' @keywords internal |
|
45 |
+setMethod("compress", "progressiveAlignment", |
|
46 |
+ function(object, verbose = TRUE, ...) { |
|
47 |
+ if (object@merges[[1]]$compressed) { |
|
48 |
+ if (verbose) |
|
49 |
+ cat("[compress.progressiveAlignment] |
|
50 |
+ "Already compressed.\n") 
|
41 | 51 |
return(object) |
42 | 52 |
} |
43 |
- for(i in 1:length(object@merges)) { |
|
44 |
- if(!(object@merges[[i]]$compressed)) { |
|
45 |
- object@merges[[i]]$r <- as.matrix.csc(1-object@merges[[i]]$r) |
|
53 |
+ for (i in 1:length(object@merges)) { |
|
54 |
+ if (!(object@merges[[i]]$compressed)) { |
|
55 |
+ object@merges[[i]]$r <- |
|
56 |
+ as.matrix.csc(1 - object@merges[[i]]$r) |
|
46 | 57 |
object@merges[[i]]$compressed <- TRUE |
47 | 58 |
} |
48 | 59 |
} |
49 |
- new("progressiveAlignment",object) |
|
50 |
- }) |
|
60 |
+ new("progressiveAlignment", object) |
|
61 |
+ } |
|
62 |
+ ) |
|
51 | 63 |
|
52 | 64 |
|
53 | 65 |
##' Show method for progressiveAlignment object |
... | ... |
@@ -57,19 +69,19 @@ setMethod("compress","progressiveAlignment", |
57 | 69 |
##' @author MR |
58 | 70 |
##' @export |
59 | 71 |
##' @noRd |
60 |
-setMethod("show","progressiveAlignment", |
|
61 |
-function(object){ |
|
62 |
- cat("An object of class \"", class(object), "\"\n", sep="") |
|
72 |
+setMethod("show", "progressiveAlignment", function(object){ |
|
73 |
+ cat("An object of class \"", class(object), "\"\n", sep = "") |
|
63 | 74 |
cat(length(object@merges), "merges\n") |
64 |
-}) |
|
75 |
+} |
|
76 |
+) |
|
65 | 77 |
|
66 | 78 |
|
67 | 79 |
|
68 | 80 |
#' Data Structure for progressive alignment of many GCMS samples |
69 |
-#' |
|
81 |
+#' |
|
70 | 82 |
#' Performs a progressive peak alignment (clustalw style) of multiple GCMS peak |
71 | 83 |
#' lists |
72 |
-#' |
|
84 |
+#' |
|
73 | 85 |
#' The progressive peak alignment we implemented here for multiple GCMS peak |
74 | 86 |
#' lists is analogous to how \code{clustalw} takes a set of pairwise sequence |
75 | 87 |
#' alignments and progressively builds a multiple alignment. More details can |
... | ... |
@@ -97,237 +109,188 @@ function(object){ |
97 | 109 |
#' of Melbourne. |
98 | 110 |
#' @keywords classes |
99 | 111 |
#' @examples |
100 |
-#' |
|
112 |
+#' |
|
101 | 113 |
#' require(gcspikelite) |
102 |
-#' ## paths and files |
|
103 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
104 |
-#' cdfFiles <- dir(gcmsPath, "CDF", full=TRUE) |
|
105 |
-#' eluFiles <- dir(gcmsPath, "ELU", full=TRUE) |
|
106 |
-#' |
|
107 |
-#' ## read data, peak detection results |
|
108 |
-#' pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
109 |
-#' pd <- addAMDISPeaks(pd, eluFiles[1:2]) |
|
110 |
-#' |
|
111 |
-#' ca <- clusterAlignment(pd, gap=.5, D=.05, df=30, metric=1, type=1, |
|
112 |
-#' compress = FALSE) |
|
113 |
-#' pa <- progressiveAlignment(pd, ca, gap=.6, D=.1, df=30, type=1, compress = FALSE) |
|
114 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
115 |
+#' sep = "/"),"CDF", full = TRUE) |
|
116 |
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
117 |
+#' ## create settings object |
|
118 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
119 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
120 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
121 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
122 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
123 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
124 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
125 |
+#' data |
|
126 |
+#' ca <- clusterAlignment(data, gap = 0.5, D = 0.05, df = 30, metric = 1, |
|
127 |
+#' type = 1, compress = FALSE) |
|
128 |
+#' pa <- progressiveAlignment(data, ca, gap = 0.6, D = 0.1, df = 30, |
|
129 |
+#' type = 1, compress = FALSE) |
|
114 | 130 |
#' |
115 | 131 |
#' @export |
116 | 132 |
progressiveAlignment <- function(pD, cA, D = 50, gap = 0.5, verbose = TRUE, |
117 | 133 |
usePeaks = TRUE, df = 30, compress = FALSE, |
118 |
- type=2) |
|
119 |
-{ |
|
120 |
- ## options(error = recover) |
|
134 |
+ type = 2) { |
|
121 | 135 |
m <- cA@merge |
122 | 136 |
merges <- vector("list", nrow(m)) |
123 |
- if(usePeaks) |
|
124 |
- { |
|
125 |
- pd <- pD@peaksdata |
|
126 |
- } |
|
127 |
- else |
|
128 |
- { |
|
129 |
- pd <- pD@rawdata |
|
137 |
+ if (usePeaks) { |
|
138 |
+ pd <- pD@peaksdata |
|
139 |
+ } else { |
|
140 |
+ pd <- pD@rawdata |
|
130 | 141 |
} |
131 | 142 |
pD <- NULL #? |
132 | 143 |
cA <- decompress(cA, verbose = verbose) |
133 |
- |
|
134 |
- for(i in 1:nrow(m)) |
|
135 |
- { |
|
136 |
- if(verbose) |
|
137 |
- { |
|
138 |
- cat("[progressiveAlignment] Doing merge", m[i,], "\n") |
|
139 |
- } |
|
140 |
- |
|
141 |
- ## left |
|
142 |
- if(m[i,1] < 0) |
|
143 |
- { |
|
144 |
- left.runs <- (-m[i,1]) |
|
145 |
- left.ind <- matrix(1:ncol(pd[[left.runs]]), ncol = 1) |
|
146 |
- } |
|
147 |
- else |
|
148 |
- { |
|
149 |
- left.runs <- merges[[m[i,1]]]$runs |
|
150 |
- left.ind <- merges[[m[i,1]]]$ind |
|
151 |
- } |
|
152 |
- |
|
153 |
- ## right |
|
154 |
- if(m[i,2] < 0) |
|
155 |
- { |
|
156 |
- right.runs <- abs(m[i,2]) |
|
157 |
- right.ind <- matrix(1:ncol(pd[[right.runs]]), ncol = 1) ## error |
|
158 |
- ## subscript out of bound |
|
144 |
+ |
|
145 |
+ for (i in 1:nrow(m)) { |
|
146 |
+ if (verbose) { |
|
147 |
+ cat("[progressiveAlignment] Doing merge", m[i, ], "\n") |
|
159 | 148 |
} |
160 |
- else |
|
161 |
- { |
|
162 |
- right.ind <- merges[[m[i,2]]]$ind |
|
163 |
- right.runs <- merges[[m[i,2]]]$runs |
|
149 |
+ ## left |
|
150 |
+ if (m[i, 1] < 0) { |
|
151 |
+ left.runs <- (-m[i, 1]) |
|
152 |
+ left.ind <- matrix(1:ncol(pd[[left.runs]]), ncol = 1) |
|
153 |
+ } else { |
|
154 |
+ left.runs <- merges[[m[i, 1]]]$runs |
|
155 |
+ left.ind <- merges[[m[i, 1]]]$ind |
|
164 | 156 |
} |
165 |
- |
|
166 |
- if(verbose) |
|
167 |
- { |
|
157 |
+ ## right |
|
158 |
+ if (m[i, 2] < 0) { |
|
159 |
+ right.runs <- abs(m[i, 2]) |
|
160 |
+ # subscript out of bound |
|
161 |
+ right.ind <- matrix(1:ncol(pd[[right.runs]]), ncol = 1) |
|
162 |
+ } else { |
|
163 |
+ right.ind <- merges[[m[i, 2]]]$ind |
|
164 |
+ right.runs <- merges[[m[i, 2]]]$runs |
|
165 |
+ } |
|
166 |
+ if (verbose) { |
|
168 | 167 |
cat("[progressiveAlignment] left.runs:", left.runs, ", ") |
169 | 168 |
cat("right.runs:", right.runs, "\n") |
170 | 169 |
} |
171 |
- |
|
172 |
- if(length(right.runs) == 1 & length(left.runs) == 1) |
|
173 |
- { |
|
174 |
- al <- cA@alignments[[cA@aligned[left.runs,right.runs]]] |
|
175 |
- v <- al@v |
|
176 |
- sim <- al@r |
|
177 |
- mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match) |
|
178 |
- new.ind <- mi |
|
179 |
- } |
|
180 |
- else |
|
181 |
- { |
|
170 |
+ if (length(right.runs) == 1 & length(left.runs) == 1) { |
|
171 |
+ al <- cA@alignments[[cA@aligned[left.runs, right.runs]]] |
|
172 |
+ v <- al@v |
|
173 |
+ sim <- al@r |
|
174 |
+ mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match) |
|
175 |
+ new.ind <- mi |
|
176 |
+ } else { |
|
182 | 177 |
sim <- matrix(0, nrow = nrow(left.ind), ncol = nrow(right.ind)) |
183 | 178 |
count <- matrix(0, nrow = nrow(left.ind), ncol = nrow(right.ind)) |
184 |
- if(verbose) |
|
185 |
- { |
|
186 |
- cat("[progressiveAlignment] (dot=50) going to", nrow(sim), ":") |
|
179 |
+ if (verbose) { |
|
180 |
+ cat("[progressiveAlignment] (dot=50) going to", nrow(sim), ":") |
|
187 | 181 |
} |
188 |
- for(r in 1:nrow(sim)) |
|
189 |
- { |
|
190 |
- if(verbose) |
|
191 |
- { |
|
192 |
- if(r %% 50 == 0) |
|
193 |
- { |
|
194 |
- cat(".") |
|
182 |
+ for (r in 1:nrow(sim)) { |
|
183 |
+ if (verbose) { |
|
184 |
+ if(r %% 50 == 0) { |
|
185 |
+ cat(".") |
|
195 | 186 |
} |
196 |
- } |
|
197 |
- for(cc in max(1, r - df) : min(r + df, ncol(sim))) |
|
198 |
- { |
|
199 |
- # cc generate subsrcipt out of bound |
|
200 |
- if(cc > dim(sim)[2]) |
|
201 |
- { |
|
202 |
- cat("\n\n", "Try to increase the df parameter to get a better alignment", "\n\n") |
|
187 |
+ } |
|
188 |
+ for (cc in max(1, r - df) : min(r + df, ncol(sim))) { |
|
189 |
+ # cc can generate a subscript out of bounds 
|
190 |
+ if (cc > dim(sim)[2]) { |
|
191 |
+ cat("\n\n", "Try to increase the df parameter to get a |
|
192 |
+ "better alignment", "\n\n") 
|
203 | 193 |
# cc <- dim(sim)[2] |
204 |
- } |
|
205 |
- for(lr in 1:length(left.runs)) |
|
206 |
- { |
|
207 |
- for(rr in 1:length(right.runs)) |
|
208 |
- { |
|
209 |
- ai <- cA@aligned[left.runs[lr], right.runs[rr]] |
|
210 |
- # this.sim <- cA$alignments[[ai]]$r |
|
211 |
- lind <- left.ind[r,lr] |
|
212 |
- rind <- right.ind[cc,rr]# Error: subscript out of bounds. Solution: increase df |
|
213 |
- if(!is.na(lind) & !is.na(rind)) |
|
214 |
- { |
|
215 |
- if(left.runs[lr] < right.runs[rr]) |
|
216 |
- { |
|
217 |
- sim[r,cc] <- sim[r,cc] + cA@alignments[[ai]]@r[lind,rind] |
|
194 |
+ } |
|
195 |
+ for (lr in 1:length(left.runs)) { |
|
196 |
+ for(rr in 1:length(right.runs)) { |
|
197 |
+ ai <- cA@aligned[left.runs[lr], right.runs[rr]] |
|
198 |
+ # this.sim <- cA$alignments[[ai]]$r |
|
199 |
+ lind <- left.ind[r, lr] |
|
200 |
+ rind <- right.ind[cc, rr] |
|
201 |
+ # Error: subscript out of bounds. Solution: increase df |
|
202 |
+ if (!is.na(lind) & !is.na(rind)) { |
|
203 |
+ if(left.runs[lr] < right.runs[rr]) { |
|
204 |
+ sim[r, cc] <- sim[r, cc] + cA@alignments[[ai]]@r[lind, rind] |
|
205 |
+ } else { |
|
206 |
+ sim[r, cc] <- sim[r, cc] + cA@alignments[[ai]]@r[rind, lind] |
|
218 | 207 |
} |
219 |
- else |
|
220 |
- { |
|
221 |
- sim[r,cc] <- sim[r,cc] + cA@alignments[[ai]]@r[rind,lind] |
|
208 |
+ count[r, cc] = count[r, cc] + 1 |
|
222 | 209 |
} |
223 |
- count[r,cc] = count[r,cc] + 1 |
|
224 | 210 |
} |
211 |
+ } |
|
225 | 212 |
} |
226 | 213 |
} |
227 |
- } |
|
228 |
- } |
|
229 |
- if(verbose) |
|
230 |
- { |
|
231 |
- cat("\n") |
|
232 |
- } |
|
233 |
- |
|
234 |
- if(type == 2) # RR |
|
235 |
- { |
|
236 |
- v <- dynRT(S = sim) |
|
237 |
- v$match <- v$match[!is.na(v$match[,2]),] # remove non-matched peaks |
|
238 |
- } |
|
239 |
- if(type == 1) |
|
240 |
- { |
|
241 |
- sim[count > 0] <- sim[count > 0] / count[count > 0] |
|
242 |
- sim[sim == 0] <- 1 |
|
243 |
- v <- dp(sim, gap = gap, verbose = verbose)# |
|
244 |
- } |
|
245 |
- mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match) |
|
246 |
- new.ind <- cbind(left.ind[mi[,1],], right.ind[mi[,2],]) |
|
247 |
- } |
|
248 |
- rownames(new.ind) <- 1:nrow(new.ind) |
|
249 |
- merges[[i]] <- list(ind = new.ind, mi = mi, runs = c(left.runs, right.runs), |
|
250 |
- left = left.runs, right = right.runs, r = sim, |
|
251 |
- compressed = FALSE) |
|
252 |
- } |
|
253 |
- |
|
214 |
+ if (verbose) { |
|
215 |
+ cat("\n") |
|
216 |
+ } |
|
217 |
+ if (type == 2) { |
|
218 |
+ v <- dynRT(S = sim) |
|
219 |
+ # remove non-matched peaks |
|
220 |
+ v$match <- v$match[!is.na(v$match[, 2]), ] |
|
221 |
+ } |
|
222 |
+ if (type == 1) { |
|
223 |
+ sim[count > 0] <- sim[count > 0] / count[count > 0] |
|
224 |
+ sim[sim == 0] <- 1 |
|
225 |
+ v <- dp(sim, gap = gap, verbose = verbose) |
|
226 |
+ } |
|
227 |
+ mi <- .merge.indices(nrow(left.ind), nrow(right.ind), v$match) |
|
228 |
+ new.ind <- cbind(left.ind[mi[, 1], ], right.ind[mi[, 2], ]) |
|
229 |
+ } |
|
230 |
+ rownames(new.ind) <- 1:nrow(new.ind) |
|
231 |
+ merges[[i]] <- list(ind = new.ind, mi = mi, runs = c(left.runs, right.runs), |
|
232 |
+ left = left.runs, right = right.runs, r = sim, compressed = FALSE) |
|
233 |
+ } |
|
254 | 234 |
cA <- NULL |
255 |
- if(verbose) |
|
256 |
- { |
|
235 |
+ if (verbose) { |
|
257 | 236 |
print(gc()) |
258 | 237 |
} |
259 | 238 |
pA <- new("progressiveAlignment", merges = merges) |
260 |
- if(compress) |
|
261 |
- { |
|
262 |
- return(compress(pA, verbose=verbose)) |
|
263 |
- } |
|
264 |
- else |
|
265 |
- { |
|
239 |
+ if (compress) { |
|
240 |
+ return(compress(pA, verbose = verbose)) |
|
241 |
+ } else { |
|
266 | 242 |
return(pA) |
267 | 243 |
} |
268 | 244 |
} |
269 | 245 |
|
270 | 246 |
|
271 | 247 |
.merge.indices <- function(nl, nr, m) { |
272 |
- lind <- cbind(1:nl, 0) |
|
273 |
- lind[lind[,1] %in% m[,1],2] <- 1 |
|
274 |
- rind <- cbind(1:nr,0) |
|
275 |
- rind[rind[,1] %in% m[,2],2] <- 1 |
|
276 |
- mg <- matrix(NA, nrow=nrow(lind)+nrow(rind)-nrow(m), ncol=2) |
|
277 |
- #print(dim(mg)) |
|
278 |
- li <- 1; ri <- 1; i <- 1 |
|
279 |
- while(i <= nrow(mg)){ |
|
280 |
- if(li > nrow(lind) & ri <= nrow(rind)) |
|
281 |
- { |
|
282 |
- mg[i,] <- c(NA,rind[ri,1]) |
|
283 |
- ri <- ri+1 |
|
284 |
- i <- i+1 |
|
285 |
- next |
|
286 |
- } |
|
287 |
- if(li <= nrow(lind) & ri > nrow(rind)) |
|
288 |
- { |
|
289 |
- mg[i,] <- c(lind[li,1], NA) |
|
290 |
- li <- li+1 |
|
291 |
- i <- i+1 |
|
292 |
- next |
|
293 |
- } |
|
294 |
- if(lind[li,2] == 1) |
|
295 |
- { |
|
296 |
- if(rind[ri,2] == 1) |
|
297 |
- { # match |
|
298 |
- mg[i,] <- c(lind[li,1], rind[ri,1]) |
|
299 |
- ri <- ri+1 |
|
300 |
- li <- li+1 |
|
301 |
- #cat("match",i,li,ri,"-",mg[i,],"\n") |
|
302 |
- } |
|
303 |
- else |
|
304 |
- { # right unmatched |
|
305 |
- mg[i,] <- c(NA, rind[ri,1]) |
|
306 |
- ri <- ri+1 |
|
307 |
- #cat("right unmatched",i,li,ri,"-",mg[i,],"\n") |
|
308 |
- } |
|
309 |
- } |
|
310 |
- else |
|
311 |
- { |
|
312 |
- if(rind[ri,2] == 1) |
|
313 |
- { # left unmatched |
|
314 |
- mg[i,] <- c(lind[li,1], NA) |
|
315 |
- li <- li+1 |
|
316 |
- #cat("left unmatched",i,li,ri,"-",mg[i,],"\n") |
|
317 |
- } |
|
318 |
- else |
|
319 |
- { # both unmatched |
|
320 |
- mg[i,] <- c(lind[li,1], NA) |
|
321 |
- #cat("both unmatched - A",i,li,ri,"-",mg[i,],"\n") |
|
322 |
- i <- i+1 |
|
323 |
- mg[i,] <- c(NA, rind[ri,1]) |
|
324 |
- li <- li+1 |
|
325 |
- ri <- ri+1 |
|
326 |
- #cat("both unmatched - B",i,li,ri,"-",mg[i,],"\n") |
|
248 |
+ lind <- cbind(1:nl, 0) |
|
249 |
+ lind[lind[, 1] %in% m[, 1], 2] <- 1 |
|
250 |
+ rind <- cbind(1:nr, 0) |
|
251 |
+ rind[rind[, 1] %in% m[, 2], 2] <- 1 |
|
252 |
+ mg <- matrix(NA, nrow = nrow(lind) + nrow(rind) - nrow(m), ncol = 2) |
|
253 |
+ li <- 1; ri <- 1; i <- 1 |
|
254 |
+ while (i <= nrow(mg)) { |
|
255 |
+ if (li > nrow(lind) & ri <= nrow(rind)) { |
|
256 |
+ mg[i, ] <- c(NA, rind[ri, 1]) |
|
257 |
+ ri <- ri + 1 |
|
258 |
+ i <- i + 1 |
|
259 |
+ next |
|
260 |
+ } |
|
261 |
+ if (li <= nrow(lind) & ri > nrow(rind)) { |
|
262 |
+ mg[i, ] <- c(lind[li, 1], NA) |
|
263 |
+ li <- li + 1 |
|
264 |
+ i <- i + 1 |
|
265 |
+ next |
|
266 |
+ } |
|
267 |
+ if (lind[li, 2] == 1) { |
|
268 |
+ if(rind[ri, 2] == 1) { # match |
|
269 |
+ mg[i, ] <- c(lind[li, 1], rind[ri, 1]) |
|
270 |
+ ri <- ri + 1 |
|
271 |
+ li <- li + 1 |
|
272 |
+ # cat("match", i, li, ri, "-", mg[i, ], "\n") |
|
273 |
+ } else { # right unmatched |
|
274 |
+ mg[i, ] <- c(NA, rind[ri, 1]) |
|
275 |
+ ri <- ri + 1 |
|
276 |
+ # cat("right unmatched", i, li, ri, "-", mg[i, ], "\n") |
|
277 |
+ } |
|
278 |
+ } else { |
|
279 |
+ if (rind[ri, 2] == 1) { # left unmatched |
|
280 |
+ mg[i, ] <- c(lind[li, 1], NA) |
|
281 |
+ li <- li + 1 |
|
282 |
+ # cat("left unmatched", i, li, ri, "-", mg[i, ], "\n") |
|
283 |
+ } else {# both unmatched |
|
284 |
+ mg[i, ] <- c(lind[li, 1], NA) |
|
285 |
+ # cat("both unmatched - A", i, li, ri, "-", mg[i, ], "\n") |
|
286 |
+ i <- i + 1 |
|
287 |
+ mg[i, ] <- c(NA, rind[ri, 1]) |
|
288 |
+ li <- li + 1 |
|
289 |
+ ri <- ri + 1 |
|
290 |
+ # cat("both unmatched - B", i, li, ri, "-", mg[i, ], "\n") |
|
327 | 291 |
} |
328 |
- } |
|
329 |
- i <- i+1 |
|
292 |
+ } |
|
293 |
+ i <- i + 1 |
|
330 | 294 |
} |
331 | 295 |
mg |
332 |
-} |
|
333 |
- |
|
296 |
+} |
|
334 | 297 |
\ No newline at end of file |
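A small worked example (not part of the patch) of what the rewritten .merge.indices() helper returns: with three peaks on each side and matches (1,1) and (3,2), the merged index matrix has one row per merged position, with NA marking an unmatched peak:

    m <- rbind(c(1, 1), c(3, 2))
    .merge.indices(3, 3, m)
    ##      [,1] [,2]
    ## [1,]    1    1     # matched pair
    ## [2,]    2   NA     # left peak 2 has no partner
    ## [3,]    3    2     # matched pair
    ## [4,]   NA    3     # right peak 3 has no partner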
... | ... |
@@ -19,52 +19,55 @@ |
19 | 19 |
#' @examples |
20 | 20 |
#' |
21 | 21 |
#' require(gcspikelite) |
22 |
-#' # paths and files |
|
23 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/") |
|
24 |
-#' cdfFiles <- dir(gcmsPath,"CDF",full=TRUE) |
|
25 |
-#' # read data, peak detection results |
|
26 |
-#' pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), |
|
27 |
-#' rtrange=c(7.5,8.5)) |
|
28 |
-#' pd <- addXCMSPeaks(files=cdfFiles[1:2], object=pd, |
|
29 |
-#' peakPicking=c('mF'), snthresh=3, fwhm=4, |
|
30 |
-#' step=1, steps=2, mzdiff=0.5) |
|
31 |
-#' ma <- multipleAlignment(pd = pd, group = c(1,1), |
|
22 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
23 |
+#' sep = "/"),"CDF", full = TRUE) |
|
24 |
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
25 |
+#' ## create settings object |
|
26 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
27 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
28 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
29 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
30 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
31 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
32 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
33 |
+#' data |
|
34 |
+#' ma <- multipleAlignment(pd = data, group = c(1,1), |
|
32 | 35 |
#' filterMin = 1, metric = 2, type = 2) |
33 |
-#' outList <- gatherInfo(pd, ma) |
|
34 |
-#' mtxD <- retFatMatrix(object = pd, data = outList, minFilter = 1) |
|
36 |
+#' outList <- gatherInfo(data, ma) |
|
37 |
+#' mtxD <- retFatMatrix(object = data, data = outList, minFilter = 1) |
|
35 | 38 |
#' |
36 | 39 |
#' @export retFatMatrix |
37 |
-retFatMatrix <- function (object, data, minFilter = round(length(object@files)/3*2)) |
|
38 |
-{ |
|
39 |
- a <- lapply(seq(along = data), function(x) |
|
40 |
- { |
|
40 |
+retFatMatrix <- function (object, data, |
|
41 |
+ minFilter = round(length(object@files) / 3 * 2)) { |
|
42 |
+ a <- lapply(seq(along = data), function(x) { |
|
41 | 43 |
apply(data[[x]]$data, 2, sum) |
42 |
- } |
|
43 |
- ) |
|
44 |
+ } |
|
45 |
+ ) |
|
44 | 46 |
## column names correspond to the file number; 
45 | 47 |
## the row number corresponds to the slot of the gatherInfo() list 
46 |
- abumtx <- do.call(rbind, a) |
|
48 |
+ abumtx <- do.call(rbind, a) |
|
47 | 49 |
abumtx <- apply(abumtx, 1, "[") |
48 | 50 |
files_to_merge <- rownames(abumtx) |
49 |
- if(length(grep(pattern = "^[1-9].", files_to_merge)) == 0) |
|
50 |
- { |
|
51 |
- files.idx <- as.numeric(sub(pattern = "^.", replacement = "", files_to_merge)) |
|
51 |
+ if (length(grep(pattern = "^[1-9].", files_to_merge)) == 0) { |
|
52 |
+ files.idx <- as.numeric(sub(pattern = "^.", replacement = "", |
|
53 |
+ files_to_merge)) |
|
52 | 54 |
} |
53 |
- if(length(grep(pattern = "^[1-9].", files_to_merge)) > 0) |
|
54 |
- { |
|
55 |
- files.idx <- as.numeric(sub(pattern = "^[1-9].", replacement = "", files_to_merge)) |
|
55 |
+ if (length(grep(pattern = "^[1-9].", files_to_merge)) > 0) { |
|
56 |
+ files.idx <- as.numeric(sub(pattern = "^[1-9].", replacement = "", |
|
57 |
+ files_to_merge)) |
|
56 | 58 |
} |
57 | 59 |
sample <- object@files[files.idx] |
58 |
- colnames(abumtx) <- sapply(1:ncol(abumtx), function(x){paste0("Feat", x)}) |
|
60 |
+ colnames(abumtx) <- sapply(1:ncol(abumtx), function(x) { |
|
61 |
+ paste0("Feat", x) |
|
62 |
+ } |
|
63 |
+ ) |
|
59 | 64 |
mf <- minFilter |
60 | 65 |
keep <- c() |
61 |
- for(g in 1:ncol(abumtx)) |
|
62 |
- { |
|
66 |
+ for (g in 1:ncol(abumtx)) { |
|
63 | 67 |
keep[g] <- sum(!is.na(abumtx[, g])) >= mf |
64 |
- } |
|
68 |
+ } |
|
65 | 69 |
|
66 | 70 |
abumtx[is.na(abumtx)] <- c(0) |
67 | 71 |
df <- cbind.data.frame(sample, abumtx[, keep]) |
68 | 72 |
return(df) |
69 |
-} |
|
70 |
- |
|
73 |
+} |
|
71 | 74 |
\ No newline at end of file |
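retFatMatrix() flattens a gatherInfo() result into one row per sample and one FeatN column per aligned feature, so the returned data frame drops straight into standard R statistics. A minimal sketch (not part of the patch), reusing data and outList from the example above:

    mtxD <- retFatMatrix(object = data, data = outList, minFilter = 1)
    head(mtxD)                  # first column 'sample', then Feat1, Feat2, ...
    pca <- prcomp(mtxD[, -1])   # drop the sample column before modelling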
... | ... |
@@ -4,8 +4,13 @@ |
4 | 4 |
\alias{addChromaTOFPeaks} |
5 | 5 |
\title{Add ChromaTOF peak detection results} |
6 | 6 |
\usage{ |
7 |
-addChromaTOFPeaks(object, fns = dir(, "[Tt][Xx][Tx]"), rtDivide = 60, |
|
8 |
- verbose = TRUE, ...) |
|
7 |
+addChromaTOFPeaks( |
|
8 |
+ object, |
|
9 |
+ fns = dir(, "[Tt][Xx][Tx]"), |
|
10 |
+ rtDivide = 60, |
|
11 |
+ verbose = TRUE, |
|
12 |
+ ... |
|
13 |
+) |
|
9 | 14 |
} |
10 | 15 |
\arguments{ |
11 | 16 |
\item{object}{a \code{peaksDataset} object.} |
... | ... |
@@ -2,35 +2,57 @@ |
2 | 2 |
% Please edit documentation in R/addXCMSPeaks.R |
3 | 3 |
\name{addXCMSPeaks} |
4 | 4 |
\alias{addXCMSPeaks} |
5 |
-\title{Add xcms/CAMERA peak detection results} |
|
5 |
+\title{addXCMSPeaks} |
|
6 | 6 |
\usage{ |
7 |
-addXCMSPeaks(files, object, peakPicking = c("cwt", "mF"), |
|
8 |
- perfwhm = 0.75, quick = TRUE, ...) |
|
7 |
+addXCMSPeaks( |
|
8 |
+ files, |
|
9 |
+ object, |
|
10 |
+ settings, |
|
11 |
+ rtrange = NULL, |
|
12 |
+ mzrange = NULL, |
|
13 |
+ perfwhm = 0.75, |
|
14 |
+ minintens = 100, |
|
15 |
+ minfeat = 6, |
|
16 |
+ multipleMatchedFilter = FALSE, |
|
17 |
+ multipleMatchedFilterParam = list(fwhm = c(5, 10, 20), mz_abs = 0.1, rt_abs = 3) |
|
18 |
+) |
|
9 | 19 |
} |
10 | 20 |
\arguments{ |
11 |
-\item{files}{character vector of same length as \code{object@rawdata} (user |
|
12 |
-ensures the order matches)} |
|
21 |
+\item{files}{list of chromatogram files} |
|
13 | 22 |
|
14 |
-\item{object}{a \code{peaksDataset} object.} |
|
23 |
+\item{object}{a \code{peaksDataset} object} 
|
15 | 24 |
|
16 |
-\item{peakPicking}{Methods to use for peak detection. See details.} |
|
25 |
+\item{settings}{list. It contains the settings for the peak-picking} 
|
17 | 26 |
|
18 |
-\item{perfwhm}{percentage of full width half maximum. See |
|
19 |
-CAMERA::groupFWHM() for more details} |
|
27 |
+\item{rtrange}{vector; retention time range} |
|
20 | 28 |
|
21 |
-\item{quick}{logical. See CAMERA::annotate() for more details} |
|
29 |
+\item{mzrange}{vector, mz range} |
|
22 | 30 |
|
23 |
-\item{...}{arguments passed on to \code{xcmsSet} and \code{annotate}} |
|
31 |
+\item{perfwhm}{determines the maximal retention time difference of features in 
|
32 |
+one pseudospectrum.} |
|
33 |
+ |
|
34 |
+\item{minintens}{minimum ion intensity to be included in a pseudospectrum} 
|
35 |
+ |
|
36 |
+\item{minfeat}{minimum number of ions required to create a pseudospectrum} 
|
37 |
+ |
|
38 |
+\item{multipleMatchedFilter}{logical. Try to remove redundant peaks, in 
|
39 |
+this case where there are any peaks within an absolute m/z value of 0.2 and |
|
40 |
+within 3 s for any one sample in the xcmsSet (the largest peak is kept)} |
|
41 |
+ |
|
42 |
+\item{multipleMatchedFilterParam}{list. It contains the settings for the 
|
43 |
+peak-picking. mz_abs represents the m/z range; rt_abs represents the rt range} 
|
24 | 44 |
} |
25 | 45 |
\value{ |
26 | 46 |
\code{peaksDataset} object |
27 | 47 |
} |
28 | 48 |
\description{ |
49 |
+Add xcms/CAMERA peak detection results |
|
50 |
+} |
|
51 |
+\details{ |
|
29 | 52 |
Reads the raw data using xcms, group each extracted ion according to their |
30 | 53 |
retention time using CAMERA and attaches them to an already created |
31 | 54 |
\code{peaksDataset} object |
32 |
-} |
|
33 |
-\details{ |
|
55 |
+ |
|
34 | 56 |
Repeated calls to xcmsSet and annotate to perform peak-picking and |
35 | 57 |
deconvolution. The peak detection results are added to the original |
36 | 58 |
\code{peaksDataset} object. Two peak detection algorithms are available: 
... | ... |
@@ -39,18 +61,18 @@ approach (peakPicking=c('mF')) described by Smith et al (2006). For further |
39 | 61 |
information consult the xcms package manual. |
40 | 62 |
} |
41 | 63 |
\examples{ |
42 |
- |
|
43 |
-# need access to CDF (raw data) |
|
44 |
-require(gcspikelite) |
|
45 |
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
46 |
- |
|
47 |
-# full paths to file names |
|
48 |
-cdfFiles <- dir(gcmsPath, "CDF", full=TRUE) |
|
49 |
- |
|
50 |
-# create a 'peaksDataset' object and add XCMS peaks to it |
|
51 |
-pd <- peaksDataset(cdfFiles[1], mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
52 |
-pd <- addXCMSPeaks(cdfFiles[1], pd, peakPicking=c('mF'), |
|
53 |
- snthresh=3, fwhm=4, step=1, steps=2, mzdiff=0.5) |
|
64 |
+files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
65 |
+ sep = "/"),"CDF", full = TRUE) |
|
66 |
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
67 |
+## create settings object |
|
68 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
69 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
70 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
71 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
72 |
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
73 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
74 |
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
75 |
+data |
|
54 | 76 |
|
55 | 77 |
} |
56 | 78 |
\seealso{ |
... | ... |
@@ -8,9 +8,22 @@ |
8 | 8 |
\alias{betweenAlignment-method} |
9 | 9 |
\title{Data Structure for "between" alignment of many GCMS samples} |
10 | 10 |
\usage{ |
11 |
-betweenAlignment(pD, cAList, pAList, impList, filterMin = 1, gap = 0.7, |
|
12 |
- D = 10, usePeaks = TRUE, df = 30, verbose = TRUE, metric = 2, |
|
13 |
- type = 2, penality = 0.2, compress = FALSE) |
|
11 |
+betweenAlignment( |
|
12 |
+ pD, |
|
13 |
+ cAList, |
|
14 |
+ pAList, |
|
15 |
+ impList, |
|
16 |
+ filterMin = 1, |
|
17 |
+ gap = 0.7, |
|
18 |
+ D = 10, |
|
19 |
+ usePeaks = TRUE, |
|
20 |
+ df = 30, |
|
21 |
+ verbose = TRUE, |
|
22 |
+ metric = 2, |
|
23 |
+ type = 2, |
|
24 |
+ penality = 0.2, |
|
25 |
+ compress = FALSE |
|
26 |
+) |
|
14 | 27 |
} |
15 | 28 |
\arguments{ |
16 | 29 |
\item{pD}{a \code{peaksDataset} object} |
... | ... |
@@ -10,8 +10,14 @@ |
10 | 10 |
\alias{plot,clusterAlignment,ANY-method} |
11 | 11 |
\title{Data Structure for a collection of all pairwise alignments of GCMS runs} |
12 | 12 |
\usage{ |
13 |
-clusterAlignment(pD, runs = 1:length(pD@rawdata), timedf = NULL, |
|
14 |
- usePeaks = TRUE, verbose = TRUE, ...) |
|
13 |
+clusterAlignment( |
|
14 |
+ pD, |
|
15 |
+ runs = 1:length(pD@rawdata), |
|
16 |
+ timedf = NULL, |
|
17 |
+ usePeaks = TRUE, |
|
18 |
+ verbose = TRUE, |
|
19 |
+ ... |
|
20 |
+) |
|
15 | 21 |
} |
16 | 22 |
\arguments{ |
17 | 23 |
\item{pD}{a \code{peaksDataset} object.} |
... | ... |
@@ -1,6 +1,5 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/progressiveAlignment.R |
3 |
-\docType{methods} |
|
4 | 3 |
\name{compress,progressiveAlignment-method} |
5 | 4 |
\alias{compress,progressiveAlignment-method} |
6 | 5 |
\title{Compress method for progressiveAlignment} |
... | ... |
@@ -35,18 +35,23 @@ retention time window (\code{D})and returns the similarity matrix. |
35 | 35 |
|
36 | 36 |
## Not Run |
37 | 37 |
require(gcspikelite) |
38 |
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
39 |
-cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
40 |
-## read data, peak detection results |
|
41 |
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
42 |
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
43 |
- snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
44 |
- sleep=0) |
|
38 |
+files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
39 |
+ sep = "/"),"CDF", full = TRUE) |
|
40 |
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
41 |
+## create settings object |
|
42 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
43 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
44 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
45 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
46 |
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
47 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
48 |
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
49 |
+data |
|
45 | 50 |
## review peak picking |
46 |
-plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
51 |
+plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2)) |
|
47 | 52 |
|
48 |
-r <- corPrt(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
49 |
- pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, penality=0.2) |
|
53 |
+r <- corPrt(data@peaksdata[[1]], data@peaksdata[[2]], |
|
54 |
+ data@peaksrt[[1]], data@peaksrt[[2]], D = 50, penality = 0.2) |
|
50 | 55 |
## End (Not Run) |
51 | 56 |
|
52 | 57 |
} |
53 | 58 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,29 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/addXCMSPeaks.R |
|
3 |
+\name{de_Duper} |
|
4 |
+\alias{de_Duper} |
|
5 |
+\title{deDuper} |
|
6 |
+\usage{ |
|
7 |
+de_Duper(object, mz_abs = 0.1, rt_abs = 2) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{object}{xcms object} |
|
11 |
+ |
|
12 |
+\item{mz_abs}{mz range} |
|
13 |
+ |
|
14 |
+\item{rt_abs}{rt range} |
|
15 |
+} |
|
16 |
+\value{ |
|
17 |
+an object of xcms class |
|
18 |
+} |
|
19 |
+\description{ |
|
20 |
+Duplicate peak removal function |
|
21 |
+} |
|
22 |
+\details{ |
|
23 |
+Remove redundant peaks, in this case where there are any peaks within an |
|
24 |
+absolute m/z value of 0.2 and within 3 s for any one sample in the xcmsSet |
|
25 |
+(the largest peak is kept) |
|
26 |
+} |
|
27 |
+\author{ |
|
28 |
+r |
|
29 |
+} |
... | ... |
@@ -1,6 +1,5 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/progressiveAlignment.R |
3 |
-\docType{methods} |
|
4 | 3 |
\name{decompress,progressiveAlignment-method} |
5 | 4 |
\alias{decompress,progressiveAlignment-method} |
6 | 5 |
\title{Compress method for progressiveAlignment} |
... | ... |
@@ -8,17 +7,17 @@ |
8 | 7 |
\S4method{decompress}{progressiveAlignment}(object, verbose = TRUE, ...) |
9 | 8 |
} |
10 | 9 |
\arguments{ |
11 |
-\item{object}{dummy} |
|
10 |
+\item{object}{progressiveAlignment object} |
|
12 | 11 |
|
13 |
-\item{verbose}{dummy} |
|
12 |
+\item{verbose}{logical} |
|
14 | 13 |
|
15 | 14 |
\item{...}{dummy} |
16 | 15 |
} |
17 | 16 |
\description{ |
18 |
-Compress method for progressiveAlignment |
|
17 |
+Decompress method for progressiveAlignment |
|
19 | 18 |
} |
20 | 19 |
\details{ |
21 |
-Compress method for progressiveAlignment |
|
20 |
+Decompress method for progressiveAlignment |
|
22 | 21 |
} |
23 | 22 |
\author{ |
24 | 23 |
MR |
25 | 24 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,26 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/importSpectra.R |
|
3 |
+\name{distToLib} |
|
4 |
+\alias{distToLib} |
|
5 |
+\title{distToLib} |
|
6 |
+\usage{ |
|
7 |
+distToLib(mspLib, outList) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{mspLib}{a .msp file from NIST} |
|
11 |
+ |
|
12 |
+\item{outList}{an object from gatherInfo()} |
|
13 |
+} |
|
14 |
+\value{ |
|
15 |
+the distance matrix between the mass spec and the aligned spec |
|
16 |
+} |
|
17 |
+\description{ |
|
18 |
+The function calculates the distance between each mass spectrum in the msp file 
|
19 |
+and the aligned mass spectra from each sample 
|
20 |
+} |
|
21 |
+\details{ |
|
22 |
+Returns the distance matrix 
|
23 |
+} |
|
24 |
+\author{ |
|
25 |
+Riccardo Romoli |
|
26 |
+} |
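A minimal usage sketch for the new library-matching helpers (not part of the patch; the exact shape of the objects is an assumption based on the documentation): importSpec() reads the .msp library and distToLib() scores every library spectrum against every aligned pseudospectrum from gatherInfo():

    lib <- importSpec("nist_library.msp")            # hypothetical .msp file
    dmat <- distToLib(mspLib = lib, outList = gip)   # gip assumed to come from gatherInfo()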
... | ... |
@@ -23,21 +23,25 @@ the mass spectra |
23 | 23 |
\examples{ |
24 | 24 |
|
25 | 25 |
require(gcspikelite) |
26 |
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
27 |
-cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
28 |
-## read data, peak detection results |
|
29 |
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), |
|
30 |
- rtrange=c(7.5,10.5)) |
|
31 |
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, |
|
32 |
- peakPicking=c('mF'),snthresh=3, fwhm=10, step=0.1, steps=2, |
|
33 |
- mzdiff=0.5, sleep=0) |
|
26 |
+files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
27 |
+ sep = "/"),"CDF", full = TRUE) |
|
28 |
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
29 |
+## create settings object |
|
30 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
31 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
32 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
33 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
34 |
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
35 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
36 |
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
37 |
+data |
|
34 | 38 |
## review peak picking |
35 |
-plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
39 |
+plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2)) |
|
36 | 40 |
## similarity |
37 |
-r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]], pd@peaksrt[[1]], |
|
38 |
- pd@peaksrt[[2]], D=50) |
|
41 |
+r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]], data@peaksrt[[1]], |
|
42 |
+ data@peaksrt[[2]], D = 50) |
|
39 | 43 |
## dynamic retention time based alignment algorithm |
40 |
-v <- dynRT(S=r) |
|
44 |
+v <- dynRT(S = r) |
|
41 | 45 |
|
42 | 46 |
} |
43 | 47 |
\author{ |
... | ... |
@@ -4,9 +4,16 @@ |
4 | 4 |
\alias{gatherInfo} |
5 | 5 |
\title{Gathers abundance information from an alignment} 
6 | 6 |
\usage{ |
7 |
-gatherInfo(pD, obj, newind = NULL, method = c("apex"), |
|
8 |
- findmzind = TRUE, useTIC = FALSE, top = NULL, |
|
9 |
- intensity.cut = 0.05) |
|
7 |
+gatherInfo( |
|
8 |
+ pD, |
|
9 |
+ obj, |
|
10 |
+ newind = NULL, |
|
11 |
+ method = c("apex"), |
|
12 |
+ findmzind = TRUE, |
|
13 |
+ useTIC = FALSE, |
|
14 |
+ top = NULL, |
|
15 |
+ intensity.cut = 0.05 |
|
16 |
+) |
|
10 | 17 |
} |
11 | 18 |
\arguments{ |
12 | 19 |
\item{pD}{a \code{peaksDataset} object, to get the abundance data from} |
13 | 20 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,25 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/importSpectra.R |
|
3 |
+\name{headToTailPlot} |
|
4 |
+\alias{headToTailPlot} |
|
5 |
+\title{Head to tail plot} |
|
6 |
+\usage{ |
|
7 |
+headToTailPlot(specFromLib, specFromList) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{specFromLib}{the mass spectra obtained from the .msp file} |
|
11 |
+ |
|
12 |
+\item{specFromList}{the mass spectra obtained from \code{\link{gatherInfo}}} |
|
13 |
+} |
|
14 |
+\value{ |
|
15 |
+the plot |
|
16 |
+} |
|
17 |
+\description{ |
|
18 |
+The head-to-tail-plot for the mass spectra |
|
19 |
+} |
|
20 |
+\details{ |
|
21 |
+Head-to-tail-plot to visually compare the mass spectra |
|
22 |
+} |
|
23 |
+\author{ |
|
24 |
+Riccardo Romoli |
|
25 |
+} |
0 | 26 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,24 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/importSpectra.R |
|
3 |
+\name{importSpec} |
|
4 |
+\alias{importSpec} |
|
5 |
+\title{importSpec} |
|
6 |
+\usage{ |
|
7 |
+importSpec(file) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{file}{a .msp file from NIST search library database} |
|
11 |
+} |
|
12 |
+\value{ |
|
13 |
+list containing the mass spectra 
|
14 |
+} |
|
15 |
+\description{ |
|
16 |
+Read the mass spectra from an external msp file |
|
17 |
+} |
|
18 |
+\details{ |
|
19 |
+Read the mass spectra from an external file in msp format. The format is |
|
20 |
+used in NIST search library database. |
|
21 |
+} |
|
22 |
+\author{ |
|
23 |
+riccardo.romoli@unifi.it 
|
24 |
+} |
... | ... |
@@ -4,8 +4,7 @@ |
4 | 4 |
\alias{imputePeaks} |
5 | 5 |
\title{Imputation of locations of peaks that were undetected} 
6 | 6 |
\usage{ |
7 |
-imputePeaks(pD, obj, typ = 1, obj2 = NULL, filterMin = 1, |
|
8 |
- verbose = TRUE) |
|
7 |
+imputePeaks(pD, obj, typ = 1, obj2 = NULL, filterMin = 1, verbose = TRUE) |
|
9 | 8 |
} |
10 | 9 |
\arguments{ |
11 | 10 |
\item{pD}{a \code{peaksDataset} object} |
12 | 11 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,28 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/importSpectra.R |
|
3 |
+\name{matchSpec} |
|
4 |
+\alias{matchSpec} |
|
5 |
+\title{matchSpec} |
|
6 |
+\usage{ |
|
7 |
+matchSpec(spec1, outList, whichSpec) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{spec1}{reference mass spectrum} |
|
11 |
+ |
|
12 |
+\item{outList}{the return of \code{\link{gatherInfo}}} |
|
13 |
+ |
|
14 |
+\item{whichSpec}{the entry number of outList} |
|
15 |
+} |
|
16 |
+\value{ |
|
17 |
+the distance between the reference mass spectrum and the others |
|
18 |
+} |
|
19 |
+\description{ |
|
20 |
+Calculate the distance between a reference mass spectrum |
|
21 |
+} |
|
22 |
+\details{ |
|
23 |
+Calculate the distance between a reference mass spectrum and one from the |
|
24 |
+sample |
|
25 |
+} |
|
26 |
+\author{ |
|
27 |
+Riccardo Romoli |
|
28 |
+} |
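For single spectra, matchSpec() and headToTailPlot() can be combined into a quick identification check; a minimal sketch (not part of the patch), with lib and gip assumed to be an importSpec() result and a gatherInfo() result:

    d33 <- matchSpec(spec1 = lib[[1]], outList = gip, whichSpec = 33)   # distance to entry 33
    headToTailPlot(specFromLib = lib[[1]], specFromList = gip[[33]])    # visual comparison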
... | ... |
@@ -8,10 +8,25 @@ |
8 | 8 |
\alias{multipleAlignment-method} |
9 | 9 |
\title{Data Structure for multiple alignment of many GCMS samples} |
10 | 10 |
\usage{ |
11 |
-multipleAlignment(pd, group, bw.gap = 0.8, wn.gap = 0.6, bw.D = 0.2, |
|
12 |
- wn.D = 0.05, filterMin = 1, lite = FALSE, usePeaks = TRUE, |
|
13 |
- df = 50, verbose = TRUE, timeAdjust = FALSE, doImpute = FALSE, |
|
14 |
- metric = 2, type = 2, penality = 0.2, compress = FALSE) |
|
11 |
+multipleAlignment( |
|
12 |
+ pd, |
|
13 |
+ group, |
|
14 |
+ bw.gap = 0.8, |
|
15 |
+ wn.gap = 0.6, |
|
16 |
+ bw.D = 0.2, |
|
17 |
+ wn.D = 0.05, |
|
18 |
+ filterMin = 1, |
|
19 |
+ lite = FALSE, |
|
20 |
+ usePeaks = TRUE, |
|
21 |
+ df = 50, |
|
22 |
+ verbose = TRUE, |
|
23 |
+ timeAdjust = FALSE, |
|
24 |
+ doImpute = FALSE, |
|
25 |
+ metric = 2, |
|
26 |
+ type = 2, |
|
27 |
+ penality = 0.2, |
|
28 |
+ compress = FALSE |
|
29 |
+) |
|
15 | 30 |
} |
16 | 31 |
\arguments{ |
17 | 32 |
\item{pd}{a \code{peaksDataset} object} |
... | ... |
@@ -22,7 +22,7 @@ matrix of similarities |
22 | 22 |
} |
23 | 23 |
\description{ |
24 | 24 |
This function calculates the similarity of all pairs of peaks from 2 |
25 |
-samples, using the spectra similarity and the rretention time differencies |
|
25 |
+samples, using the spectra similarity and the retention time differences 
|
26 | 26 |
} |
27 | 27 |
\details{ |
28 | 28 |
Computes the normalized dot product between every pair of peak vectors in |
... | ... |
@@ -32,19 +32,23 @@ the retention time window (\code{D})and returns a similarity matrix. |
32 | 32 |
|
33 | 33 |
## Not Run |
34 | 34 |
require(gcspikelite) |
35 |
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
36 |
-cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
37 |
- |
|
38 |
- # read data, peak detection results |
|
39 |
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
40 |
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
41 |
- snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
42 |
- sleep=0) |
|
35 |
+files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
36 |
+ sep = "/"),"CDF", full = TRUE) |
|
37 |
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
38 |
+## create settings object |
|
39 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
40 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
41 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
42 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
43 |
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
44 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
45 |
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
46 |
+data |
|
43 | 47 |
## review peak picking |
44 |
-plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
48 |
+plotChrom(data, rtrange = c(7.5, 10.5), runs = c(1:2)) |
|
45 | 49 |
|
46 |
-r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
47 |
- pd@peaksrt[[1]], pd@peaksrt[[2]], D=50) |
|
50 |
+r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]], |
|
51 |
+ data@peaksrt[[1]], data@peaksrt[[2]], D = 50) |
|
48 | 52 |
## End (Not Run) |
49 | 53 |
|
50 | 54 |
} |
... | ... |
@@ -4,8 +4,16 @@ |
4 | 4 |
\alias{normDotProduct} |
5 | 5 |
\title{Normalized Dot Product} |
6 | 6 |
\usage{ |
7 |
-normDotProduct(x1, x2, t1 = NULL, t2 = NULL, df = max(ncol(x1), |
|
8 |
- ncol(x2)), D = 1e+05, timedf = NULL, verbose = FALSE) |
|
7 |
+normDotProduct( |
|
8 |
+ x1, |
|
9 |
+ x2, |
|
10 |
+ t1 = NULL, |
|
11 |
+ t2 = NULL, |
|
12 |
+ df = max(ncol(x1), ncol(x2)), |
|
13 |
+ D = 1e+05, |
|
14 |
+ timedf = NULL, |
|
15 |
+ verbose = FALSE |
|
16 |
+) |
|
9 | 17 |
} |
10 | 18 |
\arguments{ |
11 | 19 |
\item{x1}{data matrix for sample 1} |
... | ... |
@@ -4,8 +4,15 @@ |
4 | 4 |
\alias{parseChromaTOF} |
5 | 5 |
\title{Parser for ChromaTOF files} |
6 | 6 |
\usage{ |
7 |
-parseChromaTOF(fn, min.pc = 0.01, mz = seq(85, 500), rt.cut = 0.008, |
|
8 |
- rtrange = NULL, skip = 1, rtDivide = 60) |
|
7 |
+parseChromaTOF( |
|
8 |
+ fn, |
|
9 |
+ min.pc = 0.01, |
|
10 |
+ mz = seq(85, 500), |
|
11 |
+ rt.cut = 0.008, |
|
12 |
+ rtrange = NULL, |
|
13 |
+ skip = 1, |
|
14 |
+ rtDivide = 60 |
|
15 |
+) |
|
9 | 16 |
} |
10 | 17 |
\arguments{ |
11 | 18 |
\item{fn}{ChromaTOF filename to read.} |
... | ... |
@@ -4,8 +4,7 @@ |
4 | 4 |
\alias{parseELU} |
5 | 5 |
\title{Parser for ELU files} |
6 | 6 |
\usage{ |
7 |
-parseELU(f, min.pc = 0.01, mz = seq(50, 550), rt.cut = 0.008, |
|
8 |
- rtrange = NULL) |
|
7 |
+parseELU(f, min.pc = 0.01, mz = seq(50, 550), rt.cut = 0.008, rtrange = NULL) |
|
9 | 8 |
} |
10 | 9 |
\arguments{ |
11 | 10 |
\item{f}{ELU filename to read.} |
... | ... |
@@ -10,9 +10,22 @@ |
10 | 10 |
\alias{plot,peaksAlignment,ANY-method} |
11 | 11 |
\title{Data Structure for pairwise alignment of 2 GCMS samples} |
12 | 12 |
\usage{ |
13 |
-peaksAlignment(d1, d2, t1, t2, gap = 0.5, D = 50, timedf = NULL, |
|
14 |
- df = 30, verbose = TRUE, usePeaks = TRUE, compress = TRUE, |
|
15 |
- metric = 2, type = 2, penality = 0.2) |
|
13 |
+peaksAlignment( |
|
14 |
+ d1, |
|
15 |
+ d2, |
|
16 |
+ t1, |
|
17 |
+ t2, |
|
18 |
+ gap = 0.5, |
|
19 |
+ D = 50, |
|
20 |
+ timedf = NULL, |
|
21 |
+ df = 30, |
|
22 |
+ verbose = TRUE, |
|
23 |
+ usePeaks = TRUE, |
|
24 |
+ compress = TRUE, |
|
25 |
+ metric = 2, |
|
26 |
+ type = 2, |
|
27 |
+ penality = 0.2 |
|
28 |
+) |
|
16 | 29 |
} |
17 | 30 |
\arguments{ |
18 | 31 |
\item{d1}{matrix of MS intensities for 1st sample (if doing a peak |
... | ... |
@@ -76,29 +89,32 @@ data. |
76 | 89 |
## see clusterAlignment, it calls peaksAlignment |
77 | 90 |
|
78 | 91 |
## Not Run: |
79 |
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
80 |
-cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
81 |
- |
|
82 |
-# read data, peak detection results |
|
83 |
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
84 |
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
85 |
- snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
86 |
- sleep=0) |
|
87 |
-## review peak picking |
|
88 |
-plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
92 |
+files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
93 |
+ sep = "/"),"CDF", full = TRUE) |
|
94 |
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
95 |
+## create settings object |
|
96 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
97 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
98 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
99 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
100 |
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
101 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
102 |
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
103 |
+data |
|
104 |
+plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2)) |
|
89 | 105 |
|
90 | 106 |
## align two chromatogram |
91 |
-pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
92 |
- pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, |
|
93 |
- metric=3, compress=FALSE, type=2, penality=0.2) |
|
107 |
+pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]], |
|
108 |
+ data@peaksrt[[1]], data@peaksrt[[2]], D = 50, |
|
109 |
+ metric = 3, compress = FALSE, type = 2, penality = 0.2) |
|
94 | 110 |
|
95 | 111 |
plotAlignment(pA) |
96 | 112 |
pA@v$match |
97 | 113 |
|
98 | 114 |
par(mfrow=c(2,1)) |
99 |
-plot(pd@peaksdata[[1]][,15], type='h', main=paste(pd@peaksrt[[1]][[15]])) |
|
100 |
-plot(pd@peaksdata[[2]][,17], type='h', |
|
101 |
- main=paste(pd@peaksrt[[2]][[17]])) |
|
115 |
+plot(data@peaksdata[[1]][,15], type = 'h', main = paste(data@peaksrt[[1]][[15]])) |
|
116 |
+plot(data@peaksdata[[2]][,17], type = 'h', |
|
117 |
+ main = paste(data@peaksrt[[2]][[17]])) |
|
102 | 118 |
## End (Not Run) |
103 | 119 |
|
104 | 120 |
} |
... | ... |
@@ -10,8 +10,13 @@ |
10 | 10 |
\alias{plot,peaksDataset,ANY-method} |
11 | 11 |
\title{Data Structure for raw GCMS data and peak detection results} |
12 | 12 |
\usage{ |
13 |
-peaksDataset(fns = dir(, "[Cc][Dd][Ff]"), verbose = TRUE, |
|
14 |
- mz = seq(50, 550), rtDivide = 60, rtrange = NULL) |
|
13 |
+peaksDataset( |
|
14 |
+ fns = dir(, "[Cc][Dd][Ff]"), |
|
15 |
+ verbose = TRUE, |
|
16 |
+ mz = seq(50, 550), |
|
17 |
+ rtDivide = 60, |
|
18 |
+ rtrange = NULL |
|
19 |
+) |
|
15 | 20 |
} |
16 | 21 |
\arguments{ |
17 | 22 |
\item{fns}{character vector, filenames of raw data in CDF format.} |
... | ... |
@@ -4,8 +4,14 @@ |
4 | 4 |
\alias{plotAlignedFrags} |
5 | 5 |
\title{plotAlignedFrags} |
6 | 6 |
\usage{ |
7 |
-plotAlignedFrags(object, outList, specID, fullRange = TRUE, |
|
8 |
- normalize = TRUE, ...) |
|
7 |
+plotAlignedFrags( |
|
8 |
+ object, |
|
9 |
+ outList, |
|
10 |
+ specID, |
|
11 |
+ fullRange = TRUE, |
|
12 |
+ normalize = TRUE, |
|
13 |
+ ... |
|
14 |
+) |
|
9 | 15 |
} |
10 | 16 |
\arguments{ |
11 | 17 |
\item{object}{where to keep the mass range of the experiment} |
... | ... |
@@ -31,24 +37,27 @@ Plot the deconvoluted and aligned mass spectra collected using gatherInfo() |
31 | 37 |
} |
32 | 38 |
\examples{ |
33 | 39 |
|
34 |
-## Rd workflow |
|
35 |
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/") |
|
36 |
-cdfFiles <- dir(gcmsPath,"CDF", full = TRUE) |
|
37 |
- |
|
38 |
-# read data, peak detection results |
|
39 |
-pd <- peaksDataset(cdfFiles[1:4], mz = seq(50,550), rtrange = c(7.5,10.5)) |
|
40 |
-pd <- addXCMSPeaks(files = cdfFiles[1:4], object = pd, peakPicking = c('mF'), |
|
41 |
- snthresh = 2, fwhm = 8, step = 0.5, steps = 2, mzdiff = 0.5, |
|
42 |
- sleep = 0) |
|
40 |
+files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
41 |
+ sep = "/"),"CDF", full = TRUE) |
|
42 |
+data <- peaksDataset(files[1:4], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
43 |
+## create settings object |
|
44 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
45 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
46 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
47 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
48 |
+data <- addXCMSPeaks(files[1:4], data, settings = mfp, minintens = 100, |
|
49 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
50 |
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
51 |
+data |
|
43 | 52 |
## multiple alignment |
44 |
-ma <- multipleAlignment(pd, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05, bw.gap = 0.6, |
|
45 |
- bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50, |
|
46 |
- verbose = TRUE, metric = 2, type = 2) |
|
53 |
+ma <- multipleAlignment(data, c(1,1,2,2), wn.gap = 0.5, wn.D = 0.05, |
|
54 |
+ bw.gap = 0.6, bw.D = 0.2, usePeaks = TRUE, filterMin = 1, df = 50, |
|
55 |
+ verbose = TRUE, metric = 2, type = 2) |
|
47 | 56 |
|
48 | 57 |
## gather apex intensities |
49 |
-gip <- gatherInfo(pd, ma) |
|
58 |
+gip <- gatherInfo(data, ma) |
|
50 | 59 |
gip[[33]] |
51 |
-plotAlignedFrags(object = pd, outList = gip, specID = 33) |
|
60 |
+plotAlignedFrags(object = data, outList = gip, specID = 33) |
|
52 | 61 |
|
53 | 62 |
} |
54 | 63 |
\author{ |
... | ... |
@@ -1,15 +1,22 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/peaksAlignment.R |
3 |
-\docType{methods} |
|
4 | 3 |
\name{plotAlignment,peaksAlignment-method} |
5 | 4 |
\alias{plotAlignment,peaksAlignment-method} |
6 | 5 |
\title{plotAlignment} |
7 | 6 |
\usage{ |
8 |
-\S4method{plotAlignment}{peaksAlignment}(object, xlab = "Peaks - run 1", |
|
9 |
- ylab = "Peaks - run 2", plotMatches = TRUE, matchPch = 19, |
|
10 |
- matchLwd = 3, matchCex = 0.5, matchCol = "black", |
|
11 |
- col = colorpanel(50, "white", "green", "navyblue"), breaks = seq(0, |
|
12 |
- 1, length = 51), ...) |
|
7 |
+\S4method{plotAlignment}{peaksAlignment}( |
|
8 |
+ object, |
|
9 |
+ xlab = "Peaks - run 1", |
|
10 |
+ ylab = "Peaks - run 2", |
|
11 |
+ plotMatches = TRUE, |
|
12 |
+ matchPch = 19, |
|
13 |
+ matchLwd = 3, |
|
14 |
+ matchCex = 0.5, |
|
15 |
+ matchCol = "black", |
|
16 |
+ col = colorpanel(50, "white", "green", "navyblue"), |
|
17 |
+ breaks = seq(0, 1, length = 51), |
|
18 |
+ ... |
|
19 |
+) |
|
13 | 20 |
} |
14 | 21 |
\arguments{ |
15 | 22 |
\item{object}{a \code{clusterAlignment} object} |
... | ... |
@@ -50,23 +57,24 @@ The similarity matrix is plotted and optionally, the set of matching peaks. |
50 | 57 |
\examples{ |
51 | 58 |
|
52 | 59 |
require(gcspikelite) |
53 |
- |
|
54 |
-## paths and files |
|
55 |
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
56 |
-cdfFiles <- dir(gcmsPath, "CDF", full=TRUE) |
|
57 |
-eluFiles <- dir(gcmsPath, "ELU", full=TRUE) |
|
58 |
- |
|
59 |
-## read data |
|
60 |
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
61 |
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
62 |
- snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5) |
|
63 |
- |
|
60 |
+files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
61 |
+ sep = "/"),"CDF", full = TRUE) |
|
62 |
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
63 |
+## create settings object |
|
64 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
65 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
66 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
67 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
68 |
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
69 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
70 |
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
71 |
+data |
|
64 | 72 |
## image plot |
65 |
-plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE) |
|
73 |
+plotChrom(data, rtrange = c(7.5, 8.5), plotPeaks = TRUE, plotPeakLabels = TRUE) |
|
66 | 74 |
|
67 | 75 |
## align two chromatogram |
68 |
-pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
69 |
- pd@peaksrt[[1]], pd@peaksrt[[2]], D = 50, |
|
76 |
+pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]], |
|
77 |
+ data@peaksrt[[1]], data@peaksrt[[2]], D = 50, |
|
70 | 78 |
compress = FALSE, type = 1, metric = 1, |
71 | 79 |
gap = 0.5) |
72 | 80 |
plotAlignment(pA) |
... | ... |
@@ -1,19 +1,33 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/peaksDataset.R |
3 |
-\docType{methods} |
|
4 | 3 |
\name{plotChrom,peaksDataset-method} |
5 | 4 |
\alias{plotChrom,peaksDataset-method} |
6 | 5 |
\title{Plotting functions for GCMS data objects} |
7 | 6 |
\usage{ |
8 |
-\S4method{plotChrom}{peaksDataset}(object, |
|
9 |
- runs = 1:length(object@rawdata), mzind = 1:nrow(object@rawdata[[1]]), |
|
10 |
- mind = NULL, plotSampleLabels = TRUE, calcGlobalMax = FALSE, |
|
11 |
- peakCex = 0.8, plotPeaks = TRUE, plotPeakBoundaries = FALSE, |
|
12 |
- plotPeakLabels = FALSE, plotMergedPeakLabels = TRUE, mlwd = 3, |
|
13 |
- usePeaks = TRUE, plotAcrossRuns = FALSE, overlap = F, |
|
14 |
- rtrange = NULL, cols = NULL, thin = 1, |
|
15 |
- max.near = median(object@rawrt[[1]]), how.near = 50, scale.up = 1, |
|
16 |
- ...) |
|
7 |
+\S4method{plotChrom}{peaksDataset}( |
|
8 |
+ object, |
|
9 |
+ runs = 1:length(object@rawdata), |
|
10 |
+ mzind = 1:nrow(object@rawdata[[1]]), |
|
11 |
+ mind = NULL, |
|
12 |
+ plotSampleLabels = TRUE, |
|
13 |
+ calcGlobalMax = FALSE, |
|
14 |
+ peakCex = 0.8, |
|
15 |
+ plotPeaks = TRUE, |
|
16 |
+ plotPeakBoundaries = FALSE, |
|
17 |
+ plotPeakLabels = FALSE, |
|
18 |
+ plotMergedPeakLabels = TRUE, |
|
19 |
+ mlwd = 3, |
|
20 |
+ usePeaks = TRUE, |
|
21 |
+ plotAcrossRuns = FALSE, |
|
22 |
+ overlap = F, |
|
23 |
+ rtrange = NULL, |
|
24 |
+ cols = NULL, |
|
25 |
+ thin = 1, |
|
26 |
+ max.near = median(object@rawrt[[1]]), |
|
27 |
+ how.near = 50, |
|
28 |
+ scale.up = 1, |
|
29 |
+ ... |
|
30 |
+) |
|
17 | 31 |
} |
18 | 32 |
\arguments{ |
19 | 33 |
\item{object}{a \code{peaksDataset} object.} |
... | ... |
@@ -1,12 +1,10 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/clusterAlignment.R |
3 |
-\docType{methods} |
|
4 | 3 |
\name{plotClustAlignment,clusterAlignment-method} |
5 | 4 |
\alias{plotClustAlignment,clusterAlignment-method} |
6 | 5 |
\title{plotClustAlignment} |
7 | 6 |
\usage{ |
8 |
-\S4method{plotClustAlignment}{clusterAlignment}(object, alignment = 1, |
|
9 |
- ...) |
|
7 |
+\S4method{plotClustAlignment}{clusterAlignment}(object, alignment = 1, ...) |
|
10 | 8 |
} |
11 | 9 |
\arguments{ |
12 | 10 |
\item{object}{\code{clusterAlignment} object.} |
... | ... |
@@ -31,19 +29,19 @@ just a collection of all pairwise \code{peakAlignment} objects. |
31 | 29 |
|
32 | 30 |
require(gcspikelite) |
33 | 31 |
|
34 |
-## paths and files |
|
32 |
+# paths and files |
|
35 | 33 |
gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
36 | 34 |
cdfFiles <- dir(gcmsPath, "CDF", full=TRUE) |
37 | 35 |
eluFiles <- dir(gcmsPath, "ELU", full=TRUE) |
38 | 36 |
|
39 |
-## read data |
|
40 |
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
41 |
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
42 |
- snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5) |
|
43 |
-ca <- clusterAlignment(pd, metric = 1, D = 50, type = 1, gap = 0.5) |
|
37 |
+# read data, peak detection results |
|
38 |
+pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
39 |
+pd <- addAMDISPeaks(pd, eluFiles[1:2]) |
|
40 |
+ |
|
41 |
+ca <- clusterAlignment(pd, gap=0.5, D=0.05, df=30, metric=1, type=1) |
|
44 | 42 |
plotClustAlignment(ca, run = 1) |
45 | 43 |
plotClustAlignment(ca, run = 2) |
46 |
-plotClustAlignment(ca, run = 3) |
|
44 |
+plotClustAlignment(ca, run = 3) |
|
47 | 45 |
|
48 | 46 |
} |
49 | 47 |
\references{ |
... | ... |
@@ -29,22 +29,27 @@ Plot the deconvoluted mass spectra from the profile matrix |
29 | 29 |
} |
30 | 30 |
\examples{ |
31 | 31 |
|
32 |
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
33 |
-cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
34 |
-# read data, peak detection results |
|
35 |
-pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
36 |
-pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
37 |
- snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
38 |
- sleep=0) |
|
32 |
+files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
33 |
+ sep = "/"),"CDF", full = TRUE) |
|
34 |
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
35 |
+## create settings object |
|
36 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
37 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
38 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
39 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
40 |
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
41 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
42 |
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
43 |
+data |
|
39 | 44 |
## align two chromatogram |
40 |
-pA <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
41 |
- pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, |
|
42 |
- metric=3, compress=FALSE, type=2, penality=0.2) |
|
45 |
+pA <- peaksAlignment(data@peaksdata[[1]], data@peaksdata[[2]], |
|
46 |
+ data@peaksrt[[1]], data@peaksrt[[2]], D = 50, |
|
47 |
+ metric = 3, compress = FALSE, type = 2, penality = 0.2) |
|
43 | 48 |
pA@v$match |
44 | 49 |
## plot the mass spectra |
45 | 50 |
par(mfrow=c(2,1)) |
46 |
-plotFrags(object=pd, sample=1, specID=10) |
|
47 |
-plotFrags(object=pd, sample=2, specID=12) |
|
51 |
+plotFrags(object=data, sample=1, specID=10) |
|
52 |
+plotFrags(object=data, sample=2, specID=12) |
|
48 | 53 |
|
49 | 54 |
} |
50 | 55 |
\author{ |
... | ... |
@@ -1,13 +1,19 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/peaksDataset.R |
3 |
-\docType{methods} |
|
4 | 3 |
\name{plotImage} |
5 | 4 |
\alias{plotImage} |
6 | 5 |
\alias{plotImage,peaksDataset-method} |
7 | 6 |
\title{Plot of images of GCMS data} |
8 | 7 |
\usage{ |
9 |
-\S4method{plotImage}{peaksDataset}(object, run = 1, rtrange = c(11, |
|
10 |
- 13), main = NULL, mzrange = c(50, 200), SCALE = log2, ...) |
|
8 |
+\S4method{plotImage}{peaksDataset}( |
|
9 |
+ object, |
|
10 |
+ run = 1, |
|
11 |
+ rtrange = c(11, 13), |
|
12 |
+ main = NULL, |
|
13 |
+ mzrange = c(50, 200), |
|
14 |
+ SCALE = log2, |
|
15 |
+ ... |
|
16 |
+) |
|
11 | 17 |
} |
12 | 18 |
\arguments{ |
13 | 19 |
\item{object}{a \code{peaksDataset} object} |
... | ... |
@@ -7,8 +7,17 @@ |
7 | 7 |
\alias{show,progressiveAlignment-method} |
8 | 8 |
\title{Data Structure for progressive alignment of many GCMS samples} |
9 | 9 |
\usage{ |
10 |
-progressiveAlignment(pD, cA, D = 50, gap = 0.5, verbose = TRUE, |
|
11 |
- usePeaks = TRUE, df = 30, compress = FALSE, type = 2) |
|
10 |
+progressiveAlignment( |
|
11 |
+ pD, |
|
12 |
+ cA, |
|
13 |
+ D = 50, |
|
14 |
+ gap = 0.5, |
|
15 |
+ verbose = TRUE, |
|
16 |
+ usePeaks = TRUE, |
|
17 |
+ df = 30, |
|
18 |
+ compress = FALSE, |
|
19 |
+ type = 2 |
|
20 |
+) |
|
12 | 21 |
} |
13 | 22 |
\arguments{ |
14 | 23 |
\item{pD}{a \code{peaksDataset} object} |
... | ... |
@@ -47,18 +56,22 @@ be found in the reference below. |
47 | 56 |
\examples{ |
48 | 57 |
|
49 | 58 |
require(gcspikelite) |
50 |
-## paths and files |
|
51 |
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
52 |
-cdfFiles <- dir(gcmsPath, "CDF", full=TRUE) |
|
53 |
-eluFiles <- dir(gcmsPath, "ELU", full=TRUE) |
|
54 |
- |
|
55 |
-## read data, peak detection results |
|
56 |
-pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
57 |
-pd <- addAMDISPeaks(pd, eluFiles[1:2]) |
|
58 |
- |
|
59 |
-ca <- clusterAlignment(pd, gap=.5, D=.05, df=30, metric=1, type=1, |
|
60 |
- compress = FALSE) |
|
61 |
-pa <- progressiveAlignment(pd, ca, gap=.6, D=.1, df=30, type=1, compress = FALSE) |
|
59 |
+files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
60 |
+ sep = "/"),"CDF", full = TRUE) |
|
61 |
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
62 |
+## create settings object |
|
63 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
64 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
65 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
66 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
67 |
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
68 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
69 |
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
70 |
+data |
|
71 |
+ca <- clusterAlignment(data, gap = 0.5, D = 0.05, df = 30, metric = 1, |
|
72 |
+ type = 1, compress = FALSE) |
|
73 |
+pa <- progressiveAlignment(data, ca, gap = 0.6, D = 0.1, df = 30, |
|
74 |
+ type = 1, compress = FALSE) |
|
62 | 75 |
|
63 | 76 |
} |
64 | 77 |
\references{ |
... | ... |
@@ -31,19 +31,22 @@ different peaks. |
31 | 31 |
\examples{ |
32 | 32 |
|
33 | 33 |
require(gcspikelite) |
34 |
-# paths and files |
|
35 |
-gcmsPath <- paste(find.package("gcspikelite"), "data", sep = "/") |
|
36 |
-cdfFiles <- dir(gcmsPath,"CDF",full=TRUE) |
|
37 |
-# read data, peak detection results |
|
38 |
-pd <- peaksDataset(cdfFiles[1:2], mz=seq(50,550), |
|
39 |
- rtrange=c(7.5,8.5)) |
|
40 |
-pd <- addXCMSPeaks(files=cdfFiles[1:2], object=pd, |
|
41 |
- peakPicking=c('mF'), snthresh=3, fwhm=4, |
|
42 |
- step=1, steps=2, mzdiff=0.5) |
|
43 |
-ma <- multipleAlignment(pd = pd, group = c(1,1), |
|
34 |
+files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
35 |
+ sep = "/"),"CDF", full = TRUE) |
|
36 |
+data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
37 |
+## create settings object |
|
38 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
39 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
40 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
41 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
42 |
+data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
43 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
44 |
+ list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
45 |
+data |
|
46 |
+ma <- multipleAlignment(pd = data, group = c(1,1), |
|
44 | 47 |
filterMin = 1, metric = 2, type = 2) |
45 |
-outList <- gatherInfo(pd, ma) |
|
46 |
-mtxD <- retFatMatrix(object = pd, data = outList, minFilter = 1) |
|
48 |
+outList <- gatherInfo(data, ma) |
|
49 |
+mtxD <- retFatMatrix(object = data, data = outList, minFilter = 1) |
|
47 | 50 |
|
48 | 51 |
} |
49 | 52 |
\seealso{ |
... | ... |
@@ -4,8 +4,15 @@ |
4 | 4 |
\alias{rmaFitUnit} |
5 | 5 |
\title{Fits a robust linear model (RLM) for one metabolite} |
6 | 6 |
\usage{ |
7 |
-rmaFitUnit(u, maxit = 5, mzEffect = TRUE, cls = NULL, |
|
8 |
- fitSample = TRUE, fitOrCoef = c("coef", "fit"), TRANSFORM = log2) |
|
7 |
+rmaFitUnit( |
|
8 |
+ u, |
|
9 |
+ maxit = 5, |
|
10 |
+ mzEffect = TRUE, |
|
11 |
+ cls = NULL, |
|
12 |
+ fitSample = TRUE, |
|
13 |
+ fitOrCoef = c("coef", "fit"), |
|
14 |
+ TRANSFORM = log2 |
|
15 |
+) |
|
9 | 16 |
} |
10 | 17 |
\arguments{ |
11 | 18 |
\item{u}{a metabolite unit (list object with vectors \code{mz} and \code{rt} |
... | ... |
@@ -1,6 +1,5 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/multipleAlignment.R |
3 |
-\docType{methods} |
|
4 | 3 |
\name{show,multipleAlignment-method} |
5 | 4 |
\alias{show,multipleAlignment-method} |
6 | 5 |
\title{Store the raw data and optionally, information regarding signal peaks for |
7 | 6 |
deleted file mode 100644 |
... | ... |
@@ -1,16 +0,0 @@ |
1 |
-(TeX-add-style-hook |
|
2 |
- "flagme" |
|
3 |
- (lambda () |
|
4 |
- (TeX-add-to-alist 'LaTeX-provided-package-options |
|
5 |
- '(("caption" "tableposition=top") ("inputenc" "utf8"))) |
|
6 |
- (TeX-run-style-hooks |
|
7 |
- "latex2e" |
|
8 |
- "article" |
|
9 |
- "art10" |
|
10 |
- "amsmath" |
|
11 |
- "amscd" |
|
12 |
- "caption" |
|
13 |
- "ifthen" |
|
14 |
- "inputenc")) |
|
15 |
- :latex) |
|
16 |
- |
17 | 0 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,416 @@ |
1 |
+\documentclass{article} |
|
2 |
+ |
|
3 |
+\usepackage{amsmath} |
|
4 |
+\usepackage{amscd} |
|
5 |
+\usepackage[tableposition=top]{caption} |
|
6 |
+\usepackage{ifthen} |
|
7 |
+\usepackage[utf8]{inputenc} |
|
8 |
+\topmargin 0in |
|
9 |
+\headheight 0in |
|
10 |
+\headsep 0in |
|
11 |
+\oddsidemargin 0in |
|
12 |
+\evensidemargin 0in |
|
13 |
+\textwidth 176mm |
|
14 |
+\textheight 215mm |
|
15 |
+ |
|
16 |
+ |
|
17 |
+\begin{document} |
|
18 |
+ |
|
19 |
+%\VignetteIndexEntry{Using flagme -- Fragment-level analysis of GC-MS-based metabolomics data} |
|
20 |
+ |
|
21 |
+\title{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based |
|
22 |
+ metabolomics data} |
|
23 |
+\author{Mark Robinson \\ \texttt{[email protected]} \\ Riccardo |
|
24 |
+ Romoli \\ \texttt{[email protected]}} |
|
25 |
+\maketitle |
|
26 |
+ |
|
27 |
+ |
|
28 |
+\section{Introduction} |
|
29 |
+\noindent This document gives a brief introduction to the |
|
30 |
+\texttt{flagme} package, which is designed to process, visualise and |
|
31 |
+statistically analyze sets of GC-MS samples. The ideas discussed here |
|
32 |
+were originally designed with GC-MS-based metabolomics in mind, but |
|
33 |
+indeed some of the methods and visualizations could be useful for |
|
34 |
+LC-MS data sets. The {\em fragment-level analysis} though, takes |
|
35 |
+advantage of the rich fragmentation patterns observed from electron |
|
36 |
+interaction (EI) ionization. |
|
37 |
+ |
|
38 |
+There are many aspects of data processing for GC-MS data. Generally, |
|
39 |
+algorithms are run separately on each sample to detect features, or |
|
40 |
+{\em peaks} (e.g. AMDIS). Due to retention time shifts from |
|
41 |
+run-to-run, an alignment algorithm is employed to allow the matching |
|
42 |
+of the same feature across multiple samples. Alternatively, if known |
|
43 |
+standards are introduced to the samples, retention {\em indices} can |
|
44 |
+be computed for each peak and used for alignment. After peaks are |
|
45 |
+matched across all samples, further processing steps are employed to |
|
46 |
+create a matrix of abundances, leading into detecting differences in |
|
47 |
+abundance. |
|
48 |
+ |
|
49 |
+Many of these data processing steps are prone to errors and they often |
|
50 |
+tend to be black boxes. But, with effective exploratory data |
|
51 |
+analysis, many of the pitfalls can be avoided and any problems can be |
|
52 |
+fixed before proceeding to the downstream statistical analysis. The |
|
53 |
+package provides various visualizations to ensure the methods applied |
|
54 |
+are not black boxes. |
|
55 |
+ |
|
56 |
+The \texttt{flagme} package gives a complete suite of methods to go |
|
57 |
+through all common stages of data processing. In addition, R is |
|
58 |
+especially well suited to the downstream data analysis tasks since it |
|
59 |
+is very rich in analysis tools and has excellent visualization |
|
60 |
+capabilities. In addition, it is freely available |
|
61 |
+(\texttt{www.r-project.org}), extensible and there is a growing |
|
62 |
+community of users and developers. For routine analyses, graphical |
|
63 |
+user interfaces could be designed. |
|
64 |
+ |
|
65 |
+ |
|
66 |
+\section{Reading and visualizing GC-MS data} |
|
67 |
+To run these examples, you must have the \texttt{gcspikelite} package |
|
68 |
+installed. This data package contains several GC-MS samples from a |
|
69 |
+spike-in experiment we designed to interrogate data processing |
|
70 |
+methods. So, first, we load the packages: |
|
71 |
+ |
|
72 |
+<<libraries, echo=FALSE>>= |
|
73 |
+require(gcspikelite) |
|
74 |
+library(flagme) |
|
75 |
+@ |
|
76 |
+ |
|
77 |
+ |
|
78 |
+To load the data and corresponding peak detection results, we simply |
|
79 |
+create vectors of the file-names and create a \texttt{peaksDataset} |
|
80 |
+object. Note that we can speed up the import time by setting the |
|
81 |
+retention time range to a subset of the elution, as below: |
|
82 |
+ |
|
83 |
+<<rawdata>>= |
|
84 |
+gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
85 |
+data(targets) |
|
86 |
+cdfFiles <- paste(gcmsPath, targets$FileName, sep="/") |
|
87 |
+eluFiles <- gsub("CDF", "ELU", cdfFiles) |
|
88 |
+pd <- peaksDataset(cdfFiles, mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
89 |
+pd <- addAMDISPeaks(pd, eluFiles) |
|
90 |
+pd |
|
91 |
+@ |
|
92 |
+ |
|
93 |
+Here, we have added peaks from AMDIS, a well known and mature |
|
94 |
+algorithm for deconvolution of GC-MS data. For GC-TOF-MS data, we have |
|
95 |
+implemented a parser for the \texttt{ChromaTOF} output (see the |
|
96 |
+analogous \texttt{addChromaTOFPeaks} function). The |
|
97 |
+\texttt{addXCMSPeaks} function allows the use of all the XCMS peak-picking |
|
98 |
+algorithms; using this approach it is also possible to process the |
|
99 |
+raw data files from within R instead of using external software. |
|
100 |
+%% Support for XMCS or MzMine may be added in the future. Ask the author |
|
101 |
+%% if another detection result format is desired as the parsers are |
|
102 |
+%% generally easy to design. |
|
103 |
+In particular, the function reads the raw data using XCMS, groups the extracted ions |
|
104 |
+according to their retention time using CAMERA and attaches them to an |
|
105 |
+already created \texttt{peaksDataset} object: |
|
106 |
+ |
|
107 |
+<<addXCMS>>= |
|
108 |
+pd.2 <- peaksDataset(cdfFiles[1:3], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
109 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
110 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
111 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
112 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
113 |
+pd.2 <- addXCMSPeaks(cdfFiles[1:3], pd.2, settings = mfp, minintens = 100, |
|
114 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
115 |
+ list(fwhm = c(5, 10, 20), rt_abs = 2, mz_abs = 0.1) |
|
116 |
+ ) |
|
117 |
+pd.2 |
|
118 |
+@ |
|
119 |
+ |
|
120 |
+The possibility of running these computations on a computer cluster will be added in the future. |
|
121 |
+ |
|
122 |
+Regardless of platform and peak detection algorithm, a useful |
|
123 |
+visualization of a set of samples is the set of total ion currents |
|
124 |
+(TIC), or extracted ion currents (XICs). To view TICs, you can call: |
|
125 |
+ |
|
126 |
+<<plotexample1, fig.width=9, fig.height=7>>= |
|
127 |
+plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE, |
|
128 |
+ max.near=8, how.near=0.5, col=rep(c("blue","red","black"), each=3)) |
|
129 |
+@ |
|
130 |
+ |
|
131 |
+Note here that the little {\em hashes} represent the detected peaks and are |
|
132 |
+labelled with an integer index. One of the main challenges is to match |
|
133 |
+these peak detections across several samples, given that they appear at |
|
134 |
+slightly different times in different runs. |
|
135 |
+ |
|
136 |
+For XICs, you need to give the indices (of \texttt{pd@mz}, the grid of |
|
137 |
+mass-to-charge values) that you want to plot through the |
|
138 |
+\texttt{mzind} argument. This could be a single ion |
|
139 |
+(e.g. \texttt{mzind=24}) or could be a range of indices if multiple |
|
140 |
+ions are of interest (e.g. \texttt{mzind=c(24,25,98,99)}). |
|
141 |
+ |
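+As a minimal sketch (the \texttt{mzind} values below are purely |
+illustrative and only meant to show the form of the call), an XIC |
+plot for a couple of fragment ions could be requested with: |
+ |
+<<xicexample, eval=FALSE>>= |
+## sketch: extracted ion currents for two arbitrary m/z indices |
+plotChrom(pd, rtrange=c(7.5,8.5), mzind=c(24,25)) |
+@ |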
|
142 |
+There are several other features within the \texttt{plot} command on |
|
143 |
+\texttt{peaksDataset} objects that can be useful. See \texttt{?plot} |
|
144 |
+(and select the flagme version) for full details. |
|
145 |
+ |
|
146 |
+Another useful visualization, at least for individual samples, is a 2D |
|
147 |
+heatmap of intensity. Such plots can be enlightening, especially when |
|
148 |
+peak detection results are overlaid. For example (with detected |
|
149 |
+fragment peaks from AMDIS shown in white): |
|
150 |
+ |
|
151 |
+<<plotexample2,fig.width=9,fig.height=7>>= |
|
152 |
+r <- 1 |
|
153 |
+plotImage(pd, run=r, rtrange=c(7.5,8.5), main="") |
|
154 |
+v <- which(pd@peaksdata[[r]] > 0, arr.ind=TRUE) # find detected peaks |
|
155 |
+abline(v=pd@peaksrt[[r]]) |
|
156 |
+points(pd@peaksrt[[r]][v[,2]], pd@mz[v[,1]], pch=19, cex=.6, col="white") |
|
157 |
+@ |
|
158 |
+ |
|
159 |
+ |
|
160 |
+\section{Pairwise alignment with dynamic programming algorithm} |
|
161 |
+One of the first challenges of GC-MS data is the matching of detected |
|
162 |
+peaks (i.e. metabolites) across several samples. Although gas |
|
163 |
+chromatography is quite robust, there can be some drift in the elution |
|
164 |
+time of the same analyte from run to run. We have devised a strategy, |
|
165 |
+based on dynamic programming, that takes into account both the |
|
166 |
+similarity in spectrum (at the apex of the called peak) and the |
|
167 |
+similarity in retention time, without requiring the identity of each |
|
168 |
+peak; this matching uses the data alone. If each sample gives a `peak |
|
169 |
+list' of the detected peaks (such as that from AMDIS that we have |
|
170 |
+attached to our \texttt{peaksDataset} object), the challenge is to |
|
171 |
+introduce gaps into these lists such that they are best aligned. From |
|
172 |
+this a matrix of retention times or a matrix of peak abundances can be |
|
173 |
+extracted for further statistical analysis, visualization and |
|
174 |
+interpretation. For this matching, we created a procedure analogous to |
|
175 |
+a multiple {\em sequence} alignment. |
|
176 |
+ |
|
177 |
+To highlight the dynamic programming-based alignment strategy, we |
|
178 |
+first illustrate a pairwise alignment of two peak lists. This example |
|
179 |
+also illustrates the selection of parameters necessary for the |
|
180 |
+alignment. From the data read in previously, let us consider the |
|
181 |
+alignment of two samples, denoted \texttt{0709\_468} and |
|
182 |
+\texttt{0709\_474}. First, a similarity matrix for two samples is |
|
183 |
+calculated. This is calculated based on a scoring function and takes |
|
184 |
+into account the similarity in retention time and in the similarity of |
|
185 |
+the apex spectra, according to: |
|
186 |
+\[ |
|
187 |
+S_{ij}(D) = \frac{\sum_{k=1}^K x_{ik} y_{jk}}{\sqrt{ \sum_{k=1}^K |
|
188 |
+ x_{ik}^2 \cdot \sum_{k=1}^K y_{jk}^2 } } \cdot \exp \left( - |
|
189 |
+ \frac{1}{2} \frac{(t_i-t_j)^2}{D^2} \right) |
|
190 |
+\] |
|
191 |
+\noindent where $i$ is the index of the peak in the first sample and |
|
192 |
+$j$ is the index of the peak in the second sample, $\mathbf{x}_i$ and |
|
193 |
+$\mathbf{y}_j$ are the spectra vectors and $t_i$ and $t_j$ are their |
|
194 |
+respective retention times. As you can see, there are two components |
|
195 |
+to the similarity: spectra similarity (left term) and similarity in |
|
196 |
+retention time (right term). Of course, other metrics for spectra |
|
197 |
+similarity are feasible. Ask the author if you want to see other |
|
198 |
+metrics implemented. We have some non-optimized code for a few |
|
199 |
+alternative metrics. |
|
200 |
+ |
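+As a sketch of how this score can be obtained directly, the exported |
+\texttt{ndpRT} function computes the matrix of pairwise scores between |
+the peaks of two runs (the value of \texttt{D} below is illustrative): |
+ |
+<<similarityexample, eval=FALSE>>= |
+## sketch: score matrix between all peaks of runs 1 and 2 |
+S <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]], |
+           pd@peaksrt[[1]], pd@peaksrt[[2]], D=50) |
+dim(S)  # one entry for every pair of peaks from the two runs |
+@ |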
|
201 |
+The peak alignment algorithm, much like sequence alignments, requires |
|
202 |
+a \texttt{gap} parameter to be set, here a number between 0 and 1. A |
|
203 |
+high gap penalty discourages gaps when matching the two lists of peaks |
|
204 |
+and a low gap penalty allows gaps at a very low {\em cost}. We find |
|
205 |
+that a gap penalty in the middle range (0.4-0.6) works well for GC-MS |
|
206 |
+peak matching. Another parameter, \texttt{D}, modulates the impact of |
|
207 |
+the difference in retention time penalty. A large value for |
|
208 |
+\texttt{D} essentially eliminates the effect. Generally, we set this |
|
209 |
+parameter to be a bit larger than the average width of a peak, |
|
210 |
+allowing a little flexibility for retention time shifts between |
|
211 |
+samples. Keep in mind the \texttt{D} parameter should be set on the |
|
212 |
+scale (i.e. seconds or minutes) of the \texttt{peaksrt} slot of the |
|
213 |
+\texttt{peaksDataset} object. The next example shows the effect of |
|
214 |
+the \texttt{gap} and \texttt{D} penalty on the matching of a small |
|
215 |
+range of peaks. |
|
216 |
+ |
|
217 |
+<<pairwisealignexample, fig.width=7, fig.height=7>>= |
|
218 |
+Ds <- c(0.1, 10, 0.1, 0.1) |
|
219 |
+gaps <- c(0.5, 0.5, 0.1, 0.9) |
|
220 |
+par(mfrow=c(2,2), mai=c(0.8466,0.4806,0.4806,0.1486)) |
|
221 |
+for(i in 1:4){ |
|
222 |
+ pa <- peaksAlignment(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
223 |
+ pd@peaksrt[[1]], pd@peaksrt[[2]], D=Ds[i], |
|
224 |
+ gap=gaps[i], metric=1, type=1, compress = FALSE) |
|
225 |
+ plotAlignment(pa, xlim=c(0, 17), ylim=c(0, 16), matchCol="yellow", |
|
226 |
+ main=paste("D=", Ds[i], " gap=", gaps[i], sep="")) |
|
227 |
+} |
|
228 |
+@ |
|
229 |
+ |
|
230 |
+You might ask: is the flagme package useful without peak detection |
|
231 |
+results? Possibly. There have been some developments in alignment |
|
232 |
+(generally on LC-MS proteomics experiments) without peak/feature |
|
233 |
+detection, such as Prince et al. 2006, where a very similar dynamic |
|
234 |
+programming is used for a pairwise alignment. We have experimented |
|
235 |
+with alignments without using the peaks, but do not have any |
|
236 |
+convincing results. It does introduce a new set of challenges in terms |
|
237 |
+of highlighting differentially abundant metabolites. However, in the |
|
238 |
+\texttt{peaksAlignment} routine above (and those mentioned below), you |
|
239 |
+can set \texttt{usePeaks=FALSE} in order to do {\em scan}-based |
|
240 |
+alignments instead of {\em peak}-based alignments. In addition, the |
|
241 |
+\texttt{flagme} package may be useful simply for its bare-bones |
|
242 |
+dynamic programming algorithm. |
|
243 |
+ |
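+As a sketch of such a scan-based call (assuming the raw scans and their |
+retention times are taken from the \texttt{rawdata} and \texttt{rawrt} |
+slots of the \texttt{peaksDataset}; parameter values are illustrative): |
+ |
+<<scanalign, eval=FALSE>>= |
+## sketch: scan-based rather than peak-based pairwise alignment |
+pa.scan <- peaksAlignment(pd@rawdata[[1]], pd@rawdata[[2]], |
+                          pd@rawrt[[1]], pd@rawrt[[2]], |
+                          D=50, usePeaks=FALSE, compress=FALSE) |
+@ |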
|
244 |
+ |
|
245 |
+\subsection{Normalizing retention time score to drift estimates} |
|
246 |
+In what is mentioned above for pairwise alignments, we are penalizing |
|
247 |
+for differences in retention times that are non-zero. But, as you can |
|
248 |
+see from the TICs, some differences in retention time are |
|
249 |
+consistent. For example, all of the peaks from sample |
|
250 |
+\texttt{0709\_485} elute at later times than peaks from sample |
|
251 |
+\texttt{0709\_496}. We should be able to estimate this drift and |
|
252 |
+normalize the time penalty to that estimate, instead of penalizing to |
|
253 |
+zero. That is, we should replace $t_i-t_j$ with $t_i-t_j-\hat{d}_{ij}$ |
|
254 |
+where $\hat{d}_{ij}$ is the expected drift between peak $i$ of the |
|
255 |
+first sample and peak $j$ of the second sample. |
|
256 |
+ |
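+In symbols, this amounts to replacing the retention time term of |
+$S_{ij}(D)$ above (a sketch of the idea, not a separate definition |
+used by the package): |
+\[ |
+S_{ij}(D) = \frac{\sum_{k=1}^K x_{ik} y_{jk}}{\sqrt{ \sum_{k=1}^K |
+  x_{ik}^2 \cdot \sum_{k=1}^K y_{jk}^2 } } \cdot \exp \left( - |
+  \frac{1}{2} \frac{(t_i-t_j-\hat{d}_{ij})^2}{D^2} \right) |
+\] |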
|
257 |
+More details coming soon. |
|
258 |
+ |
|
259 |
+ |
|
260 |
+\subsection{Imputing location of undetected peaks} |
|
261 |
+One goal of the alignment leading into downstream data analyses is the |
|
262 |
+generation of a table of abundances for each metabolite across all |
|
263 |
+samples. As you can see from the TICs above, there are some low |
|
264 |
+intensity peaks that fall below detection in some but not all |
|
265 |
+samples. Our view is that instead of inserting arbitrary low constants |
|
266 |
+(such as 0 or half the detection limit) or imputing the intensities |
|
267 |
+post-hoc or having missing data in the data matrices, it is best to |
|
268 |
+return to the area where the peak should be and give some kind |
|
269 |
+of abundance. The alignments themselves are rich in information with |
|
270 |
+respect to the location of undetected peaks. We feel this is a more |
|
271 |
+conservative and statistically valid approach than introducing |
|
272 |
+arbitrary values. |
|
273 |
+ |
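+In the current interface, the most direct handle on this appears to be |
+the \texttt{doImpute} argument of \texttt{multipleAlignment} (shown |
+here only as a sketch, with all other arguments left at their defaults): |
+ |
+<<imputeexample, eval=FALSE>>= |
+## sketch: impute locations of undetected peaks during the multiple alignment |
+ma.imp <- multipleAlignment(pd, group=targets$Group, doImpute=TRUE) |
+@ |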
|
274 |
+More details coming soon. |
|
275 |
+ |
|
276 |
+ |
|
277 |
+\section{Multiple alignment of several experimental groups} |
|
278 |
+Next, we discuss the multiple alignment of peaks across many |
|
279 |
+samples. With replicates, we typically do an alignment within |
|
280 |
+replicates, then combine these together into a summarized form. This |
|
281 |
+cuts down on the computational cost. For example, consider 2 sets of |
|
282 |
+samples, each with 5 replicates. Aligning first within replicates |
|
283 |
+requires 10+10+1 total alignments whereas an all-pairwise alignment |
|
284 |
+requires 45 pairwise alignments. In addition, this allows some |
|
285 |
+flexibility in setting different gap and distance penalty parameters |
|
286 |
+for the {\em within} alignment and {\em between} alignment. An |
|
287 |
+example follows. |
|
288 |
+ |
|
289 |
+<<multiplealignment>>= |
|
290 |
+print(targets) |
|
291 |
+ma <- multipleAlignment(pd, group=targets$Group, wn.gap=0.5, wn.D=.05, |
|
292 |
+ bw.gap=.6, bw.D=0.05, usePeaks=TRUE, filterMin=1, |
|
293 |
+ df=50, verbose=FALSE, metric = 1, type = 1) # bug |
|
294 |
+ma |
|
295 |
+@ |
|
296 |
+ |
|
297 |
+If you set \texttt{verbose=TRUE}, many nitty-gritty details of the |
|
298 |
+alignment procedure are given. Next, we can take the alignment |
|
299 |
+results and overlay it onto the TICs, allowing a visual inspection. |
|
300 |
+ |
|
301 |
+<<multiplealignmentfig,fig.width=9,fig.height=7>>= |
|
302 |
+plotChrom(pd, rtrange=c(7.5,8.5), runs=ma@betweenAlignment@runs, |
|
303 |
+ mind=ma@betweenAlignment@ind, plotPeaks=TRUE, |
|
304 |
+ plotPeakLabels=TRUE, max.near=8, how.near=.5, |
|
305 |
+ col=rep(c("blue","red","black"), each=3)) |
|
306 |
+@ |
|
307 |
+ |
|
308 |
+ |
|
309 |
+% \section{Correlation Alignment algorithm} |
|
310 |
+% Another approach, represented by the \texttt{correlationAlignment} |
|
311 |
+% function, is to use a modified form of the Pearson correlation |
|
312 |
+% algorithm. After the correlation between two samples is calculated, a |
|
313 |
+% penalization coefficient, based on the retention time differences, is |
|
314 |
+% applied to the result. It is also possible to set a retention time |
|
315 |
+% range in which the penalization is 0, this because in gas |
|
316 |
+% chromatography we can have a little deviation in the retention time of |
|
317 |
+% the metabolite so, based on the experimental data, we can choose the |
|
318 |
+% retention time window for the penalization coefficient being applied. |
|
319 |
+ |
|
320 |
+<<correlationAlignment, eval=FALSE>>= |
|
321 |
+mp <- correlationAlignment(object=pd.2, thr=0.85, D=20, penality=0.2, |
|
322 |
+ normalize=TRUE, minFilter=1) |
|
323 |
+mp |
|
324 |
+@ |
|
325 |
+ |
|
326 |
+% \noindent where \texttt{thr} represent correlation threshold from 0 |
|
327 |
+% (min) to 1 (max); \texttt{D} represent the retention time window in |
|
328 |
+% seconds; \texttt{penality} represent the penality inflicted to a match |
|
329 |
+% between two peaks when the retention time difference exceed the |
|
330 |
+% parameter \texttt{D}; \texttt{normalize} is about the peak |
|
331 |
+% normalization-to-100 before the correlation is calculated; |
|
332 |
+% \texttt{minFilter} give the opportunity to exclude from the resulting |
|
333 |
+% correlation matrix each feature that in represented in our samples |
|
334 |
+% less time than this value. The value of minFilter must be smaller than |
|
335 |
+% the number of samples. |
|
336 |
+ |
|
337 |
+% The correlation-based peak alignment for multiple GC-MS |
|
338 |
+% peak lists uses a center-star technique to the alignment of the |
|
339 |
+% peaks. The combination of the \texttt{D} and \texttt{penality} parameters |
|
340 |
+% allow the users to force the algorithm to match the peaks close to the |
|
341 |
+% reference. The \texttt{thr} parameter control the matching factor. |
|
342 |
+ |
|
343 |
+ |
|
344 |
+\subsection{Gathering results} |
|
345 |
+The alignment results can be extracted from the \texttt{multipleAlignment} |
|
346 |
+object as: |
|
347 |
+<<multiplealignmentres>>= |
|
348 |
+ma@betweenAlignment@runs |
|
349 |
+ma@betweenAlignment@ind |
|
350 |
+@ |
|
351 |
+ |
|
352 |
+\noindent This table would suggest that matched peak \texttt{8} (see |
|
353 |
+numbers below the TICs in the figure above) corresponds to detected |
|
354 |
+peaks \texttt{9, 12, 11} in runs \texttt{4, 5, 6} and so on, same as |
|
355 |
+shown in the above plot. |
|
356 |
+ |
|
357 |
+In addition, you can gather a list of all the merged peaks with the |
|
358 |
+\texttt{gatherInfo} function, giving elements for the retention times, |
|
359 |
+the detected fragment ions and their intensities. The example below |
|
360 |
+also shows how to construct a table of retention times of the |
|
361 |
+matched peaks (No attempt is made here to adjust retention times onto |
|
362 |
+a common scale. Instead, the peaks are matched to each other on their |
|
363 |
+original scale). For example: |
|
364 |
+ |
|
365 |
+<<alignmentres>>= |
|
366 |
+outList <- gatherInfo(pd,ma) |
|
367 |
+outList[[8]] |
|
368 |
+rtmat <- matrix(unlist(lapply(outList,.subset,"rt"), use.names=FALSE), |
|
369 |
+ nr=length(outList), byrow=TRUE) |
|
370 |
+colnames(rtmat) <- names(outList[[1]]$rt); rownames(rtmat) <- 1:nrow(rtmat) |
|
371 |
+round(rtmat, 3) |
|
372 |
+@ |
|
373 |
+ |
|
374 |
+ |
|
375 |
+\section{Future improvements and extension} |
|
376 |
+There are many procedures that we have implemented in our |
|
377 |
+investigation of GC-MS data, but have not made part of the package just |
|
378 |
+yet. Some of the most useful procedures will be released, such as: |
|
379 |
+ |
|
380 |
+\begin{enumerate} |
|
381 |
+\item Parsers for other peak detection algorithms (e.g. % XCMS, |
|
382 |
+ MzMine) and parsers for other alignment procedures |
|
383 |
+ (e.g. SpectConnect) and perhaps retention indices procedures. |
|
384 |
+\item More convenient access to the alignment information and |
|
385 |
+ abundance table. |
|
386 |
+\item Statistical analysis of differential metabolite abundance. |
|
387 |
+\item Fragment-level analysis, an alternative method to summarize |
|
388 |
+ abundance across all detected fragments of a metabolite peak. |
|
389 |
+\end{enumerate} |
|
390 |
+ |
|
391 |
+\section{References} |
|
392 |
+See the following for further details: |
|
393 |
+ |
|
394 |
+\begin{enumerate} |
|
395 |
+\item Robinson MD. {\em Methods for the analysis of gas chromatography |
|
396 |
+ - mass spectrometry data.} {\bf Ph.D. Thesis}. October 2008. |
|
397 |
+ Department of Medical Biology (Walter and Eliza Hall Institute of |
|
398 |
+ Medical Research), University of Melbourne. |
|
399 |
+\item Robinson MD, De Souza DP, Keen WW, Saunders EC, McConville MJ, |
|
400 |
+ Speed TP, Liki\'{c} VA. (2007) {\em A dynamic programming approach |
|
401 |
+ for the alignment of signal peaks in multiple gas |
|
402 |
+ chromatography-mass spectrometry experiments.} {\bf BMC |
|
403 |
+ Bioinformatics}. 8:419. |
|
404 |
+\item Prince JT, Marcotte EM (2006) {\em Chromatographic alignment of |
|
405 |
+ ESI-LC-MS proteomics data sets by ordered bijective interpolated |
|
406 |
+ warping}. {\bf Anal Chem}. 78(17):6140-52. |
|
407 |
+\end{enumerate} |
|
408 |
+ |
|
409 |
+\section{This vignette was built with/at ...} |
|
410 |
+ |
|
411 |
+<<session>>= |
|
412 |
+sessionInfo() |
|
413 |
+date() |
|
414 |
+@ |
|
415 |
+ |
|
416 |
+\end{document} |
... | ... |
@@ -16,7 +16,7 @@ |
16 | 16 |
|
17 | 17 |
\begin{document} |
18 | 18 |
|
19 |
-%\VignetteIndexEntry{Using flagme -- Fragment-level analysis of GC-MS-based metabolomics data} |
|
19 |
+%\VignetteIndexEntry{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based metabolomics data} |
|
20 | 20 |
|
21 | 21 |
\title{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based |
22 | 22 |
metabolomics data} |
... | ... |
@@ -105,9 +105,15 @@ according to their retention time using CAMERA and attaches them to an |
105 | 105 |
already created \texttt{peaksDataset} object: |
106 | 106 |
|
107 | 107 |
<<addXCMS>>= |
108 |
-pd.2 <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,8.5)) |
|
109 |
-pd.2 <- addXCMSPeaks(cdfFiles[1:3], pd.2, peakPicking=c('mF'), |
|
110 |
- snthresh=3, fwhm=4, step=1, steps=2, mzdiff=0.5) |
|
108 |
+pd.2 <- peaksDataset(cdfFiles[1:3], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
109 |
+cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
110 |
+ prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
111 |
+ extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
112 |
+mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
113 |
+pd.2 <- addXCMSPeaks(cdfFiles[1:3], pd.2, settings = mfp, minintens = 100, |
|
114 |
+ multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
115 |
+ list(fwhm = c(5, 10, 20), rt_abs = 2, mz_abs = 0.1) |
|
116 |
+ ) |
|
111 | 117 |
pd.2 |
112 | 118 |
@ |
113 | 119 |
|
... | ... |
@@ -117,7 +123,7 @@ Regardless of platform and peak detection algorithm, a useful |
117 | 123 |
visualization of a set of samples is the set of total ion currents |
118 | 124 |
(TIC), or extracted ion currents (XICs). To view TICs, you can call: |
119 | 125 |
|
120 |
-<<plotexample1, fig=TRUE, width=9, height=7>>= |
|
126 |
+<<plotexample1, fig.width=9, fig.height=7>>= |
|
121 | 127 |
plotChrom(pd, rtrange=c(7.5,8.5), plotPeaks=TRUE, plotPeakLabels=TRUE, |
122 | 128 |
max.near=8, how.near=0.5, col=rep(c("blue","red","black"), each=3)) |
123 | 129 |
@ |
... | ... |
@@ -142,7 +148,7 @@ heatmap of intensity. Such plots can be enlightening, especially when |
142 | 148 |
peak detection results are overlaid. For example (with detected |
143 | 149 |
fragment peaks from AMDIS shown in white): |
144 | 150 |
|
145 |
-<<plotexample2,fig=TRUE,width=9,height=7>>= |
|
151 |
+<<plotexample2,fig.width=9,fig.height=7>>= |
|
146 | 152 |
r <- 1 |
147 | 153 |
plotImage(pd, run=r, rtrange=c(7.5,8.5), main="") |
148 | 154 |
v <- which(pd@peaksdata[[r]] > 0, arr.ind=TRUE) # find detected peaks |
... | ... |
@@ -208,7 +214,7 @@ scale (i.e. seconds or minutes) of the \texttt{peaksrt} slot of the |
208 | 214 |
the \texttt{gap} and \texttt{D} penalty on the matching of a small |
209 | 215 |
ranges of peaks. |
210 | 216 |
|
211 |
-<<pairwisealignexample, fig=TRUE, width=7, height=7>>= |
|
217 |
+<<pairwisealignexample, fig.width=7, fig.height=7>>= |
|
212 | 218 |
Ds <- c(0.1, 10, 0.1, 0.1) |
213 | 219 |
gaps <- c(0.5, 0.5, 0.1, 0.9) |
214 | 220 |
par(mfrow=c(2,2), mai=c(0.8466,0.4806,0.4806,0.1486)) |
... | ... |
@@ -292,7 +298,7 @@ If you set \texttt{verbose=TRUE}, many nitty-gritty details of the |
292 | 298 |
alignment procedure are given. Next, we can take the alignment |
293 | 299 |
results and overlay it onto the TICs, allowing a visual inspection. |
294 | 300 |
|
295 |
-<<multiplealignmentfig,fig=TRUE,width=9,height=7>>= |
|
301 |
+<<multiplealignmentfig,fig.width=9,fig.height=7>>= |
|
296 | 302 |
plotChrom(pd, rtrange=c(7.5,8.5), runs=ma@betweenAlignment@runs, |
297 | 303 |
mind=ma@betweenAlignment@ind, plotPeaks=TRUE, |
298 | 304 |
plotPeakLabels=TRUE, max.near=8, how.near=.5, |
300 | 306 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,965 @@ |
1 |
+\documentclass{article}\usepackage[]{graphicx}\usepackage[]{color} |
|
2 |
+% maxwidth is the original width if it is less than linewidth |
|
3 |
+% otherwise use linewidth (to make sure the graphics do not exceed the margin) |
|
4 |
+\makeatletter |
|
5 |
+\def\maxwidth{ % |
|
6 |
+ \ifdim\Gin@nat@width>\linewidth |
|
7 |
+ \linewidth |
|
8 |
+ \else |
|
9 |
+ \Gin@nat@width |
|
10 |
+ \fi |
|
11 |
+} |
|
12 |
+\makeatother |
|
13 |
+ |
|
14 |
+\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345} |
|
15 |
+\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}% |
|
16 |
+\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}% |
|
17 |
+\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}% |
|
18 |
+\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% |
|
19 |
+\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}% |
|
20 |
+\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}% |
|
21 |
+\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}% |
|
22 |
+\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}% |
|
23 |
+\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}% |
|
24 |
+\let\hlipl\hlkwb |
|
25 |
+ |
|
26 |
+\usepackage{framed} |
|
27 |
+\makeatletter |
|
28 |
+\newenvironment{kframe}{% |
|
29 |
+ \def\at@end@of@kframe{}% |
|
30 |
+ \ifinner\ifhmode% |
|
31 |
+ \def\at@end@of@kframe{\end{minipage}}% |
|
32 |
+ \begin{minipage}{\columnwidth}% |
|
33 |
+ \fi\fi% |
|
34 |
+ \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep |
|
35 |
+ \colorbox{shadecolor}{##1}\hskip-\fboxsep |
|
36 |
+ % There is no \\@totalrightmargin, so: |
|
37 |
+ \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}% |
|
38 |
+ \MakeFramed {\advance\hsize-\width |
|
39 |
+ \@totalleftmargin\z@ \linewidth\hsize |
|
40 |
+ \@setminipage}}% |
|
41 |
+ {\par\unskip\endMakeFramed% |
|
42 |
+ \at@end@of@kframe} |
|
43 |
+\makeatother |
|
44 |
+ |
|
45 |
+\definecolor{shadecolor}{rgb}{.97, .97, .97} |
|
46 |
+\definecolor{messagecolor}{rgb}{0, 0, 0} |
|
47 |
+\definecolor{warningcolor}{rgb}{1, 0, 1} |
|
48 |
+\definecolor{errorcolor}{rgb}{1, 0, 0} |
|
49 |
+\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX |
|
50 |
+ |
|
51 |
+\usepackage{alltt} |
|
52 |
+ |
|
53 |
+\usepackage{amsmath} |
|
54 |
+\usepackage{amscd} |
|
55 |
+\usepackage[tableposition=top]{caption} |
|
56 |
+\usepackage{ifthen} |
|
57 |
+\usepackage[utf8]{inputenc} |
|
58 |
+\topmargin 0in |
|
59 |
+\headheight 0in |
|
60 |
+\headsep 0in |
|
61 |
+\oddsidemargin 0in |
|
62 |
+\evensidemargin 0in |
|
63 |
+\textwidth 176mm |
|
64 |
+\textheight 215mm |
|
65 |
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{} |
|
66 |
+\begin{document} |
|
67 |
+ |
|
68 |
+%\VignetteIndexEntry{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based metabolomics data} |
|
69 |
+ |
|
70 |
+\title{\texttt{flagme}: Fragment-level analysis of \\ GC-MS-based |
|
71 |
+ metabolomics data} |
|
72 |
+\author{Mark Robinson \\ \texttt{[email protected]} \\ Riccardo |
|
73 |
+ Romoli \\ \texttt{[email protected]}} |
|
74 |
+\maketitle |
|
75 |
+ |
|
76 |
+ |
|
77 |
+\section{Introduction} |
|
78 |
+\noindent This document gives a brief introduction to the |
|
79 |
+\texttt{flagme} package, which is designed to process, visualise and |
|
80 |
+statistically analyze sets of GC-MS samples. The ideas discussed here |
|
81 |
+were originally designed with GC-MS-based metabolomics in mind, but |
|
82 |
+indeed some of the methods and visualizations could be useful for |
|
83 |
+LC-MS data sets. The {\em fragment-level analysis} though, takes |
|
84 |
+advantage of the rich fragmentation patterns observed from electron |
|
85 |
+interaction (EI) ionization. |
|
86 |
+ |
|
87 |
+There are many aspects of data processing for GC-MS data. Generally, |
|
88 |
+algorithms are run separately on each sample to detect features, or |
|
89 |
+{\em peaks} (e.g. AMDIS). Due to retention time shifts from |
|
90 |
+run-to-run, an alignment algorithm is employed to allow the matching |
|
91 |
+of the same feature across multiple samples. Alternatively, if known |
|
92 |
+standards are introduced to the samples, retention {\em indices} can |
|
93 |
+be computed for each peak and used for alignment. After peaks are |
|
94 |
+matched across all samples, further processing steps are employed to |
|
95 |
+create a matrix of abundances, leading into detecting differences in |
|
96 |
+abundance. |
|
97 |
+ |
|
98 |
+Many of these data processing steps are prone to errors and they often |
|
99 |
+tend to be black boxes. But, with effective exploratory data |
|
100 |
+analysis, many of the pitfalls can be avoided and any problems can be |
|
101 |
+fixed before proceeding to the downstream statistical analysis. The |
|
102 |
+package provides various visualizations to ensure the methods applied |
|
103 |
+are not black boxes. |
|
104 |
+ |
|
105 |
+The \texttt{flagme} package gives a complete suite of methods to go |
|
106 |
+through all common stages of data processing. In addition, R is |
|
107 |
+especially well suited to the downstream data analysis tasks since it |
|
108 |
+is very rich in analysis tools and has excellent visualization |
|
109 |
+capabilities. In addition, it is freely available |
|
110 |
+(\texttt{www.r-project.org}), extensible and there is a growing |
|
111 |
+community of users and developers. For routine analyses, graphical |
|
112 |
+user interfaces could be designed. |
|
113 |
+ |
|
114 |
+ |
|
115 |
+\section{Reading and visualizing GC-MS data} |
|
116 |
+To run these examples, you must have the \texttt{gcspikelite} package |
|
117 |
+installed. This data package contains several GC-MS samples from a |
|
118 |
+spike-in experiment we designed to interrogate data processing |
|
119 |
+methods. So, first, we load the packages: |
|
120 |
+ |
|
121 |
+\begin{knitrout} |
|
122 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
123 |
+ |
|
124 |
+ |
|
125 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: gcspikelite}} |
|
126 |
+ |
|
127 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: xcms}} |
|
128 |
+ |
|
129 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: BiocParallel}} |
|
130 |
+ |
|
131 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: MSnbase}} |
|
132 |
+ |
|
133 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: BiocGenerics}} |
|
134 |
+ |
|
135 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: parallel}} |
|
136 |
+ |
|
137 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'BiocGenerics'}} |
|
138 |
+ |
|
139 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:parallel':\\\#\# \\\#\# \ \ \ \ clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,\\\#\# \ \ \ \ clusterExport, clusterMap, parApply, parCapply, parLapply,\\\#\# \ \ \ \ parLapplyLB, parRapply, parSapply, parSapplyLB}} |
|
140 |
+ |
|
141 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:stats':\\\#\# \\\#\# \ \ \ \ IQR, mad, sd, var, xtabs}} |
|
142 |
+ |
|
143 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:base':\\\#\# \\\#\# \ \ \ \ anyDuplicated, append, as.data.frame, basename, cbind, colnames,\\\#\# \ \ \ \ dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,\\\#\# \ \ \ \ grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,\\\#\# \ \ \ \ order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,\\\#\# \ \ \ \ rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,\\\#\# \ \ \ \ union, unique, unsplit, which.max, which.min}} |
|
144 |
+ |
|
145 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: Biobase}} |
|
146 |
+ |
|
147 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Welcome to Bioconductor\\\#\# \\\#\# \ \ \ \ Vignettes contain introductory material; view with\\\#\# \ \ \ \ 'browseVignettes()'. To cite Bioconductor, see\\\#\# \ \ \ \ 'citation("{}Biobase"{})', and for packages 'citation("{}pkgname"{})'.}} |
|
148 |
+ |
|
149 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: mzR}} |
|
150 |
+ |
|
151 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: Rcpp}} |
|
152 |
+ |
|
153 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: S4Vectors}} |
|
154 |
+ |
|
155 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: stats4}} |
|
156 |
+ |
|
157 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'S4Vectors'}} |
|
158 |
+ |
|
159 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following objects are masked from 'package:base':\\\#\# \\\#\# \ \ \ \ expand.grid, I, unname}} |
|
160 |
+ |
|
161 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: ProtGenerics}} |
|
162 |
+ |
|
163 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'ProtGenerics'}} |
|
164 |
+ |
|
165 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following object is masked from 'package:stats':\\\#\# \\\#\# \ \ \ \ smooth}} |
|
166 |
+ |
|
167 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# This is MSnbase version 2.18.0 \\\#\# \ \ Visit https://lgatto.github.io/MSnbase/ to get started.}} |
|
168 |
+ |
|
169 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'MSnbase'}} |
|
170 |
+ |
|
171 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following object is masked from 'package:base':\\\#\# \\\#\# \ \ \ \ trimws}} |
|
172 |
+ |
|
173 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# This is xcms version 3.14.1}} |
|
174 |
+ |
|
175 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# \\\#\# Attaching package: 'xcms'}} |
|
176 |
+ |
|
177 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# The following object is masked from 'package:stats':\\\#\# \\\#\# \ \ \ \ sigma}} |
|
178 |
+ |
|
179 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Loading required package: CAMERA}}\end{kframe} |
|
180 |
+\end{knitrout} |
|
181 |
+ |
|
182 |
+ |
|
183 |
+To load the data and corresponding peak detection results, we simply |
|
184 |
+build vectors of the file names and create a \texttt{peaksDataset} |
|
185 |
+object. Note that we can speed up the import time by setting the |
|
186 |
+retention time range to a subset of the elution, as below: |
|
187 |
+ |
|
188 |
+\begin{knitrout} |
|
189 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
190 |
+\begin{alltt} |
|
191 |
+\hlstd{gcmsPath} \hlkwb{<-} \hlkwd{paste}\hlstd{(}\hlkwd{find.package}\hlstd{(}\hlstr{"gcspikelite"}\hlstd{),} \hlstr{"data"}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"/"}\hlstd{)} |
|
192 |
+\hlkwd{data}\hlstd{(targets)} |
|
193 |
+\hlstd{cdfFiles} \hlkwb{<-} \hlkwd{paste}\hlstd{(gcmsPath, targets}\hlopt{$}\hlstd{FileName,} \hlkwc{sep}\hlstd{=}\hlstr{"/"}\hlstd{)} |
|
194 |
+\hlstd{eluFiles} \hlkwb{<-} \hlkwd{gsub}\hlstd{(}\hlstr{"CDF"}\hlstd{,} \hlstr{"ELU"}\hlstd{, cdfFiles)} |
|
195 |
+\hlstd{pd} \hlkwb{<-} \hlkwd{peaksDataset}\hlstd{(cdfFiles,} \hlkwc{mz}\hlstd{=}\hlkwd{seq}\hlstd{(}\hlnum{50}\hlstd{,}\hlnum{550}\hlstd{),} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{))} |
|
196 |
+\end{alltt} |
|
197 |
+\begin{verbatim} |
|
198 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_468.CDF |
|
199 |
+\end{verbatim} |
|
200 |
+ |
|
201 |
+ |
|
202 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim} |
|
203 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_474.CDF |
|
204 |
+\end{verbatim} |
|
205 |
+ |
|
206 |
+ |
|
207 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim} |
|
208 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_475.CDF |
|
209 |
+\end{verbatim} |
|
210 |
+ |
|
211 |
+ |
|
212 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim} |
|
213 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_485.CDF |
|
214 |
+\end{verbatim} |
|
215 |
+ |
|
216 |
+ |
|
217 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim} |
|
218 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_493.CDF |
|
219 |
+\end{verbatim} |
|
220 |
+ |
|
221 |
+ |
|
222 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim} |
|
223 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_496.CDF |
|
224 |
+\end{verbatim} |
|
225 |
+ |
|
226 |
+ |
|
227 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim} |
|
228 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_470.CDF |
|
229 |
+\end{verbatim} |
|
230 |
+ |
|
231 |
+ |
|
232 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim} |
|
233 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_471.CDF |
|
234 |
+\end{verbatim} |
|
235 |
+ |
|
236 |
+ |
|
237 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim} |
|
238 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_479.CDF |
|
239 |
+\end{verbatim} |
|
240 |
+ |
|
241 |
+ |
|
242 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{alltt} |
|
243 |
+\hlstd{pd} \hlkwb{<-} \hlkwd{addAMDISPeaks}\hlstd{(pd, eluFiles)} |
|
244 |
+\end{alltt} |
|
245 |
+\begin{verbatim} |
|
246 |
+## Reading retention time range: 7.500133 8.499917 |
|
247 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_468.ELU ... Done. |
|
248 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_474.ELU ... Done. |
|
249 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_475.ELU ... Done. |
|
250 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_485.ELU ... Done. |
|
251 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_493.ELU ... Done. |
|
252 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_496.ELU ... Done. |
|
253 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_470.ELU ... Done. |
|
254 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_471.ELU ... Done. |
|
255 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_479.ELU ... Done. |
|
256 |
+\end{verbatim} |
|
257 |
+\begin{alltt} |
|
258 |
+\hlstd{pd} |
|
259 |
+\end{alltt} |
|
260 |
+\begin{verbatim} |
|
261 |
+## An object of class "peaksDataset" |
|
262 |
+## 9 samples: 0709_468 0709_474 0709_475 0709_485 0709_493 0709_496 0709_470 0709_471 0709_479 |
|
263 |
+## 501 m/z bins - range: ( 50 550 ) |
|
264 |
+## scans: 175 175 175 175 175 174 175 175 175 |
|
265 |
+## peaks: 24 23 26 20 27 24 24 25 21 |
|
266 |
+\end{verbatim} |
|
267 |
+\end{kframe} |
|
268 |
+\end{knitrout} |
|
269 |
+ |
|
270 |
+Here, we have added peaks from AMDIS, a well-known and mature |
|
271 |
+algorithm for deconvolution of GC-MS data. For GC-TOF-MS data, we have |
|
272 |
+implemented a parser for the \texttt{ChromaTOF} output (see the |
|
273 |
+analogous \texttt{addChromaTOFPeaks} function). The |
|
274 |
+\texttt{addXCMSPeaks} function allows the use of all the XCMS peak-picking |
|
275 |
+algorithms; with this approach it is also possible to process the |
|
276 |
+raw data files from within R instead of relying on external software. |
|
277 |
+%% Support for XMCS or MzMine may be added in the future. Ask the author |
|
278 |
+%% if another detection result format is desired as the parsers are |
|
279 |
+%% generally easy to design. |
|
280 |
+In particular, the function reads the raw data using XCMS, groups each extracted ion |
|
281 |
+according to its retention time using CAMERA, and attaches the resulting peaks to an |
|
282 |
+already created \texttt{peaksDataset} object: |
|
283 |
+ |
|
284 |
+\begin{knitrout} |
|
285 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
286 |
+\begin{alltt} |
|
287 |
+\hlstd{pd.2} \hlkwb{<-} \hlkwd{peaksDataset}\hlstd{(cdfFiles[}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{],} \hlkwc{mz} \hlstd{=} \hlkwd{seq}\hlstd{(}\hlnum{50}\hlstd{,} \hlnum{550}\hlstd{),} \hlkwc{rtrange} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,} \hlnum{8.5}\hlstd{))} |
|
288 |
+\end{alltt} |
|
289 |
+\begin{verbatim} |
|
290 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_468.CDF |
|
291 |
+\end{verbatim} |
|
292 |
+ |
|
293 |
+ |
|
294 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim} |
|
295 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_474.CDF |
|
296 |
+\end{verbatim} |
|
297 |
+ |
|
298 |
+ |
|
299 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{verbatim} |
|
300 |
+## Reading /usr/local/lib/R/site-library/gcspikelite/data/0709_475.CDF |
|
301 |
+\end{verbatim} |
|
302 |
+ |
|
303 |
+ |
|
304 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}}\begin{alltt} |
|
305 |
+\hlstd{cwt} \hlkwb{<-} \hlstd{xcms}\hlopt{::}\hlkwd{CentWaveParam}\hlstd{(}\hlkwc{snthresh} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{ppm} \hlstd{=} \hlnum{3000}\hlstd{,} \hlkwc{peakwidth} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,} \hlnum{40}\hlstd{),} |
|
306 |
+ \hlkwc{prefilter} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,} \hlnum{100}\hlstd{),} \hlkwc{fitgauss} \hlstd{=} \hlnum{FALSE}\hlstd{,} \hlkwc{integrate} \hlstd{=} \hlnum{2}\hlstd{,} \hlkwc{noise} \hlstd{=} \hlnum{0}\hlstd{,} |
|
307 |
+ \hlkwc{extendLengthMSW} \hlstd{=} \hlnum{TRUE}\hlstd{,} \hlkwc{mzCenterFun} \hlstd{=} \hlstr{"wMean"}\hlstd{)} |
|
308 |
+\hlstd{mfp} \hlkwb{<-} \hlstd{xcms}\hlopt{::}\hlkwd{MatchedFilterParam}\hlstd{(}\hlkwc{fwhm} \hlstd{=} \hlnum{10}\hlstd{,} \hlkwc{snthresh} \hlstd{=} \hlnum{5}\hlstd{)} |
|
309 |
+\hlstd{pd.2} \hlkwb{<-} \hlkwd{addXCMSPeaks}\hlstd{(cdfFiles[}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{], pd.2,} \hlkwc{settings} \hlstd{= mfp,} \hlkwc{minintens} \hlstd{=} \hlnum{100}\hlstd{,} |
|
310 |
+ \hlkwc{multipleMatchedFilter} \hlstd{=} \hlnum{FALSE}\hlstd{,} \hlkwc{multipleMatchedFilterParam} \hlstd{=} |
|
311 |
+ \hlkwd{list}\hlstd{(}\hlkwc{fwhm} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{5}\hlstd{,} \hlnum{10}\hlstd{,} \hlnum{20}\hlstd{),} \hlkwc{rt_abs} \hlstd{=} \hlnum{2}\hlstd{,} \hlkwc{mz_abs} \hlstd{=} \hlnum{0.1}\hlstd{)} |
|
312 |
+ \hlstd{)} |
|
313 |
+\end{alltt} |
|
314 |
+ |
|
315 |
+ |
|
316 |
+{\ttfamily\noindent\itshape\color{messagecolor}{\#\# Create profile matrix with method 'bin' and step 1 ... OK}} |
|
317 |
+ |
|
318 |
+{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in xcmsSet(x, method = "{}centWave"{}, prefilter = c(5, 100), scanrange = scanRange, : Chromatographic peak detection failed for all files! The first error was: Error in .local(object, ...): unused arguments (settings = new("{}MatchedFilterParam"{}, binSize = 0.1, impute = "{}none"{}, baseValue = numeric(0), distance = numeric(0), fwhm = 10, sigma = 4.24664515033124, max = 5, snthresh = 5, steps = 2, mzdiff = 0.6, index = FALSE), minintens = 100, multipleMatchedFilter = FALSE, multipleMatchedFilterParam = list(c(5, 10, 20), 2, 0.1))}}\begin{alltt} |
|
319 |
+\hlstd{pd.2} |
|
320 |
+\end{alltt} |
|
321 |
+\begin{verbatim} |
|
322 |
+## An object of class "peaksDataset" |
|
323 |
+## 3 samples: 0709_468 0709_474 0709_475 |
|
324 |
+## 501 m/z bins - range: ( 50 550 ) |
|
325 |
+## scans: 175 175 175 |
|
326 |
+## peaks: |
|
327 |
+\end{verbatim} |
|
328 |
+\end{kframe} |
|
329 |
+\end{knitrout} |
|
330 |
+ |
|
331 |
+Support for running these computations on a computer cluster will be added in the future. |
|
332 |
+ |
|
333 |
+Regardless of platform and peak detection algorithm, a useful |
|
334 |
+visualization of a set of samples is the set of total ion currents |
|
335 |
+(TICs) or extracted ion currents (XICs). To view TICs, you can call: |
|
336 |
+ |
|
337 |
+\begin{knitrout} |
|
338 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
339 |
+\begin{alltt} |
|
340 |
+\hlkwd{plotChrom}\hlstd{(pd,} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{),} \hlkwc{plotPeaks}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{plotPeakLabels}\hlstd{=}\hlnum{TRUE}\hlstd{,} |
|
341 |
+ \hlkwc{max.near}\hlstd{=}\hlnum{8}\hlstd{,} \hlkwc{how.near}\hlstd{=}\hlnum{0.5}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{rep}\hlstd{(}\hlkwd{c}\hlstd{(}\hlstr{"blue"}\hlstd{,}\hlstr{"red"}\hlstd{,}\hlstr{"black"}\hlstd{),} \hlkwc{each}\hlstd{=}\hlnum{3}\hlstd{))} |
|
342 |
+\end{alltt} |
|
343 |
+ |
|
344 |
+ |
|
345 |
+{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in (function (classes, fdef, mtable) : unable to find an inherited method for function 'plotChrom' for signature '"{}peaksDataset"{}'}}\end{kframe} |
|
346 |
+\end{knitrout} |
|
347 |
+ |
|
348 |
+Note here the little {\em hashes} represent the detected peaks and are |
|
349 |
+labelled with an integer index. One of the main challenges is to match |
|
350 |
+these peak detections across several samples, given that they appear at |
|
351 |
+slightly different times in different runs. |
|
352 |
+ |
|
353 |
+For XICs, you need to give the indices (of \texttt{pd@mz}, the grid of |
|
354 |
+mass-to-charge values) that you want to plot through the |
|
355 |
+\texttt{mzind} argument. This could be a single ion |
|
356 |
+(e.g. \texttt{mzind=24}) or could be a range of indices if multiple |
|
357 |
+ions are of interest (e.g. \texttt{mzind=c(24,25,98,99)}). |
|
358 |
+ |
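+As a minimal sketch (not executed here, and assuming that the |
+chromatogram plotting function used in the chunks above accepts the |
+\texttt{mzind} argument described in this paragraph), an XIC plot |
+could be requested along these lines: |
+ |
+\begin{verbatim} |
+## illustrative only: XICs for a handful of fragment ions |
+ionIdx <- c(24, 25, 98, 99)   # indices into pd@mz, not m/z values |
+pd@mz[ionIdx]                 # the corresponding m/z values |
+plotChrom(pd, rtrange = c(7.5, 8.5), mzind = ionIdx) |
+\end{verbatim} |
+ |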
|
359 |
+There are several other features within the \texttt{plot} command on |
|
360 |
+\texttt{peaksDataset} objects that can be useful. See \texttt{?plot} |
|
361 |
+(and select the flagme version) for full details. |
|
362 |
+ |
|
363 |
+Another useful visualization, at least for individual samples, is a 2D |
|
364 |
+heatmap of intensity. Such plots can be enlightening, especially when |
|
365 |
+peak detection results are overlaid. For example (with detected |
|
366 |
+fragment peaks from AMDIS shown in white): |
|
367 |
+ |
|
368 |
+\begin{knitrout} |
|
369 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
370 |
+\begin{alltt} |
|
371 |
+\hlstd{r} \hlkwb{<-} \hlnum{1} |
|
372 |
+\hlkwd{plotImage}\hlstd{(pd,} \hlkwc{run}\hlstd{=r,} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{),} \hlkwc{main}\hlstd{=}\hlstr{""}\hlstd{)} |
|
373 |
+\hlstd{v} \hlkwb{<-} \hlkwd{which}\hlstd{(pd}\hlopt{@}\hlkwc{peaksdata}\hlstd{[[r]]} \hlopt{>} \hlnum{0}\hlstd{,} \hlkwc{arr.ind}\hlstd{=}\hlnum{TRUE}\hlstd{)} \hlcom{# find detected peaks} |
|
374 |
+\hlkwd{abline}\hlstd{(}\hlkwc{v}\hlstd{=pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[r]])} |
|
375 |
+\hlkwd{points}\hlstd{(pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[r]][v[,}\hlnum{2}\hlstd{]], pd}\hlopt{@}\hlkwc{mz}\hlstd{[v[,}\hlnum{1}\hlstd{]],} \hlkwc{pch}\hlstd{=}\hlnum{19}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{.6}\hlstd{,} \hlkwc{col}\hlstd{=}\hlstr{"white"}\hlstd{)} |
|
376 |
+\end{alltt} |
|
377 |
+\end{kframe} |
|
378 |
+\includegraphics[width=\maxwidth]{figure/plotexample2-1} |
|
379 |
+\end{knitrout} |
|
380 |
+ |
|
381 |
+ |
|
382 |
+\section{Pairwise alignment with dynamic programming algorithm} |
|
383 |
+One of the first challenges of GC-MS data is the matching of detected |
|
384 |
+peaks (i.e. metabolites) across several samples. Although gas |
|
385 |
+chromatography is quite robust, there can be some drift in the elution |
|
386 |
+time of the same analyte from run to run. We have devised a strategy, |
|
387 |
+based on dynamic programming, that takes into account both the |
|
388 |
+similarity in spectrum (at the apex of the called peak) and the |
|
389 |
+similarity in retention time, without requiring the identity of each |
|
390 |
+peak; this matching uses the data alone. If each sample gives a `peak |
|
391 |
+list' of the detected peaks (such as that from AMDIS that we have |
|
392 |
+attached to our \texttt{peaksDataset} object), the challenge is to |
|
393 |
+introduce gaps into these lists such that they are best aligned. From |
|
394 |
+this a matrix of retention times or a matrix of peak abundances can be |
|
395 |
+extracted for further statistical analysis, visualization and |
|
396 |
+interpretation. For this matching, we created a procedure analogous to |
|
397 |
+a multiple {\em sequence} alignment. |
|
398 |
+ |
|
399 |
+To highlight the dynamic programming-based alignment strategy, we |
|
400 |
+first illustrate a pairwise alignment of two peak lists. This example |
|
401 |
+also illustrates the selection of parameters necessary for the |
|
402 |
+alignment. From the data read in previously, let us consider the |
|
403 |
+alignment of two samples, denoted \texttt{0709\_468} and |
|
404 |
+\texttt{0709\_474}. First, a similarity matrix for two samples is |
|
405 |
+calculated. This is based on a scoring function that takes |
|
406 |
+into account both the similarity in retention time and the similarity of |
|
407 |
+the apex spectra, according to: |
|
408 |
+\[ |
|
409 |
+S_{ij}(D) = \frac{\sum_{k=1}^K x_{ik} y_{jk}}{\sqrt{ \sum_{k=1}^K |
|
410 |
+ x_{ik}^2 \cdot \sum_{k=1}^K y_{jk}^2 } } \cdot \exp \left( - |
|
411 |
+ \frac{1}{2} \frac{(t_i-t_j)^2}{D^2} \right) |
|
412 |
+\] |
|
413 |
+\noindent where $i$ is the index of the peak in the first sample and |
|
414 |
+$j$ is the index of the peak in the second sample, $\mathbf{x}_i$ and |
|
415 |
+$\mathbf{y}_j$ are the spectra vectors and $t_i$ and $t_j$ are their |
|
416 |
+respective retention times. As you can see, there are two components |
|
417 |
+to the similarity: spectra similarity (left term) and similarity in |
|
418 |
+retention time (right term). Of course, other metrics for spectra |
|
419 |
+similarity are feasible. Ask the author if you want to see other |
|
420 |
+metrics implemented. We have some non-optimized code for a few |
|
421 |
+alternative metrics. |
|
422 |
+ |
|
423 |
+The peak alignment algorithm, much like sequence alignments, requires |
|
424 |
+a \texttt{gap} parameter to be set, here a number between 0 and 1. A |
|
425 |
+high gap penalty discourages gaps when matching the two lists of peaks |
|
426 |
+and a low gap penalty allows gaps at a very low {\em cost}. We find |
|
427 |
+that a gap penalty in the middle range (0.4-0.6) works well for GC-MS |
|
428 |
+peak matching. Another parameter, \texttt{D}, modulates the impact of |
|
429 |
+the difference in retention time penalty. A large value for |
|
430 |
+\texttt{D} essentially eliminates the effect. Generally, we set this |
|
431 |
+parameter to be a bit larger than the average width of a peak, |
|
432 |
+allowing a little flexibility for retention time shifts between |
|
433 |
+samples. Keep in mind the \texttt{D} parameter should be set on the |
|
434 |
+scale (i.e. seconds or minutes) of the \texttt{peaksrt} slot of the |
|
435 |
+\texttt{peaksDataset} object. The next example shows the effect of |
|
436 |
+the \texttt{gap} and \texttt{D} penalty on the matching of a small |
|
437 |
+range of peaks. |
|
438 |
+ |
|
439 |
+\begin{knitrout} |
|
440 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
441 |
+\begin{alltt} |
|
442 |
+\hlstd{Ds} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlnum{0.1}\hlstd{,} \hlnum{10}\hlstd{,} \hlnum{0.1}\hlstd{,} \hlnum{0.1}\hlstd{)} |
|
443 |
+\hlstd{gaps} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlnum{0.5}\hlstd{,} \hlnum{0.5}\hlstd{,} \hlnum{0.1}\hlstd{,} \hlnum{0.9}\hlstd{)} |
|
444 |
+\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{2}\hlstd{),} \hlkwc{mai}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0.8466}\hlstd{,}\hlnum{0.4806}\hlstd{,}\hlnum{0.4806}\hlstd{,}\hlnum{0.1486}\hlstd{))} |
|
445 |
+\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlnum{4}\hlstd{)\{} |
|
446 |
+ \hlstd{pa} \hlkwb{<-} \hlkwd{peaksAlignment}\hlstd{(pd}\hlopt{@}\hlkwc{peaksdata}\hlstd{[[}\hlnum{1}\hlstd{]], pd}\hlopt{@}\hlkwc{peaksdata}\hlstd{[[}\hlnum{2}\hlstd{]],} |
|
447 |
+ \hlstd{pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[}\hlnum{1}\hlstd{]], pd}\hlopt{@}\hlkwc{peaksrt}\hlstd{[[}\hlnum{2}\hlstd{]],} \hlkwc{D}\hlstd{=Ds[i],} |
|
448 |
+ \hlkwc{gap}\hlstd{=gaps[i],} \hlkwc{metric}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{type}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{compress} \hlstd{=} \hlnum{FALSE}\hlstd{)} |
|
449 |
+ \hlkwd{plotAlignment}\hlstd{(pa,} \hlkwc{xlim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,} \hlnum{17}\hlstd{),} \hlkwc{ylim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,} \hlnum{16}\hlstd{),} \hlkwc{matchCol}\hlstd{=}\hlstr{"yellow"}\hlstd{,} |
|
450 |
+ \hlkwc{main}\hlstd{=}\hlkwd{paste}\hlstd{(}\hlstr{"D="}\hlstd{, Ds[i],} \hlstr{" gap="}\hlstd{, gaps[i],} \hlkwc{sep}\hlstd{=}\hlstr{""}\hlstd{))} |
|
451 |
+\hlstd{\}} |
|
452 |
+\end{alltt} |
|
453 |
+\begin{verbatim} |
|
454 |
+## [peaksAlignment] Comparing 24 peaks to 23 peaks -- gap= 0.5 D= 0.001 , metric= 1 , type= 1 |
|
455 |
+## [peaksAlignment] 21 matched. Similarity= 0.7983038 |
|
456 |
+\end{verbatim} |
|
457 |
+ |
|
458 |
+ |
|
459 |
+{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in plotAlignment(pa, xlim = c(0, 17), ylim = c(0, 16), matchCol = "{}yellow"{}, : could not find function "{}plotAlignment"{}}}\end{kframe} |
|
460 |
+\end{knitrout} |
|
461 |
+ |
|
462 |
+You might ask: is the flagme package useful without peak detection |
|
463 |
+results? Possibly. There have been some developments in alignment |
|
464 |
+(generally on LC-MS proteomics experiments) without peak/feature |
|
465 |
+detection, such as Prince et al. 2006, where a very similar dynamic |
|
466 |
+programming is used for a pairwise alignment. We have experimented |
|
467 |
+with alignments without using the peaks, but do not have any |
|
468 |
+convincing results. It does introduce a new set of challenges in terms |
|
469 |
+of highlighting differentially abundant metabolites. However, in the |
|
470 |
+\texttt{peaksAlignment} routine above (and those mentioned below), you |
|
471 |
+can set \texttt{usePeaks=FALSE} in order to do {\em scan}-based |
|
472 |
+alignments instead of {\em peak}-based alignments. In addition, the |
|
473 |
+\texttt{flagme} package may be useful simply for its bare-bones |
|
474 |
+dynamic programming algorithm. |
|
475 |
+ |
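+As an illustration of the scan-based mode, here is a sketch only (not |
+run in this vignette; it assumes that the raw intensity matrices and |
+scan times are held in the \texttt{rawdata} and \texttt{rawrt} slots |
+of the \texttt{peaksDataset} object): |
+ |
+\begin{verbatim} |
+## illustrative only: align full scans rather than detected peaks |
+pa.scan <- peaksAlignment(pd@rawdata[[1]], pd@rawdata[[2]], |
+                          pd@rawrt[[1]], pd@rawrt[[2]], |
+                          D = 0.1, gap = 0.5, usePeaks = FALSE) |
+\end{verbatim} |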
|
476 |
+ |
|
477 |
+\subsection{Normalizing retention time score to drift estimates} |
|
478 |
+In the pairwise alignments described above, we penalize |
|
479 |
+differences in retention time that are non-zero. But, as you can |
|
480 |
+see from the TICs, some differences in retention time are |
|
481 |
+consistent. For example, all of the peaks from sample |
|
482 |
+\texttt{0709\_485} elute at later times than peaks from sample |
|
483 |
+\texttt{0709\_496}. We should be able to estimate this drift and |
|
484 |
+normalize the time penalty to that estimate, instead of penalizing to |
|
485 |
+zero. That is, we should replace $t_i-t_j$ with $t_i-t_j-\hat{d}_{ij}$ |
|
486 |
+where $\hat{d}_{ij}$ is the expected drift between peak $i$ of the |
|
487 |
+first sample and peak $j$ of the second sample. |
|
488 |
+ |
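+Written out, the drift-corrected score would then take the form |
+\[ |
+S_{ij}(D) = \frac{\sum_{k=1}^K x_{ik} y_{jk}}{\sqrt{ \sum_{k=1}^K |
+  x_{ik}^2 \cdot \sum_{k=1}^K y_{jk}^2 } } \cdot \exp \left( - |
+  \frac{1}{2} \frac{(t_i-t_j-\hat{d}_{ij})^2}{D^2} \right) |
+\] |
+\noindent with the spectrum-similarity term left unchanged. |
+ |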
|
489 |
+More details coming soon. |
|
490 |
+ |
|
491 |
+ |
|
492 |
+\subsection{Imputing location of undetected peaks} |
|
493 |
+One goal of the alignment leading into downstream data analyses is the |
|
494 |
+generation of a table of abundances for each metabolite across all |
|
495 |
+samples. As you can see from the TICs above, there are some low |
|
496 |
+intensity peaks that fall below detection in some but not all |
|
497 |
+samples. Our view is that instead of inserting arbitrary low constants |
|
498 |
+(such as 0 or half the detection limit) or imputing the intensities |
|
499 |
+post-hoc or having missing data in the data matrices, it is best to |
|
500 |
+return to the area where the peak should be and give some kind |
|
501 |
+of abundance. The alignments themselves are rich in information with |
|
502 |
+respect to the location of undetected peaks. We feel this is a more |
|
503 |
+conservative and statistically valid approach than introducing |
|
504 |
+arbitrary values. |
|
505 |
+ |
|
506 |
+More details coming soon. |
|
507 |
+ |
|
508 |
+ |
|
509 |
+\section{Multiple alignment of several experimental groups} |
|
510 |
+Next, we discuss the multiple alignment of peaks across many |
|
511 |
+samples. With replicates, we typically do an alignment within |
|
512 |
+replicates, then combine these together into a summarized form. This |
|
513 |
+cuts down on the computational cost. For example, consider 2 sets of |
|
514 |
+samples, each with 5 replicates. Aligning first within replicates |
|
515 |
+requires 10+10+1 total alignments whereas an all-pairwise alignment |
|
516 |
+requires 45 pairwise alignments. In addition, this allows some |
|
517 |
+flexibility in setting different gap and distance penalty parameters |
|
518 |
+for the {\em within} alignment and {\em between} alignment. An |
|
519 |
+example follows. |
|
520 |
+ |
|
521 |
+\begin{knitrout} |
|
522 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
523 |
+\begin{alltt} |
|
524 |
+\hlkwd{print}\hlstd{(targets)} |
|
525 |
+\end{alltt} |
|
526 |
+\begin{verbatim} |
|
527 |
+## FileName Group |
|
528 |
+## 1 0709_468.CDF mmA |
|
529 |
+## 2 0709_474.CDF mmA |
|
530 |
+## 3 0709_475.CDF mmA |
|
531 |
+## 4 0709_485.CDF mmC |
|
532 |
+## 5 0709_493.CDF mmC |
|
533 |
+## 6 0709_496.CDF mmC |
|
534 |
+## 7 0709_470.CDF mmD |
|
535 |
+## 8 0709_471.CDF mmD |
|
536 |
+## 9 0709_479.CDF mmD |
|
537 |
+\end{verbatim} |
|
538 |
+\begin{alltt} |
|
539 |
+\hlstd{ma} \hlkwb{<-} \hlkwd{multipleAlignment}\hlstd{(pd,} \hlkwc{group}\hlstd{=targets}\hlopt{$}\hlstd{Group,} \hlkwc{wn.gap}\hlstd{=}\hlnum{0.5}\hlstd{,} \hlkwc{wn.D}\hlstd{=}\hlnum{.05}\hlstd{,} |
|
540 |
+ \hlkwc{bw.gap}\hlstd{=}\hlnum{.6}\hlstd{,} \hlkwc{bw.D}\hlstd{=}\hlnum{0.05}\hlstd{,} \hlkwc{usePeaks}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{filterMin}\hlstd{=}\hlnum{1}\hlstd{,} |
|
541 |
+ \hlkwc{df}\hlstd{=}\hlnum{50}\hlstd{,} \hlkwc{verbose}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{metric} \hlstd{=} \hlnum{1}\hlstd{,} \hlkwc{type} \hlstd{=} \hlnum{1}\hlstd{)} \hlcom{# bug} |
|
542 |
+\end{alltt} |
|
543 |
+\begin{verbatim} |
|
544 |
+## [clusterAlignment] Aligning 0709_468 to 0709_474 |
|
545 |
+## [peaksAlignment] Comparing 24 peaks to 23 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 |
|
546 |
+## [peaksAlignment] 22 matched. Similarity= 0.8625793 |
|
547 |
+## [clusterAlignment] Aligning 0709_468 to 0709_475 |
|
548 |
+## [peaksAlignment] Comparing 24 peaks to 26 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 |
|
549 |
+## [peaksAlignment] 15 matched. Similarity= 0.8 |
|
550 |
+## [clusterAlignment] Aligning 0709_474 to 0709_475 |
|
551 |
+## [peaksAlignment] Comparing 23 peaks to 26 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 |
|
552 |
+## [peaksAlignment] 20 matched. Similarity= 0.899699 |
|
553 |
+## [progressiveAlignment] Doing merge -1 -3 |
|
554 |
+## [progressiveAlignment] left.runs: 1 , right.runs: 3 |
|
555 |
+## [progressiveAlignment] Doing merge -2 1 |
|
556 |
+## [progressiveAlignment] left.runs: 2 , right.runs: 1 3 |
|
557 |
+## [progressiveAlignment] (dot=50) going to 23 : |
|
558 |
+## used (Mb) gc trigger (Mb) max used (Mb) |
|
559 |
+## Ncells 7846781 419.1 12592810 672.6 12592810 672.6 |
|
560 |
+## Vcells 14553341 111.1 38545179 294.1 38545179 294.1 |
|
561 |
+## [clusterAlignment] Aligning 0709_485 to 0709_493 |
|
562 |
+## [peaksAlignment] Comparing 20 peaks to 27 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 |
|
563 |
+## [peaksAlignment] 20 matched. Similarity= 0.9354748 |
|
564 |
+## [clusterAlignment] Aligning 0709_485 to 0709_496 |
|
565 |
+## [peaksAlignment] Comparing 20 peaks to 24 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 |
|
566 |
+## [peaksAlignment] 20 matched. Similarity= 0.9359244 |
|
567 |
+## [clusterAlignment] Aligning 0709_493 to 0709_496 |
|
568 |
+## [peaksAlignment] Comparing 27 peaks to 24 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 |
|
569 |
+## [peaksAlignment] 22 matched. Similarity= 0.8515771 |
|
570 |
+## [progressiveAlignment] Doing merge -5 -6 |
|
571 |
+## [progressiveAlignment] left.runs: 5 , right.runs: 6 |
|
572 |
+## [progressiveAlignment] Doing merge -4 1 |
|
573 |
+## [progressiveAlignment] left.runs: 4 , right.runs: 5 6 |
|
574 |
+## [progressiveAlignment] (dot=50) going to 20 : |
|
575 |
+## used (Mb) gc trigger (Mb) max used (Mb) |
|
576 |
+## Ncells 7846949 419.1 12592810 672.6 12592810 672.6 |
|
577 |
+## Vcells 14554064 111.1 38545179 294.1 38545179 294.1 |
|
578 |
+## [clusterAlignment] Aligning 0709_470 to 0709_471 |
|
579 |
+## [peaksAlignment] Comparing 24 peaks to 25 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 |
|
580 |
+## [peaksAlignment] 22 matched. Similarity= 0.8879722 |
|
581 |
+## [clusterAlignment] Aligning 0709_470 to 0709_479 |
|
582 |
+## [peaksAlignment] Comparing 24 peaks to 21 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 |
|
583 |
+## [peaksAlignment] 21 matched. Similarity= 0.8564714 |
|
584 |
+## [clusterAlignment] Aligning 0709_471 to 0709_479 |
|
585 |
+## [peaksAlignment] Comparing 25 peaks to 21 peaks -- gap= 0.5 D= 5e-04 , metric= 1 , type= 1 |
|
586 |
+## [peaksAlignment] 19 matched. Similarity= 0.8258931 |
|
587 |
+## [progressiveAlignment] Doing merge -8 -9 |
|
588 |
+## [progressiveAlignment] left.runs: 8 , right.runs: 9 |
|
589 |
+## [progressiveAlignment] Doing merge -7 1 |
|
590 |
+## [progressiveAlignment] left.runs: 7 , right.runs: 8 9 |
|
591 |
+## [progressiveAlignment] (dot=50) going to 24 : |
|
592 |
+## used (Mb) gc trigger (Mb) max used (Mb) |
|
593 |
+## Ncells 7847143 419.1 12592810 672.6 12592810 672.6 |
|
594 |
+## Vcells 14555317 111.1 38545179 294.1 38545179 294.1 |
|
595 |
+## [clusterAlignment] Aligning to |
|
596 |
+## [peaksAlignment] Comparing 36 peaks to 29 peaks -- gap= 0.6 D= 5e-04 , metric= 1 , type= 1 |
|
597 |
+## [peaksAlignment] 25 matched. Similarity= 0.9094807 |
|
598 |
+## [clusterAlignment] Aligning to |
|
599 |
+## [peaksAlignment] Comparing 36 peaks to 29 peaks -- gap= 0.6 D= 5e-04 , metric= 1 , type= 1 |
|
600 |
+## [peaksAlignment] 29 matched. Similarity= 0.8798354 |
|
601 |
+## [clusterAlignment] Aligning to |
|
602 |
+## [peaksAlignment] Comparing 29 peaks to 29 peaks -- gap= 0.6 D= 5e-04 , metric= 1 , type= 1 |
|
603 |
+## [peaksAlignment] 29 matched. Similarity= 0.9655151 |
|
604 |
+## [progressiveAlignment] Doing merge -1 -3 |
|
605 |
+## [progressiveAlignment] left.runs: 1 , right.runs: 3 |
|
606 |
+## [progressiveAlignment] Doing merge -2 1 |
|
607 |
+## [progressiveAlignment] left.runs: 2 , right.runs: 1 3 |
|
608 |
+## [progressiveAlignment] (dot=50) going to 29 : |
|
609 |
+## used (Mb) gc trigger (Mb) max used (Mb) |
|
610 |
+## Ncells 7848679 419.2 12592810 672.6 12592810 672.6 |
|
611 |
+## Vcells 14607964 111.5 38545179 294.1 38545179 294.1 |
|
612 |
+\end{verbatim} |
|
613 |
+\begin{alltt} |
|
614 |
+\hlstd{ma} |
|
615 |
+\end{alltt} |
|
616 |
+\begin{verbatim} |
|
617 |
+## An object of class "multipleAlignment" |
|
618 |
+## 3 groups: 3 3 3 samples, respectively. |
|
619 |
+## 36 merged peaks |
|
620 |
+\end{verbatim} |
|
621 |
+\end{kframe} |
|
622 |
+\end{knitrout} |
|
623 |
+ |
|
624 |
+If you set \texttt{verbose=TRUE}, many nitty-gritty details of the |
|
625 |
+alignment procedure are given. Next, we can take the alignment |
|
626 |
+results and overlay them onto the TICs, allowing a visual inspection. |
|
627 |
+ |
|
628 |
+\begin{knitrout} |
|
629 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
630 |
+\begin{alltt} |
|
631 |
+\hlkwd{plotChrom}\hlstd{(pd,} \hlkwc{rtrange}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{7.5}\hlstd{,}\hlnum{8.5}\hlstd{),} \hlkwc{runs}\hlstd{=ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{runs}\hlstd{,} |
|
632 |
+ \hlkwc{mind}\hlstd{=ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{ind}\hlstd{,} \hlkwc{plotPeaks}\hlstd{=}\hlnum{TRUE}\hlstd{,} |
|
633 |
+ \hlkwc{plotPeakLabels}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{max.near}\hlstd{=}\hlnum{8}\hlstd{,} \hlkwc{how.near}\hlstd{=}\hlnum{.5}\hlstd{,} |
|
634 |
+ \hlkwc{col}\hlstd{=}\hlkwd{rep}\hlstd{(}\hlkwd{c}\hlstd{(}\hlstr{"blue"}\hlstd{,}\hlstr{"red"}\hlstd{,}\hlstr{"black"}\hlstd{),} \hlkwc{each}\hlstd{=}\hlnum{3}\hlstd{))} |
|
635 |
+\end{alltt} |
|
636 |
+ |
|
637 |
+ |
|
638 |
+{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in (function (classes, fdef, mtable) : unable to find an inherited method for function 'plotChrom' for signature '"{}peaksDataset"{}'}}\end{kframe} |
|
639 |
+\end{knitrout} |
|
640 |
+ |
|
641 |
+ |
|
642 |
+% \section{Correlation Alignment algorithm} |
|
643 |
+% Another approach, represented by the \texttt{correlationAlignment} |
|
644 |
+% function, is to use a modified form of the Pearson correlation |
|
645 |
+% algorithm. After the correlation between two samples is calculated, a |
|
646 |
+% penalization coefficient, based on the retention time differences, is |
|
647 |
+% applied to the result. It is also possible to set a retention time |
|
648 |
+% range in which the penalization is 0, this because in gas |
|
649 |
+% chromatography we can have a little deviation in the retention time of |
|
650 |
+% the metabolite so, based on the experimental data, we can choose the |
|
651 |
+% retention time window for the penalization coefficient being applied. |
|
652 |
+ |
|
653 |
+\begin{knitrout} |
|
654 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
655 |
+\begin{alltt} |
|
656 |
+\hlstd{mp} \hlkwb{<-} \hlkwd{correlationAlignment}\hlstd{(}\hlkwc{object}\hlstd{=pd.2,} \hlkwc{thr}\hlstd{=}\hlnum{0.85}\hlstd{,} \hlkwc{D}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{penality}\hlstd{=}\hlnum{0.2}\hlstd{,} |
|
657 |
+ \hlkwc{normalize}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{minFilter}\hlstd{=}\hlnum{1}\hlstd{)} |
|
658 |
+\hlstd{mp} |
|
659 |
+\end{alltt} |
|
660 |
+\end{kframe} |
|
661 |
+\end{knitrout} |
|
662 |
+ |
|
663 |
+% \noindent where \texttt{thr} represent correlation threshold from 0 |
|
664 |
+% (min) to 1 (max); \texttt{D} represent the retention time window in |
|
665 |
+% seconds; \texttt{penality} represent the penality inflicted to a match |
|
666 |
+% between two peaks when the retention time difference exceed the |
|
667 |
+% parameter \texttt{D}; \texttt{normalize} is about the peak |
|
668 |
+% normalization-to-100 before the correlation is calculated; |
|
669 |
+% \texttt{minFilter} give the opportunity to exclude from the resulting |
|
670 |
+% correlation matrix each feature that in represented in our samples |
|
671 |
+% less time than this value. The value of minFilter must be smaller than |
|
672 |
+% the number of samples. |
|
673 |
+ |
|
674 |
+% The correlation-based peak alignment for multiple GC-MS |
|
675 |
+% peak lists uses a center-star technique to the alignment of the |
|
676 |
+% peaks. The combination of the \texttt{D} and \texttt{penality} parameters |
|
677 |
+% allow the users to force the algorithm to match the peaks close to the |
|
678 |
+% reference. The \texttt{thr} parameter control the matching factor. |
|
679 |
+ |
|
680 |
+ |
|
681 |
+\subsection{Gathering results} |
|
682 |
+The alignment results can be extracted from the \texttt{multipleAlignment} |
|
683 |
+object as: |
|
684 |
+\begin{knitrout} |
|
685 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
686 |
+\begin{alltt} |
|
687 |
+\hlstd{ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{runs} |
|
688 |
+\end{alltt} |
|
689 |
+\begin{verbatim} |
|
690 |
+## [1] 4 5 6 2 1 3 7 8 9 |
|
691 |
+\end{verbatim} |
|
692 |
+\begin{alltt} |
|
693 |
+\hlstd{ma}\hlopt{@}\hlkwc{betweenAlignment}\hlopt{@}\hlkwc{ind} |
|
694 |
+\end{alltt} |
|
695 |
+\begin{verbatim} |
|
696 |
+## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] |
|
697 |
+## [1,] 1 1 1 1 1 1 1 1 1 |
|
698 |
+## [2,] NA 2 NA NA 2 NA 2 2 NA |
|
699 |
+## [3,] NA 3 2 2 3 2 3 3 NA |
|
700 |
+## [4,] 2 4 3 3 4 3 4 4 2 |
|
701 |
+## [5,] 3 5 4 4 5 4 5 5 3 |
|
702 |
+## [6,] 4 6 5 5 6 5 6 6 4 |
|
703 |
+## [7,] 5 7 6 6 7 6 NA NA NA |
|
704 |
+## [8,] 6 8 7 7 8 7 7 7 5 |
|
705 |
+## [9,] 7 9 8 NA NA 8 8 8 6 |
|
706 |
+## [10,] NA NA NA NA NA 9 NA NA NA |
|
707 |
+## [11,] NA NA NA NA NA 10 NA NA NA |
|
708 |
+## [12,] NA 10 NA NA NA 11 NA NA NA |
|
709 |
+## [13,] 8 11 9 NA NA 12 NA NA NA |
|
710 |
+## [14,] NA 12 NA NA NA 13 NA NA NA |
|
711 |
+## [15,] NA 13 NA NA NA 14 NA NA 7 |
|
712 |
+## [16,] 9 14 NA NA NA 15 NA 9 8 |
|
713 |
+## [17,] 10 15 10 8 9 16 NA 10 9 |
|
714 |
+## [18,] 11 16 11 9 10 17 9 11 10 |
|
715 |
+## [19,] NA 17 12 NA NA 18 10 12 NA |
|
716 |
+## [20,] NA 18 13 NA NA 19 11 13 NA |
|
717 |
+## [21,] 12 NA 14 NA 11 20 12 14 NA |
|
718 |
+## [22,] NA NA NA 10 12 21 13 15 NA |
|
719 |
+## [23,] NA NA NA 11 13 22 14 16 11 |
|
720 |
+## [24,] 13 19 15 12 NA NA 15 17 12 |
|
721 |
+## [25,] NA NA NA 13 14 NA 16 NA NA |
|
722 |
+## [26,] NA NA NA 14 15 NA 17 18 13 |
|
723 |
+## [27,] NA NA NA 15 16 NA NA 19 14 |
|
724 |
+## [28,] NA 20 16 16 17 NA 18 20 15 |
|
725 |
+## [29,] NA 21 17 17 18 NA 19 21 16 |
|
726 |
+## [30,] 14 22 18 18 19 NA 20 22 17 |
|
727 |
+## [31,] 15 23 19 19 20 NA 21 NA NA |
|
728 |
+## [32,] 16 NA 20 20 21 NA 22 23 18 |
|
729 |
+## [33,] 17 24 21 21 22 23 NA NA 19 |
|
730 |
+## [34,] 18 25 22 22 23 24 23 24 20 |
|
731 |
+## [35,] 19 26 23 NA NA 25 NA NA NA |
|
732 |
+## [36,] 20 27 24 23 24 26 24 25 21 |
|
733 |
+\end{verbatim} |
|
734 |
+\end{kframe} |
|
735 |
+\end{knitrout} |
|
736 |
+ |
|
737 |
+\noindent This table would suggest that matched peak \texttt{8} (see |
|
738 |
+numbers below the TICs in the figure above) corresponds to detected |
|
739 |
+peaks \texttt{6, 8, 7} in runs \texttt{4, 5, 6}, and so on, as |
|
740 |
+shown in the above plot. |
|
741 |
+ |
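+The same row can be pulled out programmatically from the slots shown |
+above, for example: |
+ |
+\begin{verbatim} |
+## matched peak 8 across runs; columns ordered as in ma@betweenAlignment@runs |
+ma@betweenAlignment@ind[8, ] |
+\end{verbatim} |
+ |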
|
742 |
+In addition, you can gather a list of all the merged peaks with the |
|
743 |
+\texttt{gatherInfo} function, giving elements for the retention times, |
|
744 |
+the detected fragment ions and their intensities. The example below |
|
745 |
+also shows how to construct a table of retention times of the |
|
746 |
+matched peaks (No attempt is made here to adjust retention times onto |
|
747 |
+a common scale. Instead, the peaks are matched to each other on their |
|
748 |
+original scale). For example: |
|
749 |
+ |
|
750 |
+\begin{knitrout} |
|
751 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
752 |
+\begin{alltt} |
|
753 |
+\hlstd{outList} \hlkwb{<-} \hlkwd{gatherInfo}\hlstd{(pd,ma)} |
|
754 |
+\hlstd{outList[[}\hlnum{8}\hlstd{]]} |
|
755 |
+\end{alltt} |
|
756 |
+\begin{verbatim} |
|
757 |
+## $rt |
|
758 |
+## mmC.4 mmC.5 mmC.6 mmA.2 mmA.1 mmA.3 mmD.7 mmD.8 |
|
759 |
+## 7.728317 7.740700 7.711417 7.713550 7.708567 7.711600 7.702967 7.701717 |
|
760 |
+## mmD.9 |
|
761 |
+## 7.713933 |
|
762 |
+## |
|
763 |
+## $mz |
|
764 |
+## [1] 52 59 66 70 72 73 74 75 79 89 104 116 133 147 148 188 204 |
|
765 |
+## |
|
766 |
+## $data |
|
767 |
+## mmC.4 mmC.5 mmC.6 mmA.2 mmA.1 mmA.3 mmD.7 mmD.8 mmD.9 |
|
768 |
+## [1,] 0 0 0 0 0 0 26248 0 0 |
|
769 |
+## [2,] 0 0 0 5113 4425 4994 4855 4557 4728 |
|
770 |
+## [3,] 0 0 0 3926 5146 4876 4831 3354 4783 |
|
771 |
+## [4,] 0 0 0 11568 0 0 10637 0 0 |
|
772 |
+## [5,] 0 0 0 3680 3910 4492 4051 3427 3907 |
|
773 |
+## [6,] 0 0 0 61816 65680 66768 65912 52848 61560 |
|
774 |
+## [7,] 0 0 0 6705 6185 7400 6642 6235 7088 |
|
775 |
+## [8,] 0 0 24160 26376 23328 0 28016 26304 27184 |
|
776 |
+## [9,] 0 0 0 0 0 0 38712 0 0 |
|
777 |
+## [10,] 0 0 0 5617 5347 5702 5173 3946 5659 |
|
778 |
+## [11,] 0 0 0 13173 13808 13207 12852 9816 12492 |
|
779 |
+## [12,] 0 0 0 5417 5525 5912 5577 4504 5201 |
|
780 |
+## [13,] 0 0 0 3539 3730 2910 3599 2436 3893 |
|
781 |
+## [14,] 0 0 17904 21864 20016 22280 21904 17896 22400 |
|
782 |
+## [15,] 0 0 0 4413 3430 3890 3006 3335 3851 |
|
783 |
+## [16,] 0 0 7636 14433 14751 13765 14731 10680 14061 |
|
784 |
+## [17,] 0 0 0 6878 6667 7018 6935 5149 6830 |
|
785 |
+\end{verbatim} |
|
786 |
+\begin{alltt} |
|
787 |
+\hlstd{rtmat} \hlkwb{<-} \hlkwd{matrix}\hlstd{(}\hlkwd{unlist}\hlstd{(}\hlkwd{lapply}\hlstd{(outList,.subset,}\hlstr{"rt"}\hlstd{),} \hlkwc{use.names}\hlstd{=}\hlnum{FALSE}\hlstd{),} |
|
788 |
+ \hlkwc{nr}\hlstd{=}\hlkwd{length}\hlstd{(outList),} \hlkwc{byrow}\hlstd{=}\hlnum{TRUE}\hlstd{)} |
|
789 |
+\hlkwd{colnames}\hlstd{(rtmat)} \hlkwb{<-} \hlkwd{names}\hlstd{(outList[[}\hlnum{1}\hlstd{]]}\hlopt{$}\hlstd{rt);} \hlkwd{rownames}\hlstd{(rtmat)} \hlkwb{<-} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(rtmat)} |
|
790 |
+\hlkwd{round}\hlstd{(rtmat,} \hlnum{3}\hlstd{)} |
|
791 |
+\end{alltt} |
|
792 |
+\begin{verbatim} |
|
793 |
+## mmC.4 mmC.5 mmC.6 mmA.2 mmA.1 mmA.3 mmD.7 mmD.8 mmD.9 |
|
794 |
+## 1 7.534 7.512 7.506 7.531 7.526 7.540 7.520 7.519 7.531 |
|
795 |
+## 2 NA 7.535 NA NA 7.549 NA 7.543 7.547 NA |
|
796 |
+## 3 NA 7.558 7.551 7.559 7.566 7.557 7.560 7.565 NA |
|
797 |
+## 4 7.580 7.575 7.569 7.576 7.583 7.574 7.577 7.582 7.577 |
|
798 |
+## 5 7.597 7.586 7.586 7.588 7.600 7.592 7.594 7.599 7.594 |
|
799 |
+## 6 7.614 7.615 7.614 7.616 7.617 7.614 7.617 7.610 7.617 |
|
800 |
+## 7 7.717 7.695 7.694 7.691 7.663 7.649 NA NA NA |
|
801 |
+## 8 7.728 7.741 7.711 7.714 7.709 7.712 7.703 7.702 7.714 |
|
802 |
+## 9 7.803 7.804 7.803 NA NA 7.803 7.806 7.805 7.805 |
|
803 |
+## 10 NA NA NA NA NA 7.826 NA NA NA |
|
804 |
+## 11 NA NA NA NA NA 7.975 NA NA NA |
|
805 |
+## 12 NA 7.809 NA NA NA 7.997 NA NA NA |
|
806 |
+## 13 7.825 7.849 7.951 NA NA 8.009 NA NA NA |
|
807 |
+## 14 NA 7.946 NA NA NA 8.077 NA NA NA |
|
808 |
+## 15 NA 7.958 NA NA NA 8.095 NA NA 7.817 |
|
809 |
+## 16 7.946 7.969 NA NA NA 8.112 NA 7.816 7.823 |
|
810 |
+## 17 7.974 7.986 7.980 7.736 7.783 8.249 NA 7.907 7.874 |
|
811 |
+## 18 8.008 8.009 8.003 7.799 7.800 8.283 7.812 7.936 7.943 |
|
812 |
+## 19 NA 8.049 8.043 NA NA 8.312 7.966 7.965 NA |
|
813 |
+## 20 NA 8.061 8.060 NA NA 8.335 7.989 7.993 NA |
|
814 |
+## 21 8.077 NA 8.100 NA 7.823 8.357 8.012 8.010 NA |
|
815 |
+## 22 NA NA NA 7.828 7.880 8.375 8.069 8.068 NA |
|
816 |
+## 23 NA NA NA 7.942 7.943 8.403 8.086 8.085 7.977 |
|
817 |
+## 24 8.111 8.107 8.111 7.976 NA NA 8.109 8.108 8.000 |
|
818 |
+## 25 NA NA NA 7.999 7.966 NA 8.172 NA NA |
|
819 |
+## 26 NA NA NA 8.079 7.994 NA 8.246 8.245 8.080 |
|
820 |
+## 27 NA NA NA 8.114 8.011 NA NA 8.262 8.091 |
|
821 |
+## 28 NA 8.204 8.237 8.182 8.109 NA 8.280 8.330 8.114 |
|
822 |
+## 29 NA 8.244 8.254 8.251 8.246 NA 8.326 8.342 8.251 |
|
823 |
+## 30 8.254 8.301 8.294 8.285 8.263 NA 8.360 8.359 8.263 |
|
824 |
+## 31 8.266 8.324 8.323 8.337 8.332 NA 8.377 NA NA |
|
825 |
+## 32 8.334 NA 8.329 8.359 8.360 NA 8.395 8.393 8.337 |
|
826 |
+## 33 8.363 8.352 8.352 8.399 8.394 8.420 NA NA 8.400 |
|
827 |
+## 34 8.403 8.392 8.386 8.434 8.434 8.437 8.435 8.433 8.440 |
|
828 |
+## 35 8.437 8.432 8.432 NA NA 8.443 NA NA NA |
|
829 |
+## 36 8.477 8.461 8.460 8.474 8.469 8.472 8.469 8.468 8.474 |
|
830 |
+\end{verbatim} |
|
831 |
+\end{kframe} |
|
832 |
+\end{knitrout} |
|
833 |
+ |
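+Beyond retention times, the same list can be reduced to a rough |
+abundance table, for example by summing the fragment-ion intensities |
+of each matched peak in every sample (a sketch only; more refined |
+summaries are certainly possible): |
+ |
+\begin{verbatim} |
+## illustrative only: total intensity of each matched peak per sample |
+abmat <- t(sapply(outList, function(x) colSums(x$data))) |
+rownames(abmat) <- seq_len(nrow(abmat)) |
+round(abmat[1:5, ]) |
+\end{verbatim} |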
|
834 |
+ |
|
835 |
+\section{Future improvements and extensions} |
|
836 |
+There are many procedures that we have implemented in our |
|
837 |
+investigation of GC-MS data but have not yet made them part of the |
|
838 |
+package. Some of the most useful procedures will be released, such as: |
|
839 |
+ |
|
840 |
+\begin{enumerate} |
|
841 |
+\item Parsers for other peak detection algorithms (e.g. % XCMS, |
|
842 |
+ MzMine) and parsers for other alignment procedures |
|
843 |
+ (e.g. SpectConnect) and perhaps retention indices procedures. |
|
844 |
+\item More convenient access to the alignment information and |
|
845 |
+ abundance table. |
|
846 |
+\item Statistical analysis of differential metabolite abundance. |
|
847 |
+\item Fragment-level analysis, an alternative method to summarize |
|
848 |
+ abundance across all detected fragments of a metabolite peak. |
|
849 |
+\end{enumerate} |
|
850 |
+ |
|
851 |
+\section{References} |
|
852 |
+See the following for further details: |
|
853 |
+ |
|
854 |
+\begin{enumerate} |
|
855 |
+\item Robinson MD. {\em Methods for the analysis of gas chromatography |
|
856 |
+ - mass spectrometry data.} {\bf Ph.D. Thesis}. October 2008. |
|
857 |
+ Department of Medical Biology (Walter and Eliza Hall Institute of |
|
858 |
+ Medical Research), University of Melbourne. |
|
859 |
+\item Robinson MD, De Souza DP, Keen WW, Saunders EC, McConville MJ, |
|
860 |
+ Speed TP, Liki\'{c} VA. (2007) {\em A dynamic programming approach |
|
861 |
+ for the alignment of signal peaks in multiple gas |
|
862 |
+ chromatography-mass spectrometry experiments.} {\bf BMC |
|
863 |
+ Bioinformatics}. 8:419. |
|
864 |
+\item Prince JT, Marcotte EM (2006) {\em Chromatographic alignment of |
|
865 |
+ ESI-LC-MS proteomics data sets by ordered bijective interpolated |
|
866 |
+ warping}. {\bf Anal Chem}. 78(17):6140-52. |
|
867 |
+\end{enumerate} |
|
868 |
+ |
|
869 |
+\section{This vignette was built with/at ...} |
|
870 |
+ |
|
871 |
+\begin{knitrout} |
|
872 |
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} |
|
873 |
+\begin{alltt} |
|
874 |
+\hlkwd{sessionInfo}\hlstd{()} |
|
875 |
+\end{alltt} |
|
876 |
+\begin{verbatim} |
|
877 |
+## R version 4.1.1 (2021-08-10) |
|
878 |
+## Platform: x86_64-pc-linux-gnu (64-bit) |
|
879 |
+## Running under: Debian GNU/Linux 10 (buster) |
|
880 |
+## |
|
881 |
+## Matrix products: default |
|
882 |
+## BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3 |
|
883 |
+## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.3.5.so |
|
884 |
+## |
|
885 |
+## locale: |
|
886 |
+## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C |
|
887 |
+## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 |
|
888 |
+## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 |
|
889 |
+## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C |
|
890 |
+## [9] LC_ADDRESS=C LC_TELEPHONE=C |
|
891 |
+## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C |
|
892 |
+## |
|
893 |
+## attached base packages: |
|
894 |
+## [1] stats4 parallel stats graphics grDevices utils datasets |
|
895 |
+## [8] methods base |
|
896 |
+## |
|
897 |
+## other attached packages: |
|
898 |
+## [1] flagme_1.48.0 CAMERA_1.48.0 xcms_3.14.1 |
|
899 |
+## [4] MSnbase_2.18.0 ProtGenerics_1.24.0 S4Vectors_0.30.2 |
|
900 |
+## [7] mzR_2.26.1 Rcpp_1.0.7 Biobase_2.52.0 |
|
901 |
+## [10] BiocGenerics_0.38.0 BiocParallel_1.26.2 gcspikelite_1.30.0 |
|
902 |
+## |
|
903 |
+## loaded via a namespace (and not attached): |
|
904 |
+## [1] colorspace_2.0-2 ellipsis_0.3.2 |
|
905 |
+## [3] htmlTable_2.3.0 XVector_0.32.0 |
|
906 |
+## [5] GenomicRanges_1.44.0 base64enc_0.1-3 |
|
907 |
+## [7] clue_0.3-60 rstudioapi_0.13 |
|
908 |
+## [9] affyio_1.62.0 fansi_0.5.0 |
|
909 |
+## [11] codetools_0.2-18 splines_4.1.1 |
|
910 |
+## [13] ncdf4_1.17 doParallel_1.0.16 |
|
911 |
+## [15] impute_1.66.0 robustbase_0.93-9 |
|
912 |
+## [17] knitr_1.36 Formula_1.2-4 |
|
913 |
+## [19] cluster_2.1.2 vsn_3.60.0 |
|
914 |
+## [21] png_0.1-7 graph_1.70.0 |
|
915 |
+## [23] BiocManager_1.30.16 compiler_4.1.1 |
|
916 |
+## [25] backports_1.2.1 assertthat_0.2.1 |
|
917 |
+## [27] Matrix_1.3-4 fastmap_1.1.0 |
|
918 |
+## [29] limma_3.48.3 htmltools_0.5.2 |
|
919 |
+## [31] tools_4.1.1 igraph_1.2.7 |
|
920 |
+## [33] gtable_0.3.0 glue_1.4.2 |
|
921 |
+## [35] GenomeInfoDbData_1.2.6 affy_1.70.0 |
|
922 |
+## [37] RANN_2.6.1 dplyr_1.0.7 |
|
923 |
+## [39] MALDIquant_1.20 vctrs_0.3.8 |
|
924 |
+## [41] preprocessCore_1.54.0 iterators_1.0.13 |
|
925 |
+## [43] xfun_0.26 stringr_1.4.0 |
|
926 |
+## [45] lifecycle_1.0.1 gtools_3.9.2 |
|
927 |
+## [47] XML_3.99-0.8 DEoptimR_1.0-9 |
|
928 |
+## [49] zlibbioc_1.38.0 MASS_7.3-54 |
|
929 |
+## [51] scales_1.1.1 pcaMethods_1.84.0 |
|
930 |
+## [53] MatrixGenerics_1.4.3 SummarizedExperiment_1.22.0 |
|
931 |
+## [55] RBGL_1.68.0 MassSpecWavelet_1.58.0 |
|
932 |
+## [57] SparseM_1.81 RColorBrewer_1.1-2 |
|
933 |
+## [59] gridExtra_2.3 ggplot2_3.3.5 |
|
934 |
+## [61] rpart_4.1-15 latticeExtra_0.6-29 |
|
935 |
+## [63] stringi_1.7.5 highr_0.9 |
|
936 |
+## [65] foreach_1.5.1 checkmate_2.0.0 |
|
937 |
+## [67] caTools_1.18.2 GenomeInfoDb_1.28.4 |
|
938 |
+## [69] rlang_0.4.11 pkgconfig_2.0.3 |
|
939 |
+## [71] matrixStats_0.61.0 bitops_1.0-7 |
|
940 |
+## [73] mzID_1.30.0 evaluate_0.14 |
|
941 |
+## [75] lattice_0.20-45 purrr_0.3.4 |
|
942 |
+## [77] htmlwidgets_1.5.4 tidyselect_1.1.1 |
|
943 |
+## [79] plyr_1.8.6 magrittr_2.0.1 |
|
944 |
+## [81] R6_2.5.1 IRanges_2.26.0 |
|
945 |
+## [83] gplots_3.1.1 generics_0.1.0 |
|
946 |
+## [85] Hmisc_4.6-0 DelayedArray_0.18.0 |
|
947 |
+## [87] DBI_1.1.1 pillar_1.6.3 |
|
948 |
+## [89] foreign_0.8-81 MsCoreUtils_1.4.0 |
|
949 |
+## [91] survival_3.2-13 RCurl_1.98-1.5 |
|
950 |
+## [93] nnet_7.3-16 tibble_3.1.5 |
|
951 |
+## [95] crayon_1.4.1 KernSmooth_2.23-20 |
|
952 |
+## [97] utf8_1.2.2 jpeg_0.1-9 |
|
953 |
+## [99] grid_4.1.1 data.table_1.14.2 |
|
954 |
+## [101] digest_0.6.28 munsell_0.5.0 |
|
955 |
+\end{verbatim} |
|
956 |
+\begin{alltt} |
|
957 |
+\hlkwd{date}\hlstd{()} |
|
958 |
+\end{alltt} |
|
959 |
+\begin{verbatim} |
|
960 |
+## [1] "Mon Oct 18 15:30:58 2021" |
|
961 |
+\end{verbatim} |
|
962 |
+\end{kframe} |
|
963 |
+\end{knitrout} |
|
964 |
+ |
|
965 |
+\end{document} |