1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,121 @@ |
1 |
+ |
|
2 |
+#' @include hidden_aliases.R |
|
3 |
+NULL |
|
4 |
+ |
|
5 |
+#' @title MS data backend for mgf files |
|
6 |
+#' |
|
7 |
+#' @aliases MsBackendMassbank-class |
|
8 |
+#' |
|
9 |
+#' @description |
|
10 |
+#' |
|
11 |
+#' The `MsBackendMassbank` class supports import of MS/MS spectra data from |
|
12 |
+#' files in Mascot Generic Format |
|
13 |
+#' ([mgf](http://www.matrixscience.com/help/data_file_help.html)) |
|
14 |
+#' files. After initial import, the full MS data is kept in |
|
15 |
+#' memory. `MsBackendMassbank` extends the [MsBackendDataFrame()] backend |
|
16 |
+#' directly and supports thus the [applyProcessing()] function to make |
|
17 |
+#' data manipulations persistent. The backend does however not |
|
18 |
+#' support export to mgf files yet. |
|
19 |
+#' |
|
20 |
+#' New objects are created with the `MsBackendMassbank` function. The |
|
21 |
+#' `backendInitialize` method has to be subsequently called to |
|
22 |
+#' initialize the object and import MS/MS data from (one or more) mgf |
|
23 |
+#' files. Optional parameter `nonStop` allows to specify whether the |
|
24 |
+#' import returns with an error if one of the xml files lacks required |
|
25 |
+#' data, such as `mz` and `intensity` values (default `nonStop = |
|
26 |
+#' FALSE`), or whether only affected file(s) is(are) skipped and a |
|
27 |
+#' warning is shown (`nonStop = TRUE`). Note that any other error |
|
28 |
+#' (such as xml import error) will abort import regardless of |
|
29 |
+#' parameter `nonStop`. |
|
30 |
+#' |
|
31 |
+#' @param object Instance of `MsBackendMassbank` class. |
|
32 |
+#' |
|
33 |
+#' @param files `character` with the (full) file name(s) of the mgf file(s) |
|
34 |
+#' from which MS/MS data should be imported. |
|
35 |
+#' |
|
36 |
+#' @param nonStop `logical(1)` whether import should be stopped if an |
|
37 |
+#' xml file does not contain all required fields. Defaults to |
|
38 |
+#' `nonStop = FALSE`. |
|
39 |
+#' |
|
40 |
+#' @param BPPARAM Parameter object defining the parallel processing |
|
41 |
+#' setup to import data in parallel. Defaults to `BPPARAM = |
|
42 |
+#' bpparam()`. See [bpparam()] for more information. |
|
43 |
+#' |
|
44 |
+#' @param ... Currently ignored. |
|
45 |
+#' |
|
46 |
+#' @author Michael Witting |
|
47 |
+#' |
|
48 |
+#' @importClassesFrom Spectra MsBackendDataFrame |
|
49 |
+#' |
|
50 |
+#' @exportClass MsBackendMassbank |
|
51 |
+#' |
|
52 |
+#' @name MsBackendMassbank |
|
53 |
+#' |
|
54 |
+#' @examples |
|
55 |
+#' |
|
56 |
+#' ## Create an MsBackendHmdbXml backend and import data from test xml files. |
|
57 |
+#' fls <- dir(system.file("extdata", package = "MsBackendMassbank"), |
|
58 |
+#' full.names = TRUE, pattern = "mgf$") |
|
59 |
+#' be <- backendInitialize(MsBackendMassbank(), fls) |
|
60 |
+#' be |
|
61 |
+#' |
|
62 |
+#' be$msLevel |
|
63 |
+#' be$intensity |
|
64 |
+#' be$mz |
|
65 |
+NULL |
|
66 |
+ |
|
67 |
+setClass("MsBackendMassbank", |
|
68 |
+ contains = "MsBackendDataFrame", |
|
69 |
+ prototype = prototype(spectraData = DataFrame(), |
|
70 |
+ readonly = FALSE, |
|
71 |
+ version = "0.1")) |
|
72 |
+ |
|
73 |
+#' @importMethodsFrom Spectra backendInitialize asDataFrame<- $<- $ |
|
74 |
+#' |
|
75 |
+#' @importFrom BiocParallel bpparam |
|
76 |
+#' |
|
77 |
+#' @importMethodsFrom BiocParallel bplapply |
|
78 |
+#' |
|
79 |
+#' @importFrom methods validObject |
|
80 |
+#' |
|
81 |
+#' @exportMethod backendInitialize |
|
82 |
+#' |
|
83 |
+#' @rdname MsBackendMassbank |
|
84 |
+setMethod("backendInitialize", signature = "MsBackendMassbank", |
|
85 |
+ function(object, files, nonStop = FALSE, ..., BPPARAM = bpparam()) { |
|
86 |
+ if (missing(files) || !length(files)) |
|
87 |
+ stop("Parameter 'files' is mandatory for ", class(object)) |
|
88 |
+ if (!is.character(files)) |
|
89 |
+ stop("Parameter 'files' is expected to be a character vector", |
|
90 |
+ " with the files names from where data should be", |
|
91 |
+ " imported") |
|
92 |
+ files <- normalizePath(files) |
|
93 |
+ if (any(!file.exists(files))) |
|
94 |
+ stop("file(s) ", |
|
95 |
+ paste(files[!file.exists(files)], collapse = ", "), |
|
96 |
+ " not found") |
|
97 |
+ ## Import data and rbind. |
|
98 |
+ message("Start data import from ", length(files), " files ... ", |
|
99 |
+ appendLF = FALSE) |
|
100 |
+ res <- bplapply(files, FUN = .read_massbank, |
|
101 |
+ nonStop = nonStop, BPPARAM = BPPARAM) |
|
102 |
+ message("done") |
|
103 |
+ res <- do.call(rbind, res) |
|
104 |
+ if (nonStop && length(files) > nrow(res)) |
|
105 |
+ warning("Import failed for ", length(files) - nrow(res), |
|
106 |
+ " files") |
|
107 |
+ asDataFrame(object) <- res |
|
108 |
+ object$dataStorage <- "<memory>" |
|
109 |
+ object$centroided <- TRUE |
|
110 |
+ validObject(object) |
|
111 |
+ object |
|
112 |
+ }) |
|
113 |
+ |
|
114 |
+#' @rdname MsBackendMassbank |
|
115 |
+#' |
|
116 |
+#' @importFrom methods new |
|
117 |
+#' |
|
118 |
+#' @export MsBackendMassbank |
|
119 |
+MsBackendMassbank <- function() { |
|
120 |
+ new("MsBackendMassbank") |
|
121 |
+} |
... | ... |
@@ -1,294 +1,114 @@ |
1 |
-# all fields are found here |
|
2 |
-# https://github.com/MassBank/MassBank-web/blob/dev/Documentation/MassBankRecordFormat.md |
|
3 |
- |
|
4 |
-#' |
|
5 |
-#' |
|
6 |
-#' |
|
7 |
-.import_massbank_ms_ms_spectrum <- function(x) { |
|
8 |
- |
|
9 |
- if (!is.character(x) || length(x) != 1) |
|
10 |
- stop("'x' has to be of type character with length 1") |
|
11 |
- |
|
12 |
- con <- file(x) |
|
13 |
- mb_record <- readLines(con) |
|
14 |
- close(con) |
|
15 |
- |
|
16 |
- # isolate spectrum and basic metadata (default) |
|
17 |
- recordinfo <- .isolate_recordinfo(mb_record) |
|
18 |
- spectrum <- .isolate_spectrum(mb_record) |
|
19 |
- |
|
20 |
- # idea: isolate only additional information if requested |
|
21 |
- if(isolate_ch) { |
|
22 |
- ch <- .isolate_chemical(mb_record) |
|
23 |
- } else { |
|
24 |
- ch <- NULL |
|
1 |
+##' @param f `character(1)` with the path to an MassBank file. |
|
2 |
+##' |
|
3 |
+##' @param msLevel `numeric(1)` with the MS level. Default is 2. |
|
4 |
+##' |
|
5 |
+##' @param ... Additional parameters, currently ignored. |
|
6 |
+##' |
|
7 |
+##' @importFrom S4Vectors DataFrame |
|
8 |
+##' |
|
9 |
+##' @importFrom IRanges NumericList |
|
10 |
+##' |
|
11 |
+##' @author Michael Witting |
|
12 |
+##' |
|
13 |
+##' @noRd |
|
14 |
+.read_massbank <- function(f, msLevel = 2L, ...) { |
|
15 |
+ |
|
16 |
+ if (length(f) != 1L) |
|
17 |
+ stop("Please provide a single mgf file.") |
|
18 |
+ |
|
19 |
+ mb <- scan(file = f, what = "", |
|
20 |
+ sep = "\n", quote = "", |
|
21 |
+ allowEscapes = FALSE, |
|
22 |
+ quiet = TRUE) |
|
23 |
+ |
|
24 |
+ begin <- grep("ACCESSION:", mb) + 1L |
|
25 |
+ end <- grep("//", mb) |
|
26 |
+ n <- length(begin) |
|
27 |
+ sp <- vector("list", length = n) |
|
28 |
+ |
|
29 |
+ for (i in seq(along = sp)) |
|
30 |
+ sp[[i]] <- .extract_mb_spectrum(mb[begin[i]:end[i]]) |
|
31 |
+ |
|
32 |
+ res <- DataFrame(do.call(rbind, sp)) |
|
33 |
+ |
|
34 |
+ for (i in seq_along(res)) { |
|
35 |
+ if (all(lengths(res[[i]]) == 1)) |
|
36 |
+ res[[i]] <- unlist(res[[i]]) |
|
25 | 37 |
} |
26 | 38 |
|
27 |
- # create Spectra object with metadata |
|
39 |
+ res$mz <- IRanges::NumericList(res$mz) |
|
40 |
+ res$intensity <- IRanges::NumericList(res$intensity) |
|
41 |
+ res$dataOrigin <- f |
|
42 |
+ res$msLevel <- as.integer(msLevel) |
|
43 |
+ res |
|
28 | 44 |
|
29 | 45 |
} |
30 | 46 |
|
31 |
-#' |
|
32 |
-#' |
|
33 |
-#' |
|
34 |
-.isolate_spectrum <- function(mb_record) { |
|
35 |
- |
|
36 |
- #read the spectrum |
|
37 |
- spectrum_start <- grep("PK$PEAK:", mb_record, fixed = TRUE) + 1 |
|
38 |
- spectrum_end <- tail(grep("//", mb_record, fixed = TRUE), 1) - 1 |
|
39 |
- |
|
40 |
- if(spectrum_start < spectrum_end){ |
|
41 |
- splitted <- strsplit(mb_record[spectrum_start:(spectrum_end)]," ") |
|
42 |
- spectrum <- matrix(nrow = spectrum_end + 1 - spectrum_start, ncol = 3) |
|
43 | 47 |
|
44 |
- for(k in 1:length(splitted)){ |
|
45 |
- splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")] |
|
46 |
- spectrum[k,] <- splitted[[k]] |
|
47 |
- } |
|
48 | 48 |
|
49 |
- # convert to data frame and adjust data type |
|
50 |
- spectrum <- as.data.frame(spectrum, stringsAsFactors = FALSE) |
|
51 |
- spectrum[] <- lapply(spectrum, type.convert) |
|
52 |
- colnames(spectrum) <- c("m/z", "int", "rel.int.") |
|
53 | 49 |
|
54 |
- # return spectrum |
|
55 |
- return(spectrum) |
|
50 |
+##' @param mb `character()` of lines defining a spectrum in mgf |
|
51 |
+##' format. |
|
52 |
+##' |
|
53 |
+##' @author Michael Witting |
|
54 |
+##' |
|
55 |
+##' @noRd |
|
56 |
+.extract_mb_spectrum <- function(mb) { |
|
56 | 57 |
|
57 |
- } else { |
|
58 |
+ #read the spectrum |
|
59 |
+ spectrum_start <- grep("PK$PEAK:", mb, fixed = TRUE) + 1 |
|
60 |
+ spectrum_end <- tail(grep("//", mb, fixed = TRUE), 1) - 1 |
|
58 | 61 |
|
59 |
- stop("Could not find PK$PEAK or //. Please check the supplied MB record file") |
|
62 |
+ splitted <- strsplit(mb[spectrum_start:(spectrum_end)]," ") |
|
63 |
+ spectrum <- matrix(nrow = spectrum_end + 1 - spectrum_start, ncol = 3) |
|
60 | 64 |
|
65 |
+ for(k in 1:length(splitted)){ |
|
66 |
+ splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")] |
|
67 |
+ spectrum[k,] <- splitted[[k]] |
|
61 | 68 |
} |
62 |
-} |
|
63 |
- |
|
64 |
-#' |
|
65 |
-#' |
|
66 |
-#' |
|
67 |
-.isolate_chemical <- function(mb_record) { |
|
68 |
- |
|
69 |
- # create empty list |
|
70 |
- ch <- list() |
|
71 |
- |
|
72 |
- # isolate chemical information |
|
73 |
- ch$name <- as.list(substring(grep("CH$NAME:", mb_record, value = TRUE, fixed = TRUE), 10)) |
|
74 |
- ch$compound_class <- substring(grep("CH$COMPOUND_CLASS:", mb_record, value = TRUE, fixed = TRUE), 20) |
|
75 |
- ch$formula <- substring(grep("CH$FORMULA:", mb_record, value = TRUE, fixed = TRUE), 13) |
|
76 |
- ch$exact_mass <- as.numeric(substring(grep("CH$EXACT_MASS:", mb_record, value = TRUE, fixed = TRUE), 16)) |
|
77 |
- ch$smiles <- substring(grep("CH$SMILES:", mb_record, value = TRUE, fixed = TRUE), 12) |
|
78 |
- ch$iupac <- substring(grep("CH$IUPAC:", mb_record, value = TRUE, fixed = TRUE), 11) |
|
79 |
- ch$link_cas <- substring(grep("CH$LINK: CAS", mb_record, value = TRUE, fixed = TRUE), 14) |
|
80 |
- ch$link_cayman <- substring(grep("CH$LINK: CAYMAN", mb_record, value = TRUE, fixed = TRUE), 17) |
|
81 |
- ch$link_chebi <- substring(grep("CH$LINK: CHEBI", mb_record, value = TRUE, fixed = TRUE), 16) |
|
82 |
- ch$link_chembl <- substring(grep("CH$LINK: CHEMBL", mb_record, value = TRUE, fixed = TRUE), 17) |
|
83 |
- ch$link_chempdb <- substring(grep("CH$LINK: CHEMPDB", mb_record, value = TRUE, fixed = TRUE), 18) |
|
84 |
- ch$link_chemspider <- substring(grep("CH$LINK: CHEMSPIDER", mb_record, value = TRUE, fixed = TRUE), 21) |
|
85 |
- ch$link_comptox <- substring(grep("CH$LINK: COMPTOX", mb_record, value = TRUE, fixed = TRUE), 18) |
|
86 |
- ch$link_hmdb <- substring(grep("CH$LINK: HMDB", mb_record, value = TRUE, fixed = TRUE), 15) |
|
87 |
- ch$link_inchikey <- substring(grep("CH$LINK: INCHIKEY", mb_record, value = TRUE, fixed = TRUE), 19) |
|
88 |
- ch$link_kappaview <- substring(grep("CH$LINK: KAPPAVIEW", mb_record, value = TRUE, fixed = TRUE), 20) |
|
89 |
- ch$link_kegg <- substring(grep("CH$LINK: KEGG", mb_record, value = TRUE, fixed = TRUE), 15) |
|
90 |
- ch$link_knapsack <- substring(grep("CH$LINK: KNAPSACK", mb_record, value = TRUE, fixed = TRUE), 19) |
|
91 |
- ch$link_lipidbank <- substring(grep("CH$LINK: LIPIDBANK", mb_record, value = TRUE, fixed = TRUE), 20) |
|
92 |
- ch$link_lipidmaps <- substring(grep("CH$LINK: LIPIDMAPS", mb_record, value = TRUE, fixed = TRUE), 20) |
|
93 |
- ch$link_nikkaji <- substring(grep("CH$LINK: NIKKAJI", mb_record, value = TRUE, fixed = TRUE), 18) |
|
94 |
- ch$link_pubchem <- substring(grep("CH$LINK: PUBCHEM", mb_record, value = TRUE, fixed = TRUE), 18) |
|
95 |
- ch$link_zinc <- substring(grep("CH$LINK: ZINC", mb_record, value = TRUE, fixed = TRUE), 15) |
|
96 |
- |
|
97 |
- # clean up data |
|
98 |
- # TODO replace character(0) with NA_character |
|
99 |
- |
|
100 |
- # return result list |
|
101 |
- ch |
|
102 |
-} |
|
103 |
- |
|
104 |
-#' |
|
105 |
-#' |
|
106 |
-#' |
|
107 |
-.isolate_species <- function(mb_record) { |
|
108 |
- |
|
109 |
- # create empty list |
|
110 |
- sp <- list() |
|
111 |
- |
|
112 |
- # species information |
|
113 |
- sp$scientific_name <- substring(grep("SP$SCIENTIFIC_NAME:", mb_record, value = TRUE, fixed = TRUE), 21) |
|
114 |
- sp$lineage <- substring(grep("SP$LINEAGE:", mb_record, value = TRUE, fixed = TRUE), 13) |
|
115 |
- sp$link <- substring(grep("SP$LINK:", mb_record, value = TRUE, fixed = TRUE), 10) |
|
116 |
- sp$sample <- substring(grep("SP$SAMPLE:", mb_record, value = TRUE, fixed = TRUE), 12) |
|
117 | 69 |
|
118 |
- # clean up data |
|
119 |
- # TODO replace character(0) with NA_character |
|
70 |
+ # convert to data frame and adjust data type |
|
71 |
+ spectrum <- as.data.frame(spectrum, stringsAsFactors = FALSE) |
|
72 |
+ spectrum[] <- lapply(spectrum, type.convert) |
|
73 |
+ colnames(spectrum) <- c("mz", "intensity", "rel.intensity") |
|
74 |
+ |
|
75 |
+ # isolate spectrum metadata from record |
|
76 |
+ rtime <- as.numeric(substring(grep("AC$CHROMATOGRAPHY: RETENTION_TIME", |
|
77 |
+ mb, |
|
78 |
+ value = TRUE, |
|
79 |
+ fixed = TRUE), |
|
80 |
+ 35)) |
|
81 |
+ |
|
82 |
+ precursorMz <- as.numeric(substring(grep("MS$FOCUSED_ION: PRECURSOR_M/Z", |
|
83 |
+ mb, |
|
84 |
+ value = TRUE, |
|
85 |
+ fixed = TRUE), |
|
86 |
+ 30)) |
|
87 |
+ |
|
88 |
+ precursorIntensity <- as.numeric(substring(grep("MS$FOCUSED_ION: PRECURSOR_INT", |
|
89 |
+ mb, |
|
90 |
+ value = TRUE, |
|
91 |
+ fixed = TRUE), |
|
92 |
+ 31)) |
|
93 |
+ |
|
94 |
+ title <- substring(grep("RECORD_TITLE:", |
|
95 |
+ mb, |
|
96 |
+ value = TRUE, |
|
97 |
+ fixed = TRUE), |
|
98 |
+ 15) |
|
99 |
+ |
|
100 |
+ # check data |
|
101 |
+ if(!length(precursorIntensity)) |
|
102 |
+ precursorIntensity <- 100 |
|
103 |
+ |
|
104 |
+ list(rtime = rtime, |
|
105 |
+ scanIndex = as.integer(1), |
|
106 |
+ precursorMz = precursorMz, |
|
107 |
+ precursorIntensity = precursorIntensity, |
|
108 |
+ precursorCharge = as.integer(0), |
|
109 |
+ mz = spectrum$mz, |
|
110 |
+ intensity = spectrum$intensity, |
|
111 |
+ title = title) |
|
120 | 112 |
|
121 |
- # return result list |
|
122 |
- sp |
|
123 | 113 |
} |
124 | 114 |
|
125 |
-#' |
|
126 |
-#' |
|
127 |
-#' |
|
128 |
-.isolate_analchem <- function(mb_record) { |
|
129 |
- |
|
130 |
- # create empty list |
|
131 |
- ac <- list() |
|
132 |
- |
|
133 |
- # analytical chemistry information, MS instrument |
|
134 |
- ac$instrument <- substring(grep("AC$INSTRUMENT:", mb_record, value = TRUE, fixed = TRUE), 16) |
|
135 |
- ac$instrument_type <- substring(grep("AC$INSTRUMENT_TYPE:", mb_record, value = TRUE, fixed = TRUE), 21) |
|
136 |
- |
|
137 |
- # analytical chemistry information, MS settings |
|
138 |
- ac$ms_ms_type <- substring(grep("AC$MASS_SPECTROMETRY: MS_TYPE", mb_record, value = TRUE, fixed = TRUE), 31) |
|
139 |
- ac$ms_cap_voltage <- substring(grep("AC$MASS_SPECTROMETRY: CAPILLARY_VOLTAGE", mb_record, value = TRUE, fixed = TRUE), 41) |
|
140 |
- ac$ms_ion_mode <- substring(grep("AC$MASS_SPECTROMETRY: ION_MODE", mb_record, value = TRUE, fixed = TRUE), 32) |
|
141 |
- ac$ms_col_energy <- substring(grep("AC$MASS_SPECTROMETRY: COLLISION_ENERGY", mb_record, value = TRUE, fixed = TRUE), 40) |
|
142 |
- ac$ms_col_gas <- substring(grep("AC$MASS_SPECTROMETRY: COLLISION_GAS", mb_record, value = TRUE, fixed = TRUE), 37) |
|
143 |
- ac$ms_desolv_gas_flow <- substring(grep("AC$MASS_SPECTROMETRY: DESOLVATION_GAS_FLOW", mb_record, value = TRUE, fixed = TRUE), 44) |
|
144 |
- ac$ms_desolv_temp <- substring(grep("AC$MASS_SPECTROMETRY: DESOLVATION_TEMPERATURE", mb_record, value = TRUE, fixed = TRUE), 47) |
|
145 |
- ac$ms_frag_mode <- substring(grep("AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE", mb_record, value = TRUE, fixed = TRUE), 42) |
|
146 |
- ac$ms_ionization <- substring(grep("AC$MASS_SPECTROMETRY: IONIZATION", mb_record, value = TRUE, fixed = TRUE), 34) |
|
147 |
- ac$ms_ionization_energy <- substring(grep("AC$MASS_SPECTROMETRY: IONIZATION_ENERGY", mb_record, value = TRUE, fixed = TRUE), 41) |
|
148 |
- ac$ms_laser <- substring(grep("AC$MASS_SPECTROMETRY: LASER", mb_record, value = TRUE, fixed = TRUE), 29) |
|
149 |
- ac$ms_matrix <- substring(grep("AC$MASS_SPECTROMETRY: MATRIX", mb_record, value = TRUE, fixed = TRUE), 30) |
|
150 |
- ac$ms_mass_accuracy <- substring(grep("AC$MASS_SPECTROMETRY: MASS_ACCURACY", mb_record, value = TRUE, fixed = TRUE), 37) |
|
151 |
- ac$ms_mass_range <- substring(grep("AC$MASS_SPECTROMETRY: MASS_RANGE_MZ", mb_record, value = TRUE, fixed = TRUE), 37) |
|
152 |
- ac$ms_reagent_gas <- substring(grep("AC$MASS_SPECTROMETRY: REAGENT_GAS", mb_record, value = TRUE, fixed = TRUE), 35) |
|
153 |
- ac$ms_resolution <- substring(grep("AC$MASS_SPECTROMETRY: RESOLUTION", mb_record, value = TRUE, fixed = TRUE), 34) |
|
154 |
- ac$ms_scan_setting <- substring(grep("AC$MASS_SPECTROMETRY: SCANNING_SETTING", mb_record, value = TRUE, fixed = TRUE), 40) |
|
155 |
- ac$ms_source_temp <- substring(grep("AC$MASS_SPECTROMETRY: SOURCE_TEMPERATURE", mb_record, value = TRUE, fixed = TRUE), 42) |
|
156 |
- |
|
157 |
- # analytical chemistry information, chromatography |
|
158 |
- ac$chrom_carrier_gas <- substring(grep("AC$CHROMATOGRAPHY: CARRIER_GAS", mb_record, value = TRUE, fixed = TRUE), 32) |
|
159 |
- ac$chrom_column <- substring(grep("AC$CHROMATOGRAPHY: COLUMN_NAME", mb_record, value = TRUE, fixed = TRUE), 32) |
|
160 |
- ac$chrom_column_temp <- substring(grep("AC$CHROMATOGRAPHY: COLUMN_TEMPERATURE", mb_record, value = TRUE, fixed = TRUE), 39) |
|
161 |
- ac$chrom_column_temp_gradient <- substring(grep("AC$CHROMATOGRAPHY: COLUMN_TEMPERATURE_GRADIENT", mb_record, value = TRUE, fixed = TRUE), 48) |
|
162 |
- ac$chrom_flow_gradient <- substring(grep("AC$CHROMATOGRAPHY: FLOW_GRADIENT", mb_record, value = TRUE, fixed = TRUE), 34) |
|
163 |
- ac$chrom_flow_rate <- substring(grep("AC$CHROMATOGRAPHY: FLOW_RATE", mb_record, value = TRUE, fixed = TRUE), 30) |
|
164 |
- ac$chrom_inj_temp <- substring(grep("AC$CHROMATOGRAPHY: INJECTION_TEMPERATURE", mb_record, value = TRUE, fixed = TRUE), 42) |
|
165 |
- ac$chrom_inj_temp_gradient <- substring(grep("AC$CHROMATOGRAPHY: INJECTION_TEMPERATURE_GRADIENT", mb_record, value = TRUE, fixed = TRUE), 51) |
|
166 |
- ac$chrom_rti_kovats <- substring(grep("AC$CHROMATOGRAPHY: KOVATS_RTI", mb_record, value = TRUE, fixed = TRUE), 31) |
|
167 |
- ac$chrom_rti_lee <- substring(grep("AC$CHROMATOGRAPHY: LEE_RTI", mb_record, value = TRUE, fixed = TRUE), 28) |
|
168 |
- ac$chrom_rti_naps <- substring(grep("AC$CHROMATOGRAPHY: NAPS_RTI", mb_record, value = TRUE, fixed = TRUE), 29) |
|
169 |
- ac$chrom_rti_uoa <- substring(grep("AC$CHROMATOGRAPHY: UOA_RTI", mb_record, value = TRUE, fixed = TRUE), 28) |
|
170 |
- ac$chrom_rti_uoa_pred <- substring(grep("AC$CHROMATOGRAPHY: UOA_PREDICTED_RTI", mb_record, value = TRUE, fixed = TRUE), 38) |
|
171 |
- ac$chrom_rt <- substring(grep("AC$CHROMATOGRAPHY: RETENTION_TIME", mb_record, value = TRUE, fixed = TRUE), 35) |
|
172 |
- ac$chrom_rt_uoa_pred <- substring(grep("AC$CHROMATOGRAPHY: TRAMS_PREDICTED_RETENTION_TIME", mb_record, value = TRUE, fixed = TRUE), 51) |
|
173 |
- ac$chrom_solvent <- as.list(substring(grep("AC$CHROMATOGRAPHY: SOLVENT", mb_record, value = TRUE, fixed = TRUE), 28)) |
|
174 |
- ac$chrom_transfer_temp <- substring(grep("AC$CHROMATOGRAPHY: TRANSFERLINE_TEMPERATURE", mb_record, value = TRUE, fixed = TRUE), 45) |
|
175 |
- |
|
176 |
- # # analytical chemistry information, ion mobility |
|
177 |
- # # preparation for IMS update of MassBank format |
|
178 |
- # ac$ims_instrument_type <- substring(grep("AC$ION_MOBILITY: INSTRUMENT_TYPE", mb_record, value = TRUE, fixed = TRUE), 34) |
|
179 |
- # ac$ims_drift_gas <- substring(grep("AC$ION_MOBILITY: DRIFT_GAS", mb_record, value = TRUE, fixed = TRUE), 28) |
|
180 |
- # ac$ims_drift_time <- substring(grep("AC$ION_MOBILITY: DRIFT_TIME", mb_record, value = TRUE, fixed = TRUE), 29) |
|
181 |
- # ac$ims_ccs <- substring(grep("AC$ION_MOBILITY: CCS", mb_record, value = TRUE, fixed = TRUE), 22) |
|
182 |
- |
|
183 |
- # analytical chemistry information, general |
|
184 |
- ac$general_conc <- substring(grep("AC$GENERAL: CONCENTRATION", mb_record, value = TRUE, fixed = TRUE), 27) |
|
185 |
- |
|
186 |
- # clean up data |
|
187 |
- # TODO replace character(0) with NA_character |
|
188 |
- |
|
189 |
- # return result list |
|
190 |
- ac |
|
191 |
-} |
|
192 |
- |
|
193 |
-#' |
|
194 |
-#' |
|
195 |
-#' |
|
196 |
-.isolate_massspec <- function(mb_record) { |
|
197 |
- |
|
198 |
- # create empty list |
|
199 |
- ms <- list() |
|
200 |
- |
|
201 |
- # MS information, base peak |
|
202 |
- ms$focus_base_peak <- substring(grep("MS$FOCUSED_ION: BASE_PEAK", mb_record, value = TRUE, fixed = TRUE), 27) |
|
203 |
- |
|
204 |
- # MS information, derivative |
|
205 |
- ms$focus_derivative_form <- substring(grep("MS$FOCUSED_ION: DERIVATIVE_FORM", mb_record, value = TRUE, fixed = TRUE), 33) |
|
206 |
- ms$focus_derivative_mass <- substring(grep("MS$FOCUSED_ION: DERIVATIVE_MASS", mb_record, value = TRUE, fixed = TRUE), 33) |
|
207 |
- ms$focus_derivative_type <- substring(grep("MS$FOCUSED_ION: DERIVATIVE_TYPE", mb_record, value = TRUE, fixed = TRUE), 33) |
|
208 |
- |
|
209 |
- # MS information, precursor |
|
210 |
- ms$focus_ion_type <- substring(grep("MS$FOCUSED_ION: ION_TYPE", mb_record, value = TRUE, fixed = TRUE), 26) |
|
211 |
- ms$focus_precursor_int <- substring(grep("MS$FOCUSED_ION: PRECURSOR_INT", mb_record, value = TRUE, fixed = TRUE), 31) |
|
212 |
- ms$focus_precursor_mz <- substring(grep("MS$FOCUSED_ION: PRECURSOR_MZ", mb_record, value = TRUE, fixed = TRUE), 30) |
|
213 |
- ms$focus_precursor_type <- substring(grep("MS$FOCUSED_ION: PRECURSOR_TYPE", mb_record, value = TRUE, fixed = TRUE), 32) |
|
214 |
- |
|
215 |
- # MS data processing |
|
216 |
- ms$data_processing_comment <- substring(grep("MS$DATA_PROCESSING: COMMENT", mb_record, value = TRUE, fixed = TRUE), 29) |
|
217 |
- ms$data_processing_deprofile <- substring(grep("MS$DATA_PROCESSING: DEPROFILE", mb_record, value = TRUE, fixed = TRUE), 31) |
|
218 |
- ms$data_processing_find <- substring(grep("MS$DATA_PROCESSING: FIND_PEAK", mb_record, value = TRUE, fixed = TRUE), 31) |
|
219 |
- ms$data_processing_reanalyze <- substring(grep("MS$DATA_PROCESSING: REANALYZE", mb_record, value = TRUE, fixed = TRUE), 31) |
|
220 |
- ms$data_processing_recalibrate <- substring(grep("MS$DATA_PROCESSING: RECALIBRATE", mb_record, value = TRUE, fixed = TRUE), 33) |
|
221 |
- ms$data_processing_whole <- substring(grep("MS$DATA_PROCESSING: WHOLE", mb_record, value = TRUE, fixed = TRUE), 27) |
|
222 |
- |
|
223 |
- # clean up data |
|
224 |
- # TODO replace character(0) with NA_character_ |
|
225 |
- |
|
226 |
- # TODO type conversion for numeric data |
|
227 |
- |
|
228 |
- # return result list |
|
229 |
- ms |
|
230 |
-} |
|
231 |
- |
|
232 |
-#' |
|
233 |
-#' |
|
234 |
-#' |
|
235 |
-.isolate_recordinfo <- function(mb_record) { |
|
236 |
- |
|
237 |
- # create empty list |
|
238 |
- recordinfo <- list() |
|
239 |
- |
|
240 |
- # mb_record information |
|
241 |
- recordinfo$accession <- substring(grep("ACCESSION:", mb_record, value = TRUE, fixed = TRUE), 12) |
|
242 |
- recordinfo$deprecated <- substring(grep("DEPRECATED:", mb_record, value = TRUE, fixed = TRUE), 13) |
|
243 |
- recordinfo$record_title <- substring(grep("RECORD_TITLE:", mb_record, value = TRUE, fixed = TRUE), 15) |
|
244 |
- recordinfo$date <- substring(grep("DATE:", mb_record, value = TRUE, fixed = TRUE), 7) |
|
245 |
- recordinfo$authors <- substring(grep("AUTHORS:", mb_record, value = TRUE, fixed = TRUE), 10) |
|
246 |
- recordinfo$license <- substring(grep("LICENSE:", mb_record, value = TRUE, fixed = TRUE), 10) |
|
247 |
- recordinfo$copyright <- substring(grep("COPYRIGHT:", mb_record, value = TRUE, fixed = TRUE), 12) |
|
248 |
- recordinfo$publication <- substring(grep("PUBLICATION:", mb_record, value = TRUE, fixed = TRUE), 14) |
|
249 |
- recordinfo$project <- as.list(substring(grep("PROJECT:", mb_record, value = TRUE, fixed = TRUE), 10)) |
|
250 |
- |
|
251 |
- # clean up data |
|
252 |
- # TODO replace character(0) with NA_character_ |
|
253 |
- |
|
254 |
- # TODO type conversion for dates |
|
255 |
- |
|
256 |
- # return result list |
|
257 |
- recordinfo |
|
258 |
- |
|
259 |
-} |
|
260 |
- |
|
261 |
-#' |
|
262 |
-#' |
|
263 |
-#' |
|
264 |
-.isolate_peakinfo <- function(mb_record) { |
|
265 |
- |
|
266 |
- # create empty list |
|
267 |
- pk <- list() |
|
268 |
- |
|
269 |
- # peak data |
|
270 |
- pk$splash <- substring(grep("PK$SPLASH:", mb_record, value = TRUE, fixed = TRUE), 12) |
|
271 |
- pk$num <- substring(grep("PK$NUM_PEAK:", mb_record, value = TRUE, fixed = TRUE), 14) |
|
272 |
- |
|
273 |
- # clean up data |
|
274 |
- # TODO replace character(0) with NA_character_ |
|
275 |
- |
|
276 |
- # TODO type conversion for numeric data |
|
277 |
- |
|
278 |
- # return result list |
|
279 |
- pk |
|
280 |
- |
|
281 |
-} |
|
282 |
- |
|
283 |
-#' |
|
284 |
-#' |
|
285 |
-#' |
|
286 |
-.isolate_comment <- function(mb_record) { |
|
287 |
- |
|
288 |
- # comment section |
|
289 |
- comment <- as.list(substring(grep("COMMENT:", mb_record, value = TRUE, fixed = TRUE), 10)) |
|
290 |
- |
|
291 |
- # return result list |
|
292 |
- comment |
|
293 |
- |
|
294 |
-} |
295 | 115 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,13 @@ |
1 |
+#' @title Internal page for hidden aliases |
|
2 |
+#' |
|
3 |
+#' @aliases [,MsBackendDataFrame-method |
|
4 |
+#' |
|
5 |
+#' @description |
|
6 |
+#' |
|
7 |
+#' For S4 methods that require a documentation entry but only clutter the index. |
|
8 |
+#' |
|
9 |
+#' @usage NULL |
|
10 |
+#' @format NULL |
|
11 |
+#' @keywords internal |
|
12 |
+#' @docType methods |
|
13 |
+hidden_aliases <- NULL |