Browse code

updated functions to be aligned with RforMassSpectrometry

Michael Witting authored on 17/04/2020 08:56:41
Showing 3 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,121 @@
1
+
2
+#' @include hidden_aliases.R
3
+NULL
4
+
5
+#' @title MS data backend for mgf files
6
+#'
7
+#' @aliases MsBackendMassbank-class
8
+#'
9
+#' @description
10
+#'
11
+#' The `MsBackendMassbank` class supports import of MS/MS spectra data from
12
+#' files in Mascot Generic Format
13
+#' ([mgf](http://www.matrixscience.com/help/data_file_help.html))
14
+#' files. After initial import, the full MS data is kept in
15
+#' memory. `MsBackendMassbank` extends the [MsBackendDataFrame()] backend
16
+#' directly and supports thus the [applyProcessing()] function to make
17
+#' data manipulations persistent. The backend does however not
18
+#' support export to mgf files yet.
19
+#'
20
+#' New objects are created with the `MsBackendMassbank` function. The
21
+#' `backendInitialize` method has to be subsequently called to
22
+#' initialize the object and import MS/MS data from (one or more) mgf
23
+#' files.  Optional parameter `nonStop` allows to specify whether the
24
+#' import returns with an error if one of the xml files lacks required
25
+#' data, such as `mz` and `intensity` values (default `nonStop =
26
+#' FALSE`), or whether only affected file(s) is(are) skipped and a
27
+#' warning is shown (`nonStop = TRUE`). Note that any other error
28
+#' (such as xml import error) will abort import regardless of
29
+#' parameter `nonStop`.
30
+#'
31
+#' @param object Instance of `MsBackendMassbank` class.
32
+#'
33
+#' @param files `character` with the (full) file name(s) of the mgf file(s)
34
+#'     from which MS/MS data should be imported.
35
+#'
36
+#' @param nonStop `logical(1)` whether import should be stopped if an
37
+#'     xml file does not contain all required fields. Defaults to
38
+#'     `nonStop = FALSE`.
39
+#'
40
+#' @param BPPARAM Parameter object defining the parallel processing
41
+#'     setup to import data in parallel. Defaults to `BPPARAM =
42
+#'     bpparam()`. See [bpparam()] for more information.
43
+#'
44
+#' @param ... Currently ignored.
45
+#'
46
+#' @author Michael Witting
47
+#'
48
+#' @importClassesFrom Spectra MsBackendDataFrame
49
+#'
50
+#' @exportClass MsBackendMassbank
51
+#'
52
+#' @name MsBackendMassbank
53
+#'
54
+#' @examples
55
+#'
56
+#' ## Create an MsBackendHmdbXml backend and import data from test xml files.
57
+#' fls <- dir(system.file("extdata", package = "MsBackendMassbank"),
58
+#'     full.names = TRUE, pattern = "mgf$")
59
+#' be <- backendInitialize(MsBackendMassbank(), fls)
60
+#' be
61
+#'
62
+#' be$msLevel
63
+#' be$intensity
64
+#' be$mz
65
+NULL
66
+
67
+setClass("MsBackendMassbank",
68
+         contains = "MsBackendDataFrame",
69
+         prototype = prototype(spectraData = DataFrame(),
70
+                               readonly = FALSE,
71
+                               version = "0.1"))
72
+
73
+#' @importMethodsFrom Spectra backendInitialize asDataFrame<- $<- $
74
+#'
75
+#' @importFrom BiocParallel bpparam
76
+#'
77
+#' @importMethodsFrom BiocParallel bplapply
78
+#'
79
+#' @importFrom methods validObject
80
+#'
81
+#' @exportMethod backendInitialize
82
+#'
83
+#' @rdname MsBackendMassbank
84
+setMethod("backendInitialize", signature = "MsBackendMassbank",
85
+          function(object, files, nonStop = FALSE, ..., BPPARAM = bpparam()) {
86
+            if (missing(files) || !length(files))
87
+              stop("Parameter 'files' is mandatory for ", class(object))
88
+            if (!is.character(files))
89
+              stop("Parameter 'files' is expected to be a character vector",
90
+                   " with the files names from where data should be",
91
+                   " imported")
92
+            files <- normalizePath(files)
93
+            if (any(!file.exists(files)))
94
+              stop("file(s) ",
95
+                   paste(files[!file.exists(files)], collapse = ", "),
96
+                   " not found")
97
+            ## Import data and rbind.
98
+            message("Start data import from ", length(files), " files ... ",
99
+                    appendLF = FALSE)
100
+            res <- bplapply(files, FUN = .read_massbank,
101
+                            nonStop = nonStop, BPPARAM = BPPARAM)
102
+            message("done")
103
+            res <- do.call(rbind, res)
104
+            if (nonStop && length(files) > nrow(res))
105
+              warning("Import failed for ", length(files) - nrow(res),
106
+                      " files")
107
+            asDataFrame(object) <- res
108
+            object$dataStorage <- "<memory>"
109
+            object$centroided <- TRUE
110
+            validObject(object)
111
+            object
112
+          })
113
+
114
+#' @rdname MsBackendMassbank
115
+#'
116
+#' @importFrom methods new
117
+#'
118
+#' @export MsBackendMassbank
119
+MsBackendMassbank <- function() {
120
+  new("MsBackendMassbank")
121
+}
... ...
@@ -1,294 +1,114 @@
1
-# all fields are found here
2
-# https://github.com/MassBank/MassBank-web/blob/dev/Documentation/MassBankRecordFormat.md
3
-
4
-#'
5
-#'
6
-#'
7
-.import_massbank_ms_ms_spectrum <- function(x) {
8
-
9
-  if (!is.character(x) || length(x) != 1)
10
-    stop("'x' has to be of type character with length 1")
11
-
12
-  con <- file(x)
13
-  mb_record <- readLines(con)
14
-  close(con)
15
-
16
-  # isolate spectrum and basic metadata (default)
17
-  recordinfo <- .isolate_recordinfo(mb_record)
18
-  spectrum <- .isolate_spectrum(mb_record)
19
-
20
-  # idea: isolate only additional information if requested
21
-  if(isolate_ch) {
22
-    ch <- .isolate_chemical(mb_record)
23
-  } else {
24
-    ch <- NULL
1
+##' @param f `character(1)` with the path to an MassBank file.
2
+##'
3
+##' @param msLevel `numeric(1)` with the MS level. Default is 2.
4
+##'
5
+##' @param ... Additional parameters, currently ignored.
6
+##'
7
+##' @importFrom S4Vectors DataFrame
8
+##'
9
+##' @importFrom IRanges NumericList
10
+##'
11
+##' @author Michael Witting
12
+##'
13
+##' @noRd
14
+.read_massbank <- function(f, msLevel = 2L, ...) {
15
+
16
+  if (length(f) != 1L)
17
+    stop("Please provide a single mgf file.")
18
+
19
+  mb <- scan(file = f, what = "",
20
+              sep = "\n", quote = "",
21
+              allowEscapes = FALSE,
22
+              quiet = TRUE)
23
+
24
+  begin <- grep("ACCESSION:", mb) + 1L
25
+  end <- grep("//", mb)
26
+  n <- length(begin)
27
+  sp <- vector("list", length = n)
28
+
29
+  for (i in seq(along = sp))
30
+    sp[[i]] <- .extract_mb_spectrum(mb[begin[i]:end[i]])
31
+
32
+  res <- DataFrame(do.call(rbind, sp))
33
+
34
+  for (i in seq_along(res)) {
35
+    if (all(lengths(res[[i]]) == 1))
36
+      res[[i]] <- unlist(res[[i]])
25 37
   }
26 38
 
27
-  # create Spectra object with metadata
39
+  res$mz <- IRanges::NumericList(res$mz)
40
+  res$intensity <- IRanges::NumericList(res$intensity)
41
+  res$dataOrigin <- f
42
+  res$msLevel <- as.integer(msLevel)
43
+  res
28 44
 
29 45
 }
30 46
 
31
-#'
32
-#'
33
-#'
34
-.isolate_spectrum <- function(mb_record) {
35
-
36
-  #read the spectrum
37
-  spectrum_start <- grep("PK$PEAK:", mb_record, fixed = TRUE) + 1
38
-  spectrum_end <- tail(grep("//", mb_record, fixed = TRUE), 1) - 1
39
-
40
-  if(spectrum_start < spectrum_end){
41
-    splitted <- strsplit(mb_record[spectrum_start:(spectrum_end)]," ")
42
-    spectrum <- matrix(nrow = spectrum_end + 1 - spectrum_start, ncol = 3)
43 47
 
44
-    for(k in 1:length(splitted)){
45
-      splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")]
46
-      spectrum[k,] <- splitted[[k]]
47
-    }
48 48
 
49
-    # convert to data frame and adjust data type
50
-    spectrum <- as.data.frame(spectrum, stringsAsFactors = FALSE)
51
-    spectrum[] <- lapply(spectrum, type.convert)
52
-    colnames(spectrum) <- c("m/z", "int", "rel.int.")
53 49
 
54
-    # return spectrum
55
-    return(spectrum)
50
+##' @param mb `character()` of lines defining a spectrum in mgf
51
+##'     format.
52
+##'
53
+##' @author Michael Witting
54
+##'
55
+##' @noRd
56
+.extract_mb_spectrum <- function(mb) {
56 57
 
57
-  } else {
58
+  #read the spectrum
59
+  spectrum_start <- grep("PK$PEAK:", mb, fixed = TRUE) + 1
60
+  spectrum_end <- tail(grep("//", mb, fixed = TRUE), 1) - 1
58 61
 
59
-    stop("Could not find PK$PEAK or //. Please check the supplied MB record file")
62
+  splitted <- strsplit(mb[spectrum_start:(spectrum_end)]," ")
63
+  spectrum <- matrix(nrow = spectrum_end + 1 - spectrum_start, ncol = 3)
60 64
 
65
+  for(k in 1:length(splitted)){
66
+    splitted[[k]] <- splitted[[k]][which(splitted[[k]] != "")]
67
+    spectrum[k,] <- splitted[[k]]
61 68
   }
62
-}
63
-
64
-#'
65
-#'
66
-#'
67
-.isolate_chemical <- function(mb_record) {
68
-
69
-  # create empty list
70
-  ch <- list()
71
-
72
-  # isolate chemical information
73
-  ch$name <- as.list(substring(grep("CH$NAME:", mb_record, value = TRUE, fixed = TRUE), 10))
74
-  ch$compound_class <- substring(grep("CH$COMPOUND_CLASS:", mb_record, value = TRUE, fixed = TRUE), 20)
75
-  ch$formula <- substring(grep("CH$FORMULA:", mb_record, value = TRUE, fixed = TRUE), 13)
76
-  ch$exact_mass <- as.numeric(substring(grep("CH$EXACT_MASS:", mb_record, value = TRUE, fixed = TRUE), 16))
77
-  ch$smiles <- substring(grep("CH$SMILES:", mb_record, value = TRUE, fixed = TRUE), 12)
78
-  ch$iupac <- substring(grep("CH$IUPAC:", mb_record, value = TRUE, fixed = TRUE), 11)
79
-  ch$link_cas <- substring(grep("CH$LINK: CAS", mb_record, value = TRUE, fixed = TRUE), 14)
80
-  ch$link_cayman <- substring(grep("CH$LINK: CAYMAN", mb_record, value = TRUE, fixed = TRUE), 17)
81
-  ch$link_chebi <- substring(grep("CH$LINK: CHEBI", mb_record, value = TRUE, fixed = TRUE), 16)
82
-  ch$link_chembl <- substring(grep("CH$LINK: CHEMBL", mb_record, value = TRUE, fixed = TRUE), 17)
83
-  ch$link_chempdb <- substring(grep("CH$LINK: CHEMPDB", mb_record, value = TRUE, fixed = TRUE), 18)
84
-  ch$link_chemspider <- substring(grep("CH$LINK: CHEMSPIDER", mb_record, value = TRUE, fixed = TRUE), 21)
85
-  ch$link_comptox <- substring(grep("CH$LINK: COMPTOX", mb_record, value = TRUE, fixed = TRUE), 18)
86
-  ch$link_hmdb <- substring(grep("CH$LINK: HMDB", mb_record, value = TRUE, fixed = TRUE), 15)
87
-  ch$link_inchikey <- substring(grep("CH$LINK: INCHIKEY", mb_record, value = TRUE, fixed = TRUE), 19)
88
-  ch$link_kappaview <- substring(grep("CH$LINK: KAPPAVIEW", mb_record, value = TRUE, fixed = TRUE), 20)
89
-  ch$link_kegg <- substring(grep("CH$LINK: KEGG", mb_record, value = TRUE, fixed = TRUE), 15)
90
-  ch$link_knapsack <- substring(grep("CH$LINK: KNAPSACK", mb_record, value = TRUE, fixed = TRUE), 19)
91
-  ch$link_lipidbank <- substring(grep("CH$LINK: LIPIDBANK", mb_record, value = TRUE, fixed = TRUE), 20)
92
-  ch$link_lipidmaps <- substring(grep("CH$LINK: LIPIDMAPS", mb_record, value = TRUE, fixed = TRUE), 20)
93
-  ch$link_nikkaji <- substring(grep("CH$LINK: NIKKAJI", mb_record, value = TRUE, fixed = TRUE), 18)
94
-  ch$link_pubchem <- substring(grep("CH$LINK: PUBCHEM", mb_record, value = TRUE, fixed = TRUE), 18)
95
-  ch$link_zinc <- substring(grep("CH$LINK: ZINC", mb_record, value = TRUE, fixed = TRUE), 15)
96
-
97
-  # clean up data
98
-  # TODO replace character(0) with NA_character
99
-
100
-  # return result list
101
-  ch
102
-}
103
-
104
-#'
105
-#'
106
-#'
107
-.isolate_species <- function(mb_record) {
108
-
109
-  # create empty list
110
-  sp <- list()
111
-
112
-  # species information
113
-  sp$scientific_name <- substring(grep("SP$SCIENTIFIC_NAME:", mb_record, value = TRUE, fixed = TRUE), 21)
114
-  sp$lineage <- substring(grep("SP$LINEAGE:", mb_record, value = TRUE, fixed = TRUE), 13)
115
-  sp$link <- substring(grep("SP$LINK:", mb_record, value = TRUE, fixed = TRUE), 10)
116
-  sp$sample <- substring(grep("SP$SAMPLE:", mb_record, value = TRUE, fixed = TRUE), 12)
117 69
 
118
-  # clean up data
119
-  # TODO replace character(0) with NA_character
70
+  # convert to data frame and adjust data type
71
+  spectrum <- as.data.frame(spectrum, stringsAsFactors = FALSE)
72
+  spectrum[] <- lapply(spectrum, type.convert)
73
+  colnames(spectrum) <- c("mz", "intensity", "rel.intensity")
74
+
75
+  # isolate spectrum metadata from record
76
+  rtime <- as.numeric(substring(grep("AC$CHROMATOGRAPHY: RETENTION_TIME",
77
+                                     mb,
78
+                                     value = TRUE,
79
+                                     fixed = TRUE),
80
+                                35))
81
+
82
+  precursorMz <- as.numeric(substring(grep("MS$FOCUSED_ION: PRECURSOR_M/Z",
83
+                                           mb,
84
+                                           value = TRUE,
85
+                                           fixed = TRUE),
86
+                                      30))
87
+
88
+  precursorIntensity <- as.numeric(substring(grep("MS$FOCUSED_ION: PRECURSOR_INT",
89
+                                                  mb,
90
+                                                  value = TRUE,
91
+                                                  fixed = TRUE),
92
+                                             31))
93
+
94
+  title <- substring(grep("RECORD_TITLE:",
95
+                          mb,
96
+                          value = TRUE,
97
+                          fixed = TRUE),
98
+                     15)
99
+
100
+  # check data
101
+  if(!length(precursorIntensity))
102
+    precursorIntensity <- 100
103
+
104
+  list(rtime = rtime,
105
+        scanIndex = as.integer(1),
106
+        precursorMz = precursorMz,
107
+        precursorIntensity = precursorIntensity,
108
+        precursorCharge = as.integer(0),
109
+        mz = spectrum$mz,
110
+        intensity = spectrum$intensity,
111
+        title = title)
120 112
 
121
-  # return result list
122
-  sp
123 113
 }
124 114
 
125
-#'
126
-#'
127
-#'
128
-.isolate_analchem <- function(mb_record) {
129
-
130
-  # create empty list
131
-  ac <- list()
132
-
133
-  # analytical chemistry information, MS instrument
134
-  ac$instrument <- substring(grep("AC$INSTRUMENT:", mb_record, value = TRUE, fixed = TRUE), 16)
135
-  ac$instrument_type <- substring(grep("AC$INSTRUMENT_TYPE:", mb_record, value = TRUE, fixed = TRUE), 21)
136
-
137
-  # analytical chemistry information, MS settings
138
-  ac$ms_ms_type <- substring(grep("AC$MASS_SPECTROMETRY: MS_TYPE", mb_record, value = TRUE, fixed = TRUE), 31)
139
-  ac$ms_cap_voltage <- substring(grep("AC$MASS_SPECTROMETRY: CAPILLARY_VOLTAGE", mb_record, value = TRUE, fixed = TRUE), 41)
140
-  ac$ms_ion_mode <- substring(grep("AC$MASS_SPECTROMETRY: ION_MODE", mb_record, value = TRUE, fixed = TRUE), 32)
141
-  ac$ms_col_energy <- substring(grep("AC$MASS_SPECTROMETRY: COLLISION_ENERGY", mb_record, value = TRUE, fixed = TRUE), 40)
142
-  ac$ms_col_gas <- substring(grep("AC$MASS_SPECTROMETRY: COLLISION_GAS", mb_record, value = TRUE, fixed = TRUE), 37)
143
-  ac$ms_desolv_gas_flow <- substring(grep("AC$MASS_SPECTROMETRY: DESOLVATION_GAS_FLOW", mb_record, value = TRUE, fixed = TRUE), 44)
144
-  ac$ms_desolv_temp <- substring(grep("AC$MASS_SPECTROMETRY: DESOLVATION_TEMPERATURE", mb_record, value = TRUE, fixed = TRUE), 47)
145
-  ac$ms_frag_mode <- substring(grep("AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE", mb_record, value = TRUE, fixed = TRUE), 42)
146
-  ac$ms_ionization <- substring(grep("AC$MASS_SPECTROMETRY: IONIZATION", mb_record, value = TRUE, fixed = TRUE), 34)
147
-  ac$ms_ionization_energy <- substring(grep("AC$MASS_SPECTROMETRY: IONIZATION_ENERGY", mb_record, value = TRUE, fixed = TRUE), 41)
148
-  ac$ms_laser <- substring(grep("AC$MASS_SPECTROMETRY: LASER", mb_record, value = TRUE, fixed = TRUE), 29)
149
-  ac$ms_matrix <- substring(grep("AC$MASS_SPECTROMETRY: MATRIX", mb_record, value = TRUE, fixed = TRUE), 30)
150
-  ac$ms_mass_accuracy <- substring(grep("AC$MASS_SPECTROMETRY: MASS_ACCURACY", mb_record, value = TRUE, fixed = TRUE), 37)
151
-  ac$ms_mass_range <- substring(grep("AC$MASS_SPECTROMETRY: MASS_RANGE_MZ", mb_record, value = TRUE, fixed = TRUE), 37)
152
-  ac$ms_reagent_gas <- substring(grep("AC$MASS_SPECTROMETRY: REAGENT_GAS", mb_record, value = TRUE, fixed = TRUE), 35)
153
-  ac$ms_resolution <- substring(grep("AC$MASS_SPECTROMETRY: RESOLUTION", mb_record, value = TRUE, fixed = TRUE), 34)
154
-  ac$ms_scan_setting <- substring(grep("AC$MASS_SPECTROMETRY: SCANNING_SETTING", mb_record, value = TRUE, fixed = TRUE), 40)
155
-  ac$ms_source_temp <- substring(grep("AC$MASS_SPECTROMETRY: SOURCE_TEMPERATURE", mb_record, value = TRUE, fixed = TRUE), 42)
156
-
157
-  # analytical chemistry information, chromatography
158
-  ac$chrom_carrier_gas <- substring(grep("AC$CHROMATOGRAPHY: CARRIER_GAS", mb_record, value = TRUE, fixed = TRUE), 32)
159
-  ac$chrom_column <- substring(grep("AC$CHROMATOGRAPHY: COLUMN_NAME", mb_record, value = TRUE, fixed = TRUE), 32)
160
-  ac$chrom_column_temp <- substring(grep("AC$CHROMATOGRAPHY: COLUMN_TEMPERATURE", mb_record, value = TRUE, fixed = TRUE), 39)
161
-  ac$chrom_column_temp_gradient <- substring(grep("AC$CHROMATOGRAPHY: COLUMN_TEMPERATURE_GRADIENT", mb_record, value = TRUE, fixed = TRUE), 48)
162
-  ac$chrom_flow_gradient <- substring(grep("AC$CHROMATOGRAPHY: FLOW_GRADIENT", mb_record, value = TRUE, fixed = TRUE), 34)
163
-  ac$chrom_flow_rate <- substring(grep("AC$CHROMATOGRAPHY: FLOW_RATE", mb_record, value = TRUE, fixed = TRUE), 30)
164
-  ac$chrom_inj_temp <- substring(grep("AC$CHROMATOGRAPHY: INJECTION_TEMPERATURE", mb_record, value = TRUE, fixed = TRUE), 42)
165
-  ac$chrom_inj_temp_gradient <- substring(grep("AC$CHROMATOGRAPHY: INJECTION_TEMPERATURE_GRADIENT", mb_record, value = TRUE, fixed = TRUE), 51)
166
-  ac$chrom_rti_kovats <- substring(grep("AC$CHROMATOGRAPHY: KOVATS_RTI", mb_record, value = TRUE, fixed = TRUE), 31)
167
-  ac$chrom_rti_lee <- substring(grep("AC$CHROMATOGRAPHY: LEE_RTI", mb_record, value = TRUE, fixed = TRUE), 28)
168
-  ac$chrom_rti_naps <- substring(grep("AC$CHROMATOGRAPHY: NAPS_RTI", mb_record, value = TRUE, fixed = TRUE), 29)
169
-  ac$chrom_rti_uoa <- substring(grep("AC$CHROMATOGRAPHY: UOA_RTI", mb_record, value = TRUE, fixed = TRUE), 28)
170
-  ac$chrom_rti_uoa_pred <- substring(grep("AC$CHROMATOGRAPHY: UOA_PREDICTED_RTI", mb_record, value = TRUE, fixed = TRUE), 38)
171
-  ac$chrom_rt <- substring(grep("AC$CHROMATOGRAPHY: RETENTION_TIME", mb_record, value = TRUE, fixed = TRUE), 35)
172
-  ac$chrom_rt_uoa_pred <- substring(grep("AC$CHROMATOGRAPHY: TRAMS_PREDICTED_RETENTION_TIME", mb_record, value = TRUE, fixed = TRUE), 51)
173
-  ac$chrom_solvent <- as.list(substring(grep("AC$CHROMATOGRAPHY: SOLVENT", mb_record, value = TRUE, fixed = TRUE), 28))
174
-  ac$chrom_transfer_temp <- substring(grep("AC$CHROMATOGRAPHY: TRANSFERLINE_TEMPERATURE", mb_record, value = TRUE, fixed = TRUE), 45)
175
-
176
-  # # analytical chemistry information, ion mobility
177
-  # # preparation for IMS update of MassBank format
178
-  # ac$ims_instrument_type <- substring(grep("AC$ION_MOBILITY: INSTRUMENT_TYPE", mb_record, value = TRUE, fixed = TRUE), 34)
179
-  # ac$ims_drift_gas <- substring(grep("AC$ION_MOBILITY: DRIFT_GAS", mb_record, value = TRUE, fixed = TRUE), 28)
180
-  # ac$ims_drift_time <- substring(grep("AC$ION_MOBILITY: DRIFT_TIME", mb_record, value = TRUE, fixed = TRUE), 29)
181
-  # ac$ims_ccs <- substring(grep("AC$ION_MOBILITY: CCS", mb_record, value = TRUE, fixed = TRUE), 22)
182
-
183
-  # analytical chemistry information, general
184
-  ac$general_conc <- substring(grep("AC$GENERAL: CONCENTRATION", mb_record, value = TRUE, fixed = TRUE), 27)
185
-
186
-  # clean up data
187
-  # TODO replace character(0) with NA_character
188
-
189
-  # return result list
190
-  ac
191
-}
192
-
193
-#'
194
-#'
195
-#'
196
-.isolate_massspec <- function(mb_record) {
197
-
198
-  # create empty list
199
-  ms <- list()
200
-
201
-  # MS information, base peak
202
-  ms$focus_base_peak <- substring(grep("MS$FOCUSED_ION: BASE_PEAK", mb_record, value = TRUE, fixed = TRUE), 27)
203
-
204
-  # MS information, derivative
205
-  ms$focus_derivative_form <- substring(grep("MS$FOCUSED_ION: DERIVATIVE_FORM", mb_record, value = TRUE, fixed = TRUE), 33)
206
-  ms$focus_derivative_mass <- substring(grep("MS$FOCUSED_ION: DERIVATIVE_MASS", mb_record, value = TRUE, fixed = TRUE), 33)
207
-  ms$focus_derivative_type <- substring(grep("MS$FOCUSED_ION: DERIVATIVE_TYPE", mb_record, value = TRUE, fixed = TRUE), 33)
208
-
209
-  # MS information, precursor
210
-  ms$focus_ion_type <- substring(grep("MS$FOCUSED_ION: ION_TYPE", mb_record, value = TRUE, fixed = TRUE), 26)
211
-  ms$focus_precursor_int <- substring(grep("MS$FOCUSED_ION: PRECURSOR_INT", mb_record, value = TRUE, fixed = TRUE), 31)
212
-  ms$focus_precursor_mz <- substring(grep("MS$FOCUSED_ION: PRECURSOR_MZ", mb_record, value = TRUE, fixed = TRUE), 30)
213
-  ms$focus_precursor_type <- substring(grep("MS$FOCUSED_ION: PRECURSOR_TYPE", mb_record, value = TRUE, fixed = TRUE), 32)
214
-
215
-  # MS data processing
216
-  ms$data_processing_comment <- substring(grep("MS$DATA_PROCESSING: COMMENT", mb_record, value = TRUE, fixed = TRUE), 29)
217
-  ms$data_processing_deprofile <- substring(grep("MS$DATA_PROCESSING: DEPROFILE", mb_record, value = TRUE, fixed = TRUE), 31)
218
-  ms$data_processing_find <- substring(grep("MS$DATA_PROCESSING: FIND_PEAK", mb_record, value = TRUE, fixed = TRUE), 31)
219
-  ms$data_processing_reanalyze <- substring(grep("MS$DATA_PROCESSING: REANALYZE", mb_record, value = TRUE, fixed = TRUE), 31)
220
-  ms$data_processing_recalibrate <- substring(grep("MS$DATA_PROCESSING: RECALIBRATE", mb_record, value = TRUE, fixed = TRUE), 33)
221
-  ms$data_processing_whole <- substring(grep("MS$DATA_PROCESSING: WHOLE", mb_record, value = TRUE, fixed = TRUE), 27)
222
-
223
-  # clean up data
224
-  # TODO replace character(0) with NA_character_
225
-
226
-  # TODO type conversion for numeric data
227
-
228
-  # return result list
229
-  ms
230
-}
231
-
232
-#'
233
-#'
234
-#'
235
-.isolate_recordinfo <- function(mb_record) {
236
-
237
-  # create empty list
238
-  recordinfo <- list()
239
-
240
-  # mb_record information
241
-  recordinfo$accession <- substring(grep("ACCESSION:", mb_record, value = TRUE, fixed = TRUE), 12)
242
-  recordinfo$deprecated <- substring(grep("DEPRECATED:", mb_record, value = TRUE, fixed = TRUE), 13)
243
-  recordinfo$record_title <- substring(grep("RECORD_TITLE:", mb_record, value = TRUE, fixed = TRUE), 15)
244
-  recordinfo$date <- substring(grep("DATE:", mb_record, value = TRUE, fixed = TRUE), 7)
245
-  recordinfo$authors <- substring(grep("AUTHORS:", mb_record, value = TRUE, fixed = TRUE), 10)
246
-  recordinfo$license <- substring(grep("LICENSE:", mb_record, value = TRUE, fixed = TRUE), 10)
247
-  recordinfo$copyright <- substring(grep("COPYRIGHT:", mb_record, value = TRUE, fixed = TRUE), 12)
248
-  recordinfo$publication <- substring(grep("PUBLICATION:", mb_record, value = TRUE, fixed = TRUE), 14)
249
-  recordinfo$project <- as.list(substring(grep("PROJECT:", mb_record, value = TRUE, fixed = TRUE), 10))
250
-
251
-  # clean up data
252
-  # TODO replace character(0) with NA_character_
253
-
254
-  # TODO type conversion for dates
255
-
256
-  # return result list
257
-  recordinfo
258
-
259
-}
260
-
261
-#'
262
-#'
263
-#'
264
-.isolate_peakinfo <- function(mb_record) {
265
-
266
-  # create empty list
267
-  pk <- list()
268
-
269
-  # peak data
270
-  pk$splash <- substring(grep("PK$SPLASH:", mb_record, value = TRUE, fixed = TRUE), 12)
271
-  pk$num <- substring(grep("PK$NUM_PEAK:", mb_record, value = TRUE, fixed = TRUE), 14)
272
-
273
-  # clean up data
274
-  # TODO replace character(0) with NA_character_
275
-
276
-  # TODO type conversion for numeric data
277
-
278
-  # return result list
279
-  pk
280
-
281
-}
282
-
283
-#'
284
-#'
285
-#'
286
-.isolate_comment <- function(mb_record) {
287
-
288
-  # comment section
289
-  comment <- as.list(substring(grep("COMMENT:", mb_record, value = TRUE, fixed = TRUE), 10))
290
-
291
-  # return result list
292
-  comment
293
-
294
-}
295 115
new file mode 100644
... ...
@@ -0,0 +1,13 @@
1
+#' @title Internal page for hidden aliases
2
+#'
3
+#' @aliases [,MsBackendDataFrame-method
4
+#'
5
+#' @description
6
+#'
7
+#' For S4 methods that require a documentation entry but only clutter the index.
8
+#'
9
+#' @usage NULL
10
+#' @format NULL
11
+#' @keywords internal
12
+#' @docType methods
13
+hidden_aliases <- NULL