13a450da |
#' @include hidden_aliases.R
NULL
#' @title MS data backend for MassBank files
#'
#' @aliases MsBackendMassbank-class
#'
#' @description
#'
#' The `MsBackendMassbank` class supports import of
|
6a3e408f |
#' MS/MS spectrum data from
#' [Massbank](https://github.com/MassBank/MassBank-data)
|
13a450da |
#' files. After initial import, the full MS data is kept in
|
b99a42e6 |
#' memory. `MsBackendMassbank` extends the
#' [Spectra::MsBackendDataFrame()] backend
#' directly and thus supports the [Spectra::applyProcessing()] function to make
|
6a3e408f |
#' data manipulations persistent.
|
13a450da |
#'
#' New objects are created with the `MsBackendMassbank` function. The
#' `backendInitialize` method has to be subsequently called to
|
6a3e408f |
#' initialize the object and import MS/MS data from (one or more) MassBank
#' files. The optional parameter `nonStop` specifies whether the
#' import stops with an error if one of the text files lacks required
|
13a450da |
#' data, such as `mz` and `intensity` values (default `nonStop =
#' FALSE`), or whether only affected file(s) is(are) skipped and a
#' warning is shown (`nonStop = TRUE`). Note that any other error
|
6a3e408f |
#' will abort import regardless of parameter `nonStop`.
|
13a450da |
#'
#' @param object Instance of `MsBackendMassbank` class.
#'
|
5b7c5974 |
#' @param file for `export`: `character(1)` defining the output file.
#'
|
6a3e408f |
#' @param files `character` with the (full) file name(s) of the MassBank file(s)
|
13a450da |
#' from which MS/MS data should be imported.
#'
|
5b7c5974 |
#' @param format for `spectraVariableMapping`: `character(1)` defining the
#' format to be used. Currently only `format = "Massbank"` is supported.
#'
#' @param mapping for `export`: named `character` vector
#' specifying how fields from the Massbank file should be renamed.
#' Names are expected to be the spectra variable names and the values of
#' the vector the corresponding field names in the Massbank file. See output of
|
797495cd |
#' `spectraVariableMapping(MsBackendMassbank())` for the expected format.
|
5b7c5974 |
#'
|
6a3e408f |
#' @param metaBlocks `data.frame` indicating which metadata shall
#' be imported. Default is [metaDataBlocks()].
|
b6ec6667 |
#'
|
13a450da |
#' @param nonStop `logical(1)` whether import should continue (skipping
#' the affected file(s) with a warning) if a MassBank file does not contain
#' all required fields. Defaults to `nonStop = FALSE`, i.e. the import
#' stops with an error.
#'
#' @param BPPARAM Parameter object defining the parallel processing
#' setup to import data in parallel. Defaults to `BPPARAM =
|
b99a42e6 |
#' bpparam()`. See [BiocParallel::bpparam()] for more information.
|
13a450da |
#'
|
b99a42e6 |
#' @param x [Spectra::Spectra()] object that should be exported.
|
5b7c5974 |
#'
|
13a450da |
#' @param ... Currently ignored.
#'
#' @author Michael Witting
#'
#' @importClassesFrom Spectra MsBackendDataFrame
#'
#' @exportClass MsBackendMassbank
#'
#' @name MsBackendMassbank
#'
|
6a3e408f |
#' @return `backendInitialize` and `MsBackendMassbank` return an instance of
#' `MsBackendMassbank-class`.
#'
|
13a450da |
#' @examples
#'
|
6a3e408f |
#' ## Create an MsBackendMassbank backend and import data from a test file.
|
13a450da |
#' fls <- dir(system.file("extdata", package = "MsBackendMassbank"),
|
331d39bb |
#' full.names = TRUE, pattern = "txt$")
|
13a450da |
#' be <- backendInitialize(MsBackendMassbank(), fls)
#' be
#'
#' be$msLevel
#' be$intensity
#' be$mz
|
6a3e408f |
#'
#' ## Initializing a backend reading additional metadata columns/information
#' mb <- metaDataBlocks()
#' mb
#' mb[1, 2] <- TRUE
#'
#' be <- backendInitialize(MsBackendMassbank(), fls, metaBlocks = mb)
#' spectraVariables(be)
#' be$instrument
|
13a450da |
NULL
## S4 class definition: `MsBackendMassbank` inherits everything from
## `MsBackendDataFrame` and only supplies default slot values
## (empty spectra data, writable, version string "0.1").
setClass(
    "MsBackendMassbank",
    contains = "MsBackendDataFrame",
    prototype = prototype(
        spectraData = DataFrame(),
        readonly = FALSE,
        version = "0.1"
    )
)
|
f56ab929 |
#' @importMethodsFrom Spectra spectraData<- $<- $
#'
#' @importMethodsFrom ProtGenerics backendInitialize
|
13a450da |
#'
#' @importFrom BiocParallel bpparam
#'
|
6a3e408f |
#' @importFrom S4Vectors bindROWS
#'
|
13a450da |
#' @importMethodsFrom BiocParallel bplapply
#'
#' @importFrom methods validObject
#'
#' @exportMethod backendInitialize
#'
#' @rdname MsBackendMassbank
setMethod("backendInitialize", signature = "MsBackendMassbank",
          function(object, files, metaBlocks = metaDataBlocks(),
                   nonStop = FALSE, ..., BPPARAM = bpparam()) {
              ## Import the content of one or more MassBank text files into
              ## `object`.
              ##
              ## object:     `MsBackendMassbank` instance to fill.
              ## files:      `character` with the names of the files to read.
              ## metaBlocks: passed on to `.read_massbank` for each file.
              ## nonStop:    passed on to `.read_massbank`; if `TRUE`, a
              ##             warning is raised when a file yielded no data.
              ## ...:        not used here.
              ## BPPARAM:    parallel processing setup for `bplapply`.
              ##
              ## Returns the validated `object` with its `spectraData` slot
              ## replaced by the imported data. Errors if `files` is
              ## missing, empty, not a character vector, or names a file
              ## that does not exist.
              if (missing(files) || !length(files))
                  stop("Parameter 'files' is mandatory for ", class(object))
              if (!is.character(files))
                  stop("Parameter 'files' is expected to be a character vector",
                       " with the files names from where data should be",
                       " imported")
              ## `mustWork = FALSE` resolves the paths without warning about
              ## non-existing files; those are reported explicitly below.
              files <- normalizePath(files, mustWork = FALSE)
              not_found <- files[!file.exists(files)]
              if (length(not_found)) {
                  stop("file(s) ",
                       paste(not_found, collapse = ", "),
                       " not found")
              }
              ## Import data and rbind.
              message("Start data import from ", length(files), " files ... ",
                      appendLF = FALSE)
              res <- bplapply(files, FUN = .read_massbank,
                              metaBlocks = metaBlocks,
                              nonStop = nonStop, BPPARAM = BPPARAM)
              message("done")
              ## A zero-length result marks a file for which nothing was
              ## imported.
              if (nonStop && any(lengths(res) == 0))
                  warning("Import failed for some files")
              ## res <- bindROWS(DataFrame(), objects = res, use.names = FALSE,
              ##                 ignore.mcols = TRUE, check = FALSE)
              ## spectraData(object) <- res
              message("Merging results ...", appendLF = FALSE)
              ## Stack the per-file results row-wise, convert to a
              ## `DataFrame` and store the peak columns as uncompressed
              ## `NumericList`s.
              res <- as(do.call(rbind, res), "DataFrame")
              res$mz <- NumericList(res$mz, compress = FALSE)
              res$intensity <- NumericList(res$intensity, compress = FALSE)
              object@spectraData <- res
              message("done")
              object$dataStorage <- "<memory>"
              validObject(object)
              object
          })
#' @rdname MsBackendMassbank
#'
#' @importFrom methods new
#'
#' @export MsBackendMassbank
MsBackendMassbank <- function() {
    ## Constructor: returns a new instance of the `MsBackendMassbank`
    ## class built from the class prototype.
    backend <- new("MsBackendMassbank")
    backend
}
|
5787d32a |
|
797495cd |
#' @importMethodsFrom Spectra spectraVariableMapping
#'
#' @exportMethod spectraVariableMapping
|
5787d32a |
#'
#' @rdname MsBackendMassbank
|
797495cd |
setMethod(
    "spectraVariableMapping", "MsBackendMassbank",
    function(object, format = c("Massbank")) {
        ## Returns a named `character` vector: names are spectra variable
        ## names, values are the matching MassBank record field labels.
        ## The vector is assembled section by section; the order of the
        ## sections and of the entries within them is part of the result.

        ## minimal information
        core_fields <- c(
            accession = "ACCESSION:",
            name = "CH$NAME:",
            smiles = "CH$SMILES:",
            exactmass = "CH$EXACT_MASS:",
            formula = "CH$FORMULA:",
            inchi = "CH$IUPAC:",
            cas = "CH$LINK: CAS",
            inchikey = "CH$LINK: INCHIKEY",
            collisionEnergy = "AC$MASS_SPECTROMETRY: COLLISION_ENERGY",
            precursorMz = "MS$FOCUSED_ION: PRECURSOR_M/Z",
            precursorIntensity = "MS$FOCUSED_ION: PRECURSOR_INT",
            adduct = "MS$FOCUSED_ION: PRECURSOR_TYPE",
            rtime = "AC$CHROMATOGRAPHY: RETENTION_TIME",
            polarity = "AC$MASS_SPECTROMETRY: ION_MODE",
            splash = "PK$SPLASH:",
            title = "RECORD_TITLE:"
        )

        ## instrument information
        instrument_fields <- c(
            instrument = "AC$INSTRUMENT:",
            instrument_type = "AC$INSTRUMENT_TYPE:"
        )

        ## ms information
        ms_fields <- c(
            ms_ms_type = "AC$MASS_SPECTROMETRY: MS_TYPE",
            ms_cap_voltage = "AC$MASS_SPECTROMETRY: CAPILLARY_VOLTAGE",
            ms_col_gas = "AC$MASS_SPECTROMETRY: COLLISION_GAS",
            ms_desolv_gas_flow =
                "AC$MASS_SPECTROMETRY: DESOLVATION_GAS_FLOW",
            ms_desolv_temp =
                "AC$MASS_SPECTROMETRY: DESOLVATION_TEMPERATURE",
            ms_ionization = "AC$MASS_SPECTROMETRY: IONIZATION",
            ms_ionization_energy =
                "AC$MASS_SPECTROMETRY: IONIZATION_ENERGY",
            ms_laser = "AC$MASS_SPECTROMETRY: LASER",
            ms_matrix = "AC$MASS_SPECTROMETRY: MATRIX",
            ms_mass_accuracy = "AC$MASS_SPECTROMETRY: MASS_ACCURACY",
            ms_mass_range = "AC$MASS_SPECTROMETRY: MASS_RANGE_MZ",
            ms_reagent_gas = "AC$MASS_SPECTROMETRY: REAGENT_GAS",
            ms_resolution = "AC$MASS_SPECTROMETRY: RESOLUTION",
            ms_scan_setting = "AC$MASS_SPECTROMETRY: SCANNING_SETTING",
            ms_source_temp = "AC$MASS_SPECTROMETRY: SOURCE_TEMPERATURE",
            ms_frag_mode = "AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE",
            ms_kinetic_energy = "AC$MASS_SPECTROMETRY: KINETIC_ENERGY",
            ms_electron_current = "AC$MASS_SPECTROMETRY: ELECTRON_CURRENT",
            ms_reaction_time = "AC$MASS_SPECTROMETRY: REACTION_TIME"
        )

        ## ims information
        ims_fields <- c(
            ims_instrument_type = "AC$ION_MOBILITY: INSTRUMENT_TYPE",
            ims_drift_gas = "AC$ION_MOBILITY: DRIFT_GAS",
            ims_drift_time = "AC$ION_MOBILITY: DRIFT_TIME",
            ims_ccs = "AC$ION_MOBILITY: CCS"
        )

        ## ms information part II
        focus_fields <- c(
            focus_base_peak = "MS$FOCUSED_ION: BASE_PEAK",
            focus_derivative_form = "MS$FOCUSED_ION: DERIVATIVE_FORM",
            focus_derivative_mass = "MS$FOCUSED_ION: DERIVATIVE_MASS",
            focus_derivative_type = "MS$FOCUSED_ION: DERIVATIVE_TYPE",
            focus_ion_type = "MS$FOCUSED_ION: ION_TYPE"
        )

        ## data processing information
        processing_fields <- c(
            data_processing_comment = "MS$DATA_PROCESSING: COMMENT",
            data_processing_deprofile = "MS$DATA_PROCESSING: DEPROFILE",
            data_processing_find = "MS$DATA_PROCESSING: FIND_PEAK",
            data_processing_reanalyze = "MS$DATA_PROCESSING: REANALYZE",
            data_processing_recalibrate =
                "MS$DATA_PROCESSING: RECALIBRATE",
            data_processing_whole = "MS$DATA_PROCESSING: WHOLE"
        )

        ## chromatography information
        chrom_fields <- c(
            chrom_carrier_gas = "AC$CHROMATOGRAPHY: CARRIER_GAS",
            chrom_column = "AC$CHROMATOGRAPHY: COLUMN_NAME",
            chrom_column_temp = "AC$CHROMATOGRAPHY: COLUMN_TEMPERATURE",
            chrom_column_temp_gradient =
                "AC$CHROMATOGRAPHY: COLUMN_TEMPERATURE_GRADIENT",
            chrom_flow_gradient = "AC$CHROMATOGRAPHY: FLOW_GRADIENT",
            chrom_flow_rate = "AC$CHROMATOGRAPHY: FLOW_RATE",
            chrom_inj_temp = "AC$CHROMATOGRAPHY: INJECTION_TEMPERATURE",
            chrom_inj_temp_gradient =
                "AC$CHROMATOGRAPHY: INJECTION_TEMPERATURE_GRADIENT",
            chrom_rti_kovats = "AC$CHROMATOGRAPHY: KOVATS_RTI",
            chrom_rti_lee = "AC$CHROMATOGRAPHY: LEE_RTI",
            chrom_rti_naps = "AC$CHROMATOGRAPHY: NAPS_RTI",
            chrom_rti_uoa = "AC$CHROMATOGRAPHY: UOA_RTI",
            chrom_rti_uoa_pred = "AC$CHROMATOGRAPHY: UOA_PREDICTED_RTI",
            chrom_rt = "AC$CHROMATOGRAPHY: RETENTION_TIME",
            chrom_solvent = "AC$CHROMATOGRAPHY: SOLVENT",
            chrom_transfer_temp =
                "AC$CHROMATOGRAPHY: TRANSFERLINE_TEMPERATURE"
        )

        ## chemical information
        chemical_fields <- c(
            compound_class = "CH$COMPOUND_CLASS:",
            link_cayman = "CH$LINK: CAYMAN",
            link_chebi = "CH$LINK: CHEBI",
            link_chembl = "CH$LINK: CHEMBL",
            link_chempdb = "CH$LINK: CHEMPDB",
            link_chemspider = "CH$LINK: CHEMSPIDER",
            link_comptox = "CH$LINK: COMPTOX",
            link_hmdb = "CH$LINK: HMDB",
            link_kappaview = "CH$LINK: KAPPAVIEW",
            link_kegg = "CH$LINK: KEGG",
            link_knapsack = "CH$LINK: KNAPSACK",
            link_lipidbank = "CH$LINK: LIPIDBANK",
            link_lipidmaps = "CH$LINK: LIPIDMAPS",
            link_nikkaji = "CH$LINK: NIKKAJI",
            link_pubchem = "CH$LINK: PUBCHEM",
            link_zinc = "CH$LINK: ZINC"
        )

        ## sample information
        sample_fields <- c(
            scientific_name = "SP$SCIENTIFIC_NAME:",
            lineage = "SP$LINEAGE:",
            link = "SP$LINK:",
            sample = "SP$SAMPLE:"
        )

        ## record information
        record_fields <- c(
            deprecated = "DEPRECATED:",
            date = "DATE:",
            authors = "AUTHORS:",
            license = "LICENSE:",
            copyright = "COPYRIGHT:",
            publication = "PUBLICATION:",
            project = "PROJECT:",
            comment = "COMMENT:"
        )

        ## peak information
        peak_fields <- c(
            pknum = "PK$NUM_PEAK:"
        )

        ## `match.arg` rejects any format other than the listed one(s).
        switch(match.arg(format),
               "Massbank" = c(core_fields,
                              instrument_fields,
                              ms_fields,
                              ims_fields,
                              focus_fields,
                              processing_fields,
                              chrom_fields,
                              chemical_fields,
                              sample_fields,
                              record_fields,
                              peak_fields)
               )
    })
|
5787d32a |
#' @importMethodsFrom Spectra export
#'
#' @exportMethod export
#'
#' @rdname MsBackendMassbank
|
6a3e408f |
setMethod(
    f = "export",
    signature = "MsBackendMassbank",
    definition = function(object, x, file = tempfile(),
                          mapping = spectraVariableMapping(MsBackendMassbank()),
                          ...) {
        ## `object` only selects this method; the data to write is `x`.
        ## `file` is handed to `.export_massbank` as its `con` argument and
        ## `mapping` is passed through unchanged. `...` is not forwarded.
        .export_massbank(x = x, con = file, mapping = mapping)
    }
)
|
a99bc7da |
## #' tests...
## p <- "/home/jo/Projects/compounds/MassBank/text/MassBank-data-2024.11"
## fls <- dir(p, pattern = "txt$", recursive = TRUE, full.names = TRUE)
## library(Spectra)
## mb_spectra <- Spectra(fls,
## source = MsBackendMassbank(),
## backend = MsBackendDataFrame(),
## nonStop = TRUE,
## BPPARAM = SerialParam())
|