Bioconductor Code: affxparser

Raw Blame Patch Log History
########################################################################/**
# @RdocFunction parseDatHeaderString
#
# @title "Parses a DAT header string"
#
# \description{
#  @get "title".
# }
#
# @synopsis
#
# \arguments{
#  \item{header}{A @character string.}
#  \item{timeFormat}{The format string used to parse the timestamp.
#    For more details, see \code{\link[base:strptime]{strptime}()}.
#    If @NULL, no parsing is done.}
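#  \item{...}{Additional arguments passed to
#    \code{\link[base:strptime]{strptime}()} when the timestamp is parsed,
#    i.e. when \code{timeFormat} is not @NULL.}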
# }
#
# \value{
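#  Returns a named @list structure with elements \code{nbrOfPixelColumns},
#  \code{nbrOfPixelRows}, \code{pixelWidth}, \code{pixelHeight},
#  \code{scanSpeed}, \code{temperature}, \code{laserPower},
#  \code{timestamp}, and \code{chipType}.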
# }
#
# \seealso{
#  @see "readCelHeader".
# }
#
# @author "HB"
#
# @keyword programming
# @keyword file
# @keyword IO
# @keyword internal
#**/####################################################################### 
parseDatHeaderString <- function(header, timeFormat="%m/%d/%y %H:%M:%S", ...) {
  # Parses the header string of an Affymetrix DAT file into a named list.
  #
  # Arguments:
  #  header:     A character string holding the DAT header.
  #  timeFormat: Format passed to strptime() for parsing the timestamp.
  #              If NULL, the timestamp is returned as a character string.
  #  ...:        Passed to strptime() when 'timeFormat' is not NULL.
  #
  # Value:
  #  A named list with elements 'nbrOfPixelColumns', 'nbrOfPixelRows'
  #  (integers), 'pixelWidth', 'pixelHeight', 'scanSpeed', 'temperature',
  #  'laserPower' (doubles), 'timestamp' (a POSIXlt object as returned by
  #  strptime(), or a character string if 'timeFormat' is NULL), and
  #  'chipType' (character).  Elements that cannot be located in the
  #  header are NA.

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Strips leading and trailing whitespace (tab, LF, FF, CR and space).
  trim <- function(s, ...) {
    s <- as.character(s);
    gsub("^[\t\n\f\r ]+|[\t\n\f\r ]+$", "", s);
  } # trim()

  # Returns capture group 'idx' of 'pattern' in the string 's', or
  # NA_character_ if 's' does not match.  Without this guard, sub()/gsub()
  # return the unmodified input on a non-match, which would then be
  # mistaken for the extracted value.
  extractGroup <- function(pattern, s, idx) {
    if (!isTRUE(grepl(pattern, s))) {
      return(NA_character_);
    }
    sub(pattern, sprintf("\\%d", idx), s);
  } # extractGroup()


  # <quote>
  #   There are several sub-fields in this field. The first sub field is the
  # scanner ID, sometimes followed by a number, followed by three spaces.
  # If the scanner ID is absent, the field consists of four spaces.
  #   Next are 10 structured comment fields. Each field is preceded by the
  # delimiter 0x14 and a space. The field is followed by a space and 0x14.
  # Only field two is valid, the other 9 fields are obsolete. Field 2
  # contains the probe array type, followed by .1sq. The 1sq extension is
  # also obsolete.
  #   Next (after the last structured field) there is the chip orientation
  # preceded by a space.
  #   The rest of the field is filled with nulls (zeros)
  # </quote>

  # Split by ASCII 0x14 delimiter (written as octal \024)
  parts <- strsplit(header, split=" \024 ", fixed=TRUE)[[1]];
  field <- parts[1];
  commentFields <- parts[-1];

  res <- list();

  # The fixed-width numeric fields.  The temperature (7 characters) and the
  # laser power (4 characters) directly follow the "VE=" field, so the
  # trailing ".*" must come after them; a greedy ".*" placed before them
  # would make the two groups capture the last 11 characters of 'field'.
  pattern <- ".*CLS=(.{5})RWS=(.{5})XIN=(.{3})YIN=(.{3})VE=(.{3})(.{7})(.{4}).*";

  # Number of pixels per row (padded with spaces), preceded with "CLS=".
  res$nbrOfPixelColumns <- as.integer(extractGroup(pattern, field, 1L));

  # Number of rows in the image (padded with spaces), preceded with "RWS=".
  res$nbrOfPixelRows <- as.integer(extractGroup(pattern, field, 2L));

  # Pixel width in micrometers (padded with spaces), preceded with "XIN="
  res$pixelWidth <- as.double(extractGroup(pattern, field, 3L));

  # Pixel height in micrometers (padded with spaces), preceded with "YIN="
  res$pixelHeight <- as.double(extractGroup(pattern, field, 4L));

  # Scan speed in millimeters per second (padded with spaces),
  # preceded with "VE=".
  res$scanSpeed <- as.double(extractGroup(pattern, field, 5L));

  # Temperature in degrees Celsius (padded with spaces). If no temperature was
  # set then the entire field is empty, which is coerced to NA.
  res$temperature <- as.double(extractGroup(pattern, field, 6L));

  # Laser power in milliwatts or microwatts (padded with spaces).
  # This is the 7th capture group, not the 6th (which is the temperature).
  res$laserPower <- as.double(extractGroup(pattern, field, 7L));


  # Extract the date timestamp.  The leading ".*" is greedy, so if several
  # substrings look like a timestamp, the last one is used.
  pattern <- ".*([01][0-9]/[0-3][0-9]/[0-9][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]).*";
  hasTimestamp <- grepl(pattern, header);
  timestamp <- rep(NA_character_, times=length(header));
  timestamp[hasTimestamp] <- trim(sub(pattern, "\\1", header[hasTimestamp]));

  # Parse the identified timestamp?  strptime() returns a POSIXlt object,
  # which is NA if no timestamp was found or if it could not be parsed.
  if (!is.null(timeFormat)) {
    timestamp <- strptime(timestamp, format=timeFormat, ...);
  }
  res$timestamp <- timestamp;

  # The chip type is the second structured comment field, with the
  # obsolete ".1sq" extension dropped.  NA if there is no such field.
  res$chipType <- trim(gsub("[.]1sq", "", commentFields[2]));

  res;
} # parseDatHeaderString()


############################################################################
# HISTORY:
# 2009-09-21
# o Created from internal code in aroma.affymetrix.
############################################################################