R/private.unwrapCelHeaderV4.R
9fc6b826
 .unwrapTagValuePairs <- function(bfr, ...) {
65301877
   trim <- function(s) {
     s <- gsub("^ *", "", s);
     s <- gsub(" *$", "", s);
   }
 
9fc6b826
   bfr <- trim(bfr);
   patterns <- c("^([^:]*):([^;]*)[;]*(.*)$", "^([^=]*)=([^ ]*)[ ]*(.*)$");
   tags <- values <- c();
   while (nchar(bfr) > 0) {
     for (pattern in patterns) {
       tag <- gsub(pattern, "\\1", bfr);
       if (!identical(tag, bfr))
         break;
     }
     value <- gsub(pattern, "\\2", bfr);
 
     tags <- c(tags, tag);
     values <- c(values, value);
 
     bfr <- gsub(pattern, "\\3", bfr);
     bfr <- trim(bfr);
   }
 
   params <- as.list(values);
   names(params) <- tags;
   params;
 }
 
 .unwrapDatHeaderString <- function(header, ...) {
   trim <- function(s) {
     s <- gsub("^ *", "", s);
     s <- gsub(" *$", "", s);
   }
 
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # Already a list?
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   if (is.list(header)) {
   } else {
     header <- strsplit(header, split="\n")[[1]];
   }
 
a27f3354
 
   # Extract the "head" and the "tail" of the DAT header
   pattern <- "([^\024]*)(\024.*)";
   head <- gsub(pattern, "\\1", header);
   tail <- gsub(pattern, "\\2", header);
 
9fc6b826
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # [123456789012345678900123456789001234567890]
   # "[5..65534]  NA06985_H_tH_B5_3005533:",      ????
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   pattern <- "^([^:]*):(.*)$";
a27f3354
   if (regexpr(pattern, head) != -1) {
     bfr <- gsub(pattern, "\\1", header);
     header2 <- gsub(pattern, "\\2", header);
     bfr <- trim(bfr);             # Example: "[12..40151]  Fetal 3"
     if (nchar(bfr) > 0) {
       pattern <- "^([^ ]*])[ ]*(.*)[ ]*";
       pixelRange <- gsub(pattern, "\\1", bfr);
       sampleName <- gsub(pattern, "\\2", bfr);
       if (identical(pixelRange, sampleName)) {
         stop("Internal error: Failed to extract 'pixelRange' and 'sampleName' from DAT header.  They became identical: ", pixelRange);
       }
     } else {
       pixelRange <- "";
       sampleName <- "";
     }
9fc6b826
 
a27f3354
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # Parse the DAT header
     #
     # 1. Number of pixels per row (padded with spaces), preceded with 
     #    "CLS=". char[9]
     # 2. Number of rows in the image (padded with spaces), preceded with 
     #    "RWS=".char[9]
     # 3. Pixel width in micrometers (padded with spaces), preceded with 
     #    "XIN=" char[7]
     # 4. Pixel height in micrometers (padded with spaces), preceded with 
     #    "YIN=". char[7]
     # 5. Scan speed in millimeters per second (padded with spaces), preceded 
     #    with "VE=". char[6]
     # 6. Temperature in degrees Celsius (padded with spaces). If no temperature
     #    was set then the entire field is empty. char[7]
     # 7. Laser power in milliwatts or microwatts (padded with spaces). char[4]
     # 8. Date and time of scan (padded with spaces). char[18]
     #
     # Example:
     # [123456789012345678900123456789001234567890] (See above)
     # "CLS=8714 ",
     # "RWS=8714 ",
     # "XIN=1  ",
     # "YIN=1  ",
     # "VE=30 ",
     # "       ",
     # "2.0 ",
     # "01/14/04 14:26:57 "
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     len <- c(9,9,7,7,6,7,4,18,220);
     ends <- cumsum(len);
     starts <- ends - len + 1;
     header <- substring(header2, starts, ends);
     header <- trim(header);
   
     # Store the last field
     bfr <- header[9];
   
     header <- list(
       pixelRange = pixelRange,
       sampleName = sampleName,
       CLS = gsub("^CLS=(.*)", "\\1", header[1]),
       RWS = gsub("^RWS=(.*)", "\\1", header[2]),
       XIN = gsub("^XIN=(.*)", "\\1", header[3]),
       YIN = gsub("^YIN=(.*)", "\\1", header[4]),
       VE = gsub("^VE=(.*)", "\\1", header[5]),
       scanTemp = header[6],
       laserPower = header[7],
       scanDate = header[8]
     );
 
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # The 'bfr' field:
     #
     # "There are several sub-fields in this field. The first sub field is the 
     #  scanner ID, sometimes followed by a number, followed by three spaces. 
     #  If the scanner ID is absent, the field consists of four spaces.
     #
     # Example:
     # [123456789012345678900123456789001234567890] (????)
     # "50101230  M10   \024  \024 Hind240.1sq \024  \024  \024  \024  
     # \024  \024  \024  \024  \024 6"
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # 0x14 == 024
     pattern <- "^([^\024]*)[ ]*(\024.*)$";
     scannerInfo <- gsub(pattern, "\\1", bfr);
     scannerInfo <- trim(scannerInfo);
     bfr <- gsub(pattern, "\\2", bfr);
   
   # Not locale safe: pattern <- "^([a-zA-Z0-9]*)[ ]*([a-zA-Z0-9]*)[ ]*";
     pattern <- "^([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*";
     header$scanner <- list(
         id = gsub(pattern, "\\1", scannerInfo),
       type = gsub(pattern, "\\2", scannerInfo)
     );
   } else {
     # TO DO: Make these NAs to have the correct storage modes
     naValue <- as.character(NA);
     naValue <- "";
     header <- list(
       pixelRange = naValue,
       sampleName = naValue,
       CLS = naValue,
       RWS = naValue,
       XIN = naValue,
       YIN = naValue,
       VE = naValue,
       scanTemp = naValue,
       laserPower = naValue,
       scanDate = naValue,
       scanner = list(id=naValue, type=naValue)
     );
   }
9fc6b826
 
a27f3354
   bfr <- tail;
9fc6b826
 
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   #  Next are 10 structured comment fields. Each field is preceded by the 
   #  delimiter 0x14 and a space. The field is followed by a space and 0x14. 
   #  Only field two is valid, the other 9 fields are obsolete. Field 2 
   #  contains the probe array type, followed by .1sq. The 1sq extension is 
   #  also obsolete.
   #
   #  Next (after the last structured field) there is the chip orientation 
   #  preceded by a space.
   #
   #  The rest of the field is filled with nulls (zeros)".  Size: char[220]
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   pattern <- "\024 ([^\024]*)(.*)";
   values <- c();
5f7aff5e
   lastNchar <- -Inf;
   while (nchar(bfr) != lastNchar) {
     lastNchar <- nchar(bfr);
9fc6b826
     value <- gsub(pattern, "\\1", bfr);
     value <- trim(value);
     bfr <- gsub(pattern, "\\2", bfr);
     values <- c(values, value);
   }
 
   header$misc <- values;
   header$chipType <- gsub("[.]1sq$", "", values[2]);
 
   header;
 } # .unwrapDatHeaderString()
 
 
 
 .unwrapCelHeaderV3String <- function(header, ...) {
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # Already a list?
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   if (is.list(header)) {
   } else {
     header <- strsplit(header, split="\n")[[1]];
 #    keep <- (unlist(lapply(header, FUN=nchar)) > 0);
 #    header <- header[keep];
   }
 
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # Parse the CEL v3 header
   #
   # "The data in each section is of the format TAG=VALUE."
   #
   # Cols - The number of columns in the array (of cells).
   # Rows - The number of rows in the array (of cells).
   # TotalX - Same as Cols.
   # TotalY - Same as Rows.
   # OffsetX - Not used, always 0.
   # OffsetY - Not used, always 0.
   # GridCornerUL - XY coordinates of the upper left grid corner in pixel
   #   coordinates.
   # GridCornerUR - XY coordinates of the upper right grid corner in pixel
   #   coordinates.
   # GridCornerLR - XY coordinates of the lower right grid corner in pixel
   #   coordinates.
   # GridCornerLL - XY coordinates of the lower left grid corner in pixel
   #   coordinates.
5f7aff5e
   # Axis-invertX - Not used, always 0.
   # AxisInvertY - Not used, always 0.
9fc6b826
   # swapXY - Not used, always 0.
   # DatHeader - The header from the DAT file.
   # Algorithm  - The algorithm name used to create the CEL file.
   # AlgorithmParameters - The parameters used by the algorithm. The format 
   #   is TAG:VALUE pairs separated by semi-colons or TAG=VALUE pairs separated
   #   by spaces.
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   pattern <- "^([-a-zA-Z0-9]*)=(.*)$";
   names <- gsub(pattern, "\\1", header);
   values <- gsub(pattern, "\\2", header);
   names(values) <- names;
   header <- as.list(values);
5f7aff5e
   # Fix some mishaps in names (sic!; see DevNet forum this week) /HB 2006-09-10
 #  names(header) <- gsub("^Axis-invert", "Axis-Invert", names(header));
 #  names(header) <- gsub("^AxisInvert", "Axis-Invert", names(header));
9fc6b826
 
   # Assert that all mandatory fields are there
5f7aff5e
   knownFields <- c("Cols", "Rows", "TotalX", "TotalY", "OffsetX", "OffsetY", "GridCornerUL", "GridCornerUR", "GridCornerLR", "GridCornerLL", "Axis-invertX", "AxisInvertY", "swapXY", "DatHeader", "Algorithm", "AlgorithmParameters");
9fc6b826
   missing <- !(knownFields %in% names(header));
   if (any(missing)) {
     stop("Argument 'header' does not contain all mandatory fields: ", 
                                    paste(knownFields[missing], collapse=", "));
   }
 
   # Unwrap DAT header string
   header$DatHeader <- .unwrapDatHeaderString(header$DatHeader);
 
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # Unwrap 'AlgorithmParameters':
   #
   # AlgorithmParameters - The parameters used by the algorithm. The format 
   #   is TAG:VALUE pairs separated by semi-colons or TAG=VALUE pairs separated
   #   by spaces.
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   header$AlgorithmParameters <- .unwrapTagValuePairs(header$AlgorithmParameters);
 
   header;
 } # .unwrapCelHeaderV3String()
 
 
 
 # \arguments{
 #   \item{header}{A @list structure as returned by @see "readCelHeader".}
 # }
 .unwrapCelHeaderV4 <- function(header, ...) {
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # Validate arguments
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # Argument 'header':
   if (!is.list(header)) {
     stop("Argument 'header' is not a list: ", mode(header));
   }
 
   # Assert that all header fields are there
   knownFields <- c("version", "cols", "rows", "total", "algorithm", "parameters", "chiptype", "header", "cellmargin", "noutliers", "nmasked");
   missing <- !(knownFields %in% names(header));
   if (any(missing)) {
     stop("Argument 'header' does not contain all mandatory fields: ", 
                                    paste(knownFields[missing], collapse=", "));
   }
 
   header$parameters <- .unwrapTagValuePairs(header$parameters);
 
   header$header <- .unwrapCelHeaderV3String(header$header);
 
   header;
 } # .unwrapCelHeaderV4()
 
 
ede2c2f5
 
9fc6b826
 ############################################################################
 # HISTORY:
a27f3354
 # 2011-02-22
 # o ROBUSTNESS/BUG FIX: The internal .unwrapDatHeaderString() would
 #   throw "Internal error: Failed to extract 'pixelRange' and 'sampleName' 
 #   from DAT header.  They became identical: ..." in case the DAT header
 #   of the CEL file did not contain all fields.  The function has now
 #   been updated to be more forgiving and robust so that missing values
 #   are returned for such fields instead.
ede2c2f5
 # 2007-08-16
 # o BUG FIX: Internal .unwrapDatHeaderString() failed to correctly extract
 #   'pixelRange' and 'sampleName' from DAT header.
482dbbd4
 # 2006-12-28
 # o R CMD check v2.5.0 devel complained about: Warning: '\]' is an 
 #   unrecognized escape in a character string. Warning: unrecognized escape
 #   removed from "^([^\]]*])[ ]*(.*)[ ]*".  Replaced with '\\]'.
65301877
 # 2006-09-10
 # o BUG FIX: Local trim() was missing in one of the private functions.
9fc6b826
 # 2006-09-06
 # o Created.  This is used by writeCelHeaderV4().
 ############################################################################