Browse code

Version: 1.23.3 [2011-02-22] o ROBUSTNESS/BUG FIX: The internal .unwrapDatHeaderString() would throw "Internal error: Failed to extract 'pixelRange' and 'sampleName' from DAT header. They became identical: ..." in case the DAT header of the CEL file did not contain all fields. The function has now been updated to be more forgiving and robust so that missing values are returned for such fields instead.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@53090 bc3139a8-67e5-0310-9ffc-ced21a209358

Henrik Bengtsson authored on 23/02/2011 02:54:46
Showing 5 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: affxparser
2
-Version: 1.23.2
3
-Date: 2011-02-15
2
+Version: 1.23.3
3
+Date: 2011-02-22
4 4
 Title: Affymetrix File Parsing SDK
5 5
 Author: Henrik Bengtsson, James Bullard, Robert Gentleman, Kasper Daniel Hansen, Martin Morgan
6 6
 Maintainer: Kasper Daniel Hansen <[email protected]>
... ...
@@ -8,6 +8,6 @@ Description: Package for parsing Affymetrix files (CDF, CEL, CHP, BPMAP, BAR).
8 8
 FusionDetails: Fusion SDK v1.1.0
9 9
 License: LGPL (>= 2)
10 10
 Depends: R (>= 2.6.0)
11
-Suggests: R.utils, AffymetrixDataTestFiles
11
+Suggests: R.utils (>= 1.6.3), AffymetrixDataTestFiles
12 12
 LazyLoad: yes
13 13
 biocViews: Infrastructure, DataImport
... ...
@@ -82,7 +82,7 @@ convertCel <- function(filename, outFilename, readMap=NULL, writeMap=NULL, versi
82 82
   if (!is.null(newChipType)) {
83 83
     newChipType <- as.character(newChipType);
84 84
     if (nchar(newChipType) == 0) {
85
-      stop("Argument 'newChipType' cannot be a empty string.");
85
+      stop("Argument 'newChipType' cannot be an empty string.");
86 86
     }
87 87
   }
88 88
 
... ...
@@ -109,7 +109,7 @@ convertCel <- function(filename, outFilename, readMap=NULL, writeMap=NULL, versi
109 109
   hdr <- cel$header;
110 110
   if (!is.null(newChipType)) {
111 111
     if (verbose) {
112
-      cat("Updating chip type from '", hdr$chiptype, "' to '", 
112
+      cat("Updating the chip type label from '", hdr$chiptype, "' to '", 
113 113
                                               newChipType, "'.\n", sep="");
114 114
     }
115 115
 
... ...
@@ -41,97 +41,128 @@
41 41
     header <- strsplit(header, split="\n")[[1]];
42 42
   }
43 43
 
44
+
45
+  # Extract the "head" and the "tail" of the DAT header
46
+  pattern <- "([^\024]*)(\024.*)";
47
+  head <- gsub(pattern, "\\1", header);
48
+  tail <- gsub(pattern, "\\2", header);
49
+
44 50
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
45 51
   # [123456789012345678900123456789001234567890]
46 52
   # "[5..65534]  NA06985_H_tH_B5_3005533:",      ????
47 53
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
48 54
   pattern <- "^([^:]*):(.*)$";
49
-  bfr <- gsub(pattern, "\\1", header);
50
-  header2 <- gsub(pattern, "\\2", header);
51
-
52
-  bfr <- trim(bfr);                           # Example: "[12..40151]  Fetal 3"
53
-  pattern <- "^([^]]*])[ ]*(.*)[ ]*";
54
-  pixelRange <- gsub(pattern, "\\1", bfr);
55
-  sampleName <- gsub(pattern, "\\2", bfr);
56
-  if (identical(pixelRange, sampleName)) {
57
-    stop("Internal error: Failed to extract 'pixelRange' and 'sampleName' from DAT header.  They became identical: ", pixelRange);
58
-  }
55
+  if (regexpr(pattern, head) != -1) {
56
+    bfr <- gsub(pattern, "\\1", header);
57
+    header2 <- gsub(pattern, "\\2", header);
58
+    bfr <- trim(bfr);             # Example: "[12..40151]  Fetal 3"
59
+    if (nchar(bfr) > 0) {
60
+      pattern <- "^([^ ]*])[ ]*(.*)[ ]*";
61
+      pixelRange <- gsub(pattern, "\\1", bfr);
62
+      sampleName <- gsub(pattern, "\\2", bfr);
63
+      if (identical(pixelRange, sampleName)) {
64
+        stop("Internal error: Failed to extract 'pixelRange' and 'sampleName' from DAT header.  They became identical: ", pixelRange);
65
+      }
66
+    } else {
67
+      pixelRange <- "";
68
+      sampleName <- "";
69
+    }
59 70
 
60
-  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
61
-  # Parse the DAT header
62
-  #
63
-  # 1. Number of pixels per row (padded with spaces), preceded with 
64
-  #    "CLS=". char[9]
65
-  # 2. Number of rows in the image (padded with spaces), preceded with 
66
-  #    "RWS=".char[9]
67
-  # 3. Pixel width in micrometers (padded with spaces), preceded with 
68
-  #    "XIN=" char[7]
69
-  # 4. Pixel height in micrometers (padded with spaces), preceded with 
70
-  #    "YIN=". char[7]
71
-  # 5. Scan speed in millimeters per second (padded with spaces), preceded 
72
-  #    with "VE=". char[6]
73
-  # 6. Temperature in degrees Celsius (padded with spaces). If no temperature
74
-  #    was set then the entire field is empty. char[7]
75
-  # 7. Laser power in milliwatts or microwatts (padded with spaces). char[4]
76
-  # 8. Date and time of scan (padded with spaces). char[18]
77
-  #
78
-  # Example:
79
-  # [123456789012345678900123456789001234567890] (See above)
80
-  # "CLS=8714 ",
81
-  # "RWS=8714 ",
82
-  # "XIN=1  ",
83
-  # "YIN=1  ",
84
-  # "VE=30 ",
85
-  # "       ",
86
-  # "2.0 ",
87
-  # "01/14/04 14:26:57 "
88
-  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
89
-  len <- c(9,9,7,7,6,7,4,18,220);
90
-  ends <- cumsum(len);
91
-  starts <- ends - len + 1;
92
-  header <- substring(header2, starts, ends);
93
-  header <- trim(header);
94
-
95
-  # Store the last field
96
-  bfr <- header[9];
97
-
98
-  header <- list(
99
-    pixelRange = pixelRange,
100
-    sampleName = sampleName,
101
-    CLS = gsub("^CLS=(.*)", "\\1", header[1]),
102
-    RWS = gsub("^RWS=(.*)", "\\1", header[2]),
103
-    XIN = gsub("^XIN=(.*)", "\\1", header[3]),
104
-    YIN = gsub("^YIN=(.*)", "\\1", header[4]),
105
-    VE = gsub("^VE=(.*)", "\\1", header[5]),
106
-    scanTemp = header[6],
107
-    laserPower = header[7],
108
-    scanDate = header[8]
109
-  )
71
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
72
+    # Parse the DAT header
73
+    #
74
+    # 1. Number of pixels per row (padded with spaces), preceded with 
75
+    #    "CLS=". char[9]
76
+    # 2. Number of rows in the image (padded with spaces), preceded with 
77
+    #    "RWS=".char[9]
78
+    # 3. Pixel width in micrometers (padded with spaces), preceded with 
79
+    #    "XIN=" char[7]
80
+    # 4. Pixel height in micrometers (padded with spaces), preceded with 
81
+    #    "YIN=". char[7]
82
+    # 5. Scan speed in millimeters per second (padded with spaces), preceded 
83
+    #    with "VE=". char[6]
84
+    # 6. Temperature in degrees Celsius (padded with spaces). If no temperature
85
+    #    was set then the entire field is empty. char[7]
86
+    # 7. Laser power in milliwatts or microwatts (padded with spaces). char[4]
87
+    # 8. Date and time of scan (padded with spaces). char[18]
88
+    #
89
+    # Example:
90
+    # [123456789012345678900123456789001234567890] (See above)
91
+    # "CLS=8714 ",
92
+    # "RWS=8714 ",
93
+    # "XIN=1  ",
94
+    # "YIN=1  ",
95
+    # "VE=30 ",
96
+    # "       ",
97
+    # "2.0 ",
98
+    # "01/14/04 14:26:57 "
99
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
100
+    len <- c(9,9,7,7,6,7,4,18,220);
101
+    ends <- cumsum(len);
102
+    starts <- ends - len + 1;
103
+    header <- substring(header2, starts, ends);
104
+    header <- trim(header);
105
+  
106
+    # Store the last field
107
+    bfr <- header[9];
108
+  
109
+    header <- list(
110
+      pixelRange = pixelRange,
111
+      sampleName = sampleName,
112
+      CLS = gsub("^CLS=(.*)", "\\1", header[1]),
113
+      RWS = gsub("^RWS=(.*)", "\\1", header[2]),
114
+      XIN = gsub("^XIN=(.*)", "\\1", header[3]),
115
+      YIN = gsub("^YIN=(.*)", "\\1", header[4]),
116
+      VE = gsub("^VE=(.*)", "\\1", header[5]),
117
+      scanTemp = header[6],
118
+      laserPower = header[7],
119
+      scanDate = header[8]
120
+    );
121
+
122
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
123
+    # The 'bfr' field:
124
+    #
125
+    # "There are several sub-fields in this field. The first sub field is the 
126
+    #  scanner ID, sometimes followed by a number, followed by three spaces. 
127
+    #  If the scanner ID is absent, the field consists of four spaces.
128
+    #
129
+    # Example:
130
+    # [123456789012345678900123456789001234567890] (????)
131
+    # "50101230  M10   \024  \024 Hind240.1sq \024  \024  \024  \024  
132
+    # \024  \024  \024  \024  \024 6"
133
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
134
+    # 0x14 == 024
135
+    pattern <- "^([^\024]*)[ ]*(\024.*)$";
136
+    scannerInfo <- gsub(pattern, "\\1", bfr);
137
+    scannerInfo <- trim(scannerInfo);
138
+    bfr <- gsub(pattern, "\\2", bfr);
139
+  
140
+  # Not locale safe: pattern <- "^([a-zA-Z0-9]*)[ ]*([a-zA-Z0-9]*)[ ]*";
141
+    pattern <- "^([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*";
142
+    header$scanner <- list(
143
+        id = gsub(pattern, "\\1", scannerInfo),
144
+      type = gsub(pattern, "\\2", scannerInfo)
145
+    );
146
+  } else {
147
+    # TO DO: Make these NAs to have the correct storage modes
148
+    naValue <- as.character(NA);
149
+    naValue <- "";
150
+    header <- list(
151
+      pixelRange = naValue,
152
+      sampleName = naValue,
153
+      CLS = naValue,
154
+      RWS = naValue,
155
+      XIN = naValue,
156
+      YIN = naValue,
157
+      VE = naValue,
158
+      scanTemp = naValue,
159
+      laserPower = naValue,
160
+      scanDate = naValue,
161
+      scanner = list(id=naValue, type=naValue)
162
+    );
163
+  }
110 164
 
111
-  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
112
-  # The 'bfr' field:
113
-  #
114
-  # "There are several sub-fields in this field. The first sub field is the 
115
-  #  scanner ID, sometimes followed by a number, followed by three spaces. 
116
-  #  If the scanner ID is absent, the field consists of four spaces.
117
-  #
118
-  # Example:
119
-  # [123456789012345678900123456789001234567890] (????)
120
-  # "50101230  M10   \024  \024 Hind240.1sq \024  \024  \024  \024  
121
-  # \024  \024  \024  \024  \024 6"
122
-  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
123
-  # 0x14 == 024
124
-  pattern <- "^([^\024]*)[ ]*(\024.*)$";
125
-  scannerInfo <- gsub(pattern, "\\1", bfr);
126
-  scannerInfo <- trim(scannerInfo);
127
-  bfr <- gsub(pattern, "\\2", bfr);
128
-
129
-# Not locale safe: pattern <- "^([a-zA-Z0-9]*)[ ]*([a-zA-Z0-9]*)[ ]*";
130
-  pattern <- "^([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*";
131
-  header$scanner <- list(
132
-      id = gsub(pattern, "\\1", scannerInfo),
133
-    type = gsub(pattern, "\\2", scannerInfo)
134
-  )
165
+  bfr <- tail;
135 166
 
136 167
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
137 168
   #  Next are 10 structured comment fields. Each field is preceded by the 
... ...
@@ -268,6 +299,13 @@
268 299
 
269 300
 ############################################################################
270 301
 # HISTORY:
302
+# 2011-02-22
303
+# o ROBUSTNESS/BUG FIX: The internal .unwrapDatHeaderString() would
304
+#   throw "Internal error: Failed to extract 'pixelRange' and 'sampleName' 
305
+#   from DAT header.  They became identical: ..." in case the DAT header
306
+#   of the CEL file did not contain all fields.  The function has now
307
+#   been updated to be more forgiving and robust so that missing values
308
+#   are returned for such fields instead.
271 309
 # 2007-08-16
272 310
 # o BUG FIX: Internal .unwrapDatHeaderString() failed to correctly extract
273 311
 #   'pixelRange' and 'sampleName' from DAT header.
... ...
@@ -1,6 +1,15 @@
1 1
 Package: affxparser
2 2
 ===================
3 3
 
4
+Version: 1.23.3 [2011-02-22]
5
+o ROBUSTNESS/BUG FIX: The internal .unwrapDatHeaderString() would
6
+  throw "Internal error: Failed to extract 'pixelRange' and 'sampleName' 
7
+  from DAT header.  They became identical: ..." in case the DAT header
8
+  of the CEL file did not contain all fields.  The function has now
9
+  been updated to be more forgiving and robust so that missing values
10
+  are returned for such fields instead. 
11
+
12
+
4 13
 Version: 1.23.2 [2011-02-15]
5 14
 o DOCUMENTATION: Added a clarification to the help page on
6 15
   'Cell coordinates and cell indices' that the convention 
... ...
@@ -71,7 +71,7 @@
71 71
    zero-based coordinates in the CDF file.
72 72
 
73 73
    All methods of the \pkg{affxparser} package make use of these
74
-   (x,y) coordinates, and some methods makes it possible to read
74
+   (x,y) coordinates, and some methods make it possible to read
75 75
    them as well.  However, it is much more common that the methods
76 76
    return cell indices \emph{calculated} from the (x,y) coordinates
77 77
    as explained above.