git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@53090 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: affxparser |
2 |
-Version: 1.23.2 |
|
3 |
-Date: 2011-02-15 |
|
2 |
+Version: 1.23.3 |
|
3 |
+Date: 2011-02-22 |
|
4 | 4 |
Title: Affymetrix File Parsing SDK |
5 | 5 |
Author: Henrik Bengtsson, James Bullard, Robert Gentleman, Kasper Daniel Hansen, Martin Morgan |
6 | 6 |
Maintainer: Kasper Daniel Hansen <[email protected]> |
... | ... |
@@ -8,6 +8,6 @@ Description: Package for parsing Affymetrix files (CDF, CEL, CHP, BPMAP, BAR). |
8 | 8 |
FusionDetails: Fusion SDK v1.1.0 |
9 | 9 |
License: LGPL (>= 2) |
10 | 10 |
Depends: R (>= 2.6.0) |
11 |
-Suggests: R.utils, AffymetrixDataTestFiles |
|
11 |
+Suggests: R.utils (>= 1.6.3), AffymetrixDataTestFiles |
|
12 | 12 |
LazyLoad: yes |
13 | 13 |
biocViews: Infrastructure, DataImport |
... | ... |
@@ -82,7 +82,7 @@ convertCel <- function(filename, outFilename, readMap=NULL, writeMap=NULL, versi |
82 | 82 |
if (!is.null(newChipType)) { |
83 | 83 |
newChipType <- as.character(newChipType); |
84 | 84 |
if (nchar(newChipType) == 0) { |
85 |
- stop("Argument 'newChipType' cannot be a empty string."); |
|
85 |
+ stop("Argument 'newChipType' cannot be an empty string."); |
|
86 | 86 |
} |
87 | 87 |
} |
88 | 88 |
|
... | ... |
@@ -109,7 +109,7 @@ convertCel <- function(filename, outFilename, readMap=NULL, writeMap=NULL, versi |
109 | 109 |
hdr <- cel$header; |
110 | 110 |
if (!is.null(newChipType)) { |
111 | 111 |
if (verbose) { |
112 |
- cat("Updating chip type from '", hdr$chiptype, "' to '", |
|
112 |
+ cat("Updating the chip type label from '", hdr$chiptype, "' to '", |
|
113 | 113 |
newChipType, "'.\n", sep=""); |
114 | 114 |
} |
115 | 115 |
|
... | ... |
@@ -41,97 +41,128 @@ |
41 | 41 |
header <- strsplit(header, split="\n")[[1]]; |
42 | 42 |
} |
43 | 43 |
|
44 |
+ |
|
45 |
+ # Extract the "head" and the "tail" of the DAT header |
|
46 |
+ pattern <- "([^\024]*)(\024.*)"; |
|
47 |
+ head <- gsub(pattern, "\\1", header); |
|
48 |
+ tail <- gsub(pattern, "\\2", header); |
|
49 |
+ |
|
44 | 50 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
45 | 51 |
# [123456789012345678900123456789001234567890] |
46 | 52 |
# "[5..65534] NA06985_H_tH_B5_3005533:", ???? |
47 | 53 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
48 | 54 |
pattern <- "^([^:]*):(.*)$"; |
49 |
- bfr <- gsub(pattern, "\\1", header); |
|
50 |
- header2 <- gsub(pattern, "\\2", header); |
|
51 |
- |
|
52 |
- bfr <- trim(bfr); # Example: "[12..40151] Fetal 3" |
|
53 |
- pattern <- "^([^]]*])[ ]*(.*)[ ]*"; |
|
54 |
- pixelRange <- gsub(pattern, "\\1", bfr); |
|
55 |
- sampleName <- gsub(pattern, "\\2", bfr); |
|
56 |
- if (identical(pixelRange, sampleName)) { |
|
57 |
- stop("Internal error: Failed to extract 'pixelRange' and 'sampleName' from DAT header. They became identical: ", pixelRange); |
|
58 |
- } |
|
55 |
+ if (regexpr(pattern, head) != -1) { |
|
56 |
+ bfr <- gsub(pattern, "\\1", header); |
|
57 |
+ header2 <- gsub(pattern, "\\2", header); |
|
58 |
+ bfr <- trim(bfr); # Example: "[12..40151] Fetal 3" |
|
59 |
+ if (nchar(bfr) > 0) { |
|
60 |
+ pattern <- "^([^ ]*])[ ]*(.*)[ ]*"; |
|
61 |
+ pixelRange <- gsub(pattern, "\\1", bfr); |
|
62 |
+ sampleName <- gsub(pattern, "\\2", bfr); |
|
63 |
+ if (identical(pixelRange, sampleName)) { |
|
64 |
+ stop("Internal error: Failed to extract 'pixelRange' and 'sampleName' from DAT header. They became identical: ", pixelRange); |
|
65 |
+ } |
|
66 |
+ } else { |
|
67 |
+ pixelRange <- ""; |
|
68 |
+ sampleName <- ""; |
|
69 |
+ } |
|
59 | 70 |
|
60 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
61 |
- # Parse the DAT header |
|
62 |
- # |
|
63 |
- # 1. Number of pixels per row (padded with spaces), preceded with |
|
64 |
- # "CLS=". char[9] |
|
65 |
- # 2. Number of rows in the image (padded with spaces), preceded with |
|
66 |
- # "RWS=".char[9] |
|
67 |
- # 3. Pixel width in micrometers (padded with spaces), preceded with |
|
68 |
- # "XIN=" char[7] |
|
69 |
- # 4. Pixel height in micrometers (padded with spaces), preceded with |
|
70 |
- # "YIN=". char[7] |
|
71 |
- # 5. Scan speed in millimeters per second (padded with spaces), preceded |
|
72 |
- # with "VE=". char[6] |
|
73 |
- # 6. Temperature in degrees Celsius (padded with spaces). If no temperature |
|
74 |
- # was set then the entire field is empty. char[7] |
|
75 |
- # 7. Laser power in milliwatts or microwatts (padded with spaces). char[4] |
|
76 |
- # 8. Date and time of scan (padded with spaces). char[18] |
|
77 |
- # |
|
78 |
- # Example: |
|
79 |
- # [123456789012345678900123456789001234567890] (See above) |
|
80 |
- # "CLS=8714 ", |
|
81 |
- # "RWS=8714 ", |
|
82 |
- # "XIN=1 ", |
|
83 |
- # "YIN=1 ", |
|
84 |
- # "VE=30 ", |
|
85 |
- # " ", |
|
86 |
- # "2.0 ", |
|
87 |
- # "01/14/04 14:26:57 " |
|
88 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
89 |
- len <- c(9,9,7,7,6,7,4,18,220); |
|
90 |
- ends <- cumsum(len); |
|
91 |
- starts <- ends - len + 1; |
|
92 |
- header <- substring(header2, starts, ends); |
|
93 |
- header <- trim(header); |
|
94 |
- |
|
95 |
- # Store the last field |
|
96 |
- bfr <- header[9]; |
|
97 |
- |
|
98 |
- header <- list( |
|
99 |
- pixelRange = pixelRange, |
|
100 |
- sampleName = sampleName, |
|
101 |
- CLS = gsub("^CLS=(.*)", "\\1", header[1]), |
|
102 |
- RWS = gsub("^RWS=(.*)", "\\1", header[2]), |
|
103 |
- XIN = gsub("^XIN=(.*)", "\\1", header[3]), |
|
104 |
- YIN = gsub("^YIN=(.*)", "\\1", header[4]), |
|
105 |
- VE = gsub("^VE=(.*)", "\\1", header[5]), |
|
106 |
- scanTemp = header[6], |
|
107 |
- laserPower = header[7], |
|
108 |
- scanDate = header[8] |
|
109 |
- ) |
|
71 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
72 |
+ # Parse the DAT header |
|
73 |
+ # |
|
74 |
+ # 1. Number of pixels per row (padded with spaces), preceded with |
|
75 |
+ # "CLS=". char[9] |
|
76 |
+ # 2. Number of rows in the image (padded with spaces), preceded with |
|
77 |
+ # "RWS=".char[9] |
|
78 |
+ # 3. Pixel width in micrometers (padded with spaces), preceded with |
|
79 |
+ # "XIN=" char[7] |
|
80 |
+ # 4. Pixel height in micrometers (padded with spaces), preceded with |
|
81 |
+ # "YIN=". char[7] |
|
82 |
+ # 5. Scan speed in millimeters per second (padded with spaces), preceded |
|
83 |
+ # with "VE=". char[6] |
|
84 |
+ # 6. Temperature in degrees Celsius (padded with spaces). If no temperature |
|
85 |
+ # was set then the entire field is empty. char[7] |
|
86 |
+ # 7. Laser power in milliwatts or microwatts (padded with spaces). char[4] |
|
87 |
+ # 8. Date and time of scan (padded with spaces). char[18] |
|
88 |
+ # |
|
89 |
+ # Example: |
|
90 |
+ # [123456789012345678900123456789001234567890] (See above) |
|
91 |
+ # "CLS=8714 ", |
|
92 |
+ # "RWS=8714 ", |
|
93 |
+ # "XIN=1 ", |
|
94 |
+ # "YIN=1 ", |
|
95 |
+ # "VE=30 ", |
|
96 |
+ # " ", |
|
97 |
+ # "2.0 ", |
|
98 |
+ # "01/14/04 14:26:57 " |
|
99 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
100 |
+ len <- c(9,9,7,7,6,7,4,18,220); |
|
101 |
+ ends <- cumsum(len); |
|
102 |
+ starts <- ends - len + 1; |
|
103 |
+ header <- substring(header2, starts, ends); |
|
104 |
+ header <- trim(header); |
|
105 |
+ |
|
106 |
+ # Store the last field |
|
107 |
+ bfr <- header[9]; |
|
108 |
+ |
|
109 |
+ header <- list( |
|
110 |
+ pixelRange = pixelRange, |
|
111 |
+ sampleName = sampleName, |
|
112 |
+ CLS = gsub("^CLS=(.*)", "\\1", header[1]), |
|
113 |
+ RWS = gsub("^RWS=(.*)", "\\1", header[2]), |
|
114 |
+ XIN = gsub("^XIN=(.*)", "\\1", header[3]), |
|
115 |
+ YIN = gsub("^YIN=(.*)", "\\1", header[4]), |
|
116 |
+ VE = gsub("^VE=(.*)", "\\1", header[5]), |
|
117 |
+ scanTemp = header[6], |
|
118 |
+ laserPower = header[7], |
|
119 |
+ scanDate = header[8] |
|
120 |
+ ); |
|
121 |
+ |
|
122 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
123 |
+ # The 'bfr' field: |
|
124 |
+ # |
|
125 |
+ # "There are several sub-fields in this field. The first sub field is the |
|
126 |
+ # scanner ID, sometimes followed by a number, followed by three spaces. |
|
127 |
+ # If the scanner ID is absent, the field consists of four spaces. |
|
128 |
+ # |
|
129 |
+ # Example: |
|
130 |
+ # [123456789012345678900123456789001234567890] (????) |
|
131 |
+ # "50101230 M10 \024 \024 Hind240.1sq \024 \024 \024 \024 |
|
132 |
+ # \024 \024 \024 \024 \024 6" |
|
133 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
134 |
+ # 0x14 == 024 |
|
135 |
+ pattern <- "^([^\024]*)[ ]*(\024.*)$"; |
|
136 |
+ scannerInfo <- gsub(pattern, "\\1", bfr); |
|
137 |
+ scannerInfo <- trim(scannerInfo); |
|
138 |
+ bfr <- gsub(pattern, "\\2", bfr); |
|
139 |
+ |
|
140 |
+ # Not locale safe: pattern <- "^([a-zA-Z0-9]*)[ ]*([a-zA-Z0-9]*)[ ]*"; |
|
141 |
+ pattern <- "^([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*"; |
|
142 |
+ header$scanner <- list( |
|
143 |
+ id = gsub(pattern, "\\1", scannerInfo), |
|
144 |
+ type = gsub(pattern, "\\2", scannerInfo) |
|
145 |
+ ); |
|
146 |
+ } else { |
|
147 |
+ # TO DO: Make these NAs to have the correct storage modes |
|
148 |
+ naValue <- as.character(NA); |
|
149 |
+ naValue <- ""; |
|
150 |
+ header <- list( |
|
151 |
+ pixelRange = naValue, |
|
152 |
+ sampleName = naValue, |
|
153 |
+ CLS = naValue, |
|
154 |
+ RWS = naValue, |
|
155 |
+ XIN = naValue, |
|
156 |
+ YIN = naValue, |
|
157 |
+ VE = naValue, |
|
158 |
+ scanTemp = naValue, |
|
159 |
+ laserPower = naValue, |
|
160 |
+ scanDate = naValue, |
|
161 |
+ scanner = list(id=naValue, type=naValue) |
|
162 |
+ ); |
|
163 |
+ } |
|
110 | 164 |
|
111 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
112 |
- # The 'bfr' field: |
|
113 |
- # |
|
114 |
- # "There are several sub-fields in this field. The first sub field is the |
|
115 |
- # scanner ID, sometimes followed by a number, followed by three spaces. |
|
116 |
- # If the scanner ID is absent, the field consists of four spaces. |
|
117 |
- # |
|
118 |
- # Example: |
|
119 |
- # [123456789012345678900123456789001234567890] (????) |
|
120 |
- # "50101230 M10 \024 \024 Hind240.1sq \024 \024 \024 \024 |
|
121 |
- # \024 \024 \024 \024 \024 6" |
|
122 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
123 |
- # 0x14 == 024 |
|
124 |
- pattern <- "^([^\024]*)[ ]*(\024.*)$"; |
|
125 |
- scannerInfo <- gsub(pattern, "\\1", bfr); |
|
126 |
- scannerInfo <- trim(scannerInfo); |
|
127 |
- bfr <- gsub(pattern, "\\2", bfr); |
|
128 |
- |
|
129 |
-# Not locale safe: pattern <- "^([a-zA-Z0-9]*)[ ]*([a-zA-Z0-9]*)[ ]*"; |
|
130 |
- pattern <- "^([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*"; |
|
131 |
- header$scanner <- list( |
|
132 |
- id = gsub(pattern, "\\1", scannerInfo), |
|
133 |
- type = gsub(pattern, "\\2", scannerInfo) |
|
134 |
- ) |
|
165 |
+ bfr <- tail; |
|
135 | 166 |
|
136 | 167 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
137 | 168 |
# Next are 10 structured comment fields. Each field is preceded by the |
... | ... |
@@ -268,6 +299,13 @@ |
268 | 299 |
|
269 | 300 |
############################################################################ |
270 | 301 |
# HISTORY: |
302 |
+# 2011-02-22 |
|
303 |
+# o ROBUSTNESS/BUG FIX: The internal .unwrapDatHeaderString() would |
|
304 |
+# throw "Internal error: Failed to extract 'pixelRange' and 'sampleName' |
|
305 |
+# from DAT header. They became identical: ..." in case the DAT header |
|
306 |
+# of the CEL file did not contain all fields. The function has now |
|
307 |
+# been updated to be more forgiving and robust so that missing values |
|
308 |
+# are returned for such fields instead. |
|
271 | 309 |
# 2007-08-16 |
272 | 310 |
# o BUG FIX: Internal .unwrapDatHeaderString() failed to correctly extract |
273 | 311 |
# 'pixelRange' and 'sampleName' from DAT header. |
... | ... |
@@ -1,6 +1,15 @@ |
1 | 1 |
Package: affxparser |
2 | 2 |
=================== |
3 | 3 |
|
4 |
+Version: 1.23.3 [2011-02-22] |
|
5 |
+o ROBUSTNESS/BUG FIX: The internal .unwrapDatHeaderString() would |
|
6 |
+ throw "Internal error: Failed to extract 'pixelRange' and 'sampleName' |
|
7 |
+ from DAT header. They became identical: ..." in case the DAT header |
|
8 |
+ of the CEL file did not contain all fields. The function has now |
|
9 |
+ been updated to be more forgiving and robust so that missing values |
|
10 |
+ are returned for such fields instead. |
|
11 |
+ |
|
12 |
+ |
|
4 | 13 |
Version: 1.23.2 [2011-02-15] |
5 | 14 |
o DOCUMENTATION: Added a clarification to the help page on |
6 | 15 |
'Cell coordinates and cell indices' that the convention |
... | ... |
@@ -71,7 +71,7 @@ |
71 | 71 |
zero-based coordinates in the CDF file. |
72 | 72 |
|
73 | 73 |
All methods of the \pkg{affxparser} package make use of these |
74 |
- (x,y) coordinates, and some methods makes it possible to read |
|
74 |
+ (x,y) coordinates, and some methods make it possible to read |
|
75 | 75 |
them as well. However, it is much more common that the methods |
76 | 76 |
return cell indices \emph{calculated} from the (x,y) coordinates |
77 | 77 |
as explained above. |