From: hb <[email protected]>
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@121021 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -198,7 +198,7 @@ |
198 | 198 |
## Writing each group in turn |
199 | 199 |
# Number of bytes: (18+64)*nbrOfGroups + 14*totalNbrOfCells bytes |
200 | 200 |
groupDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
201 |
- for(igroup in seq(along.with = unit$groups)) { |
|
201 |
+ for(igroup in seq_along(unit$groups)) { |
|
202 | 202 |
group <- unit$groups[[igroup]] |
203 | 203 |
groupDirection <- groupDirections[group$groupdirection]; |
204 | 204 |
groupDirection <- switch(group$groupdirection, |
... | ... |
@@ -229,7 +229,7 @@ |
229 | 229 |
ncol = 4) |
230 | 230 |
|
231 | 231 |
# Number of bytes: 14*nbrOfCells bytes |
232 |
- for(icell in seq(along.with = group$x)) { |
|
232 |
+ for(icell in seq_along(group$x)) { |
|
233 | 233 |
# Number of bytes: 1*4+2*2+1*4+1*2=14 bytes |
234 | 234 |
writeBin(cells[icell, 1], |
235 | 235 |
con = con, size = 4, endian = "little") |
... | ... |
@@ -284,7 +284,7 @@ |
284 | 284 |
cells <- matrix(as.integer(c(qcunit$x, qcunit$y, qcunit$length, |
285 | 285 |
qcunit$pm, qcunit$background)), |
286 | 286 |
ncol = 5) |
287 |
- for(icell in seq(along.with = qcunit$x)) { |
|
287 |
+ for(icell in seq_along(qcunit$x)) { |
|
288 | 288 |
writeBin(cells[icell, 1:2], con = con, size = 2, endian = "little") |
289 | 289 |
writeBin(cells[icell, 3:5], con = con, size = 1, endian = "little") |
290 | 290 |
} |
...
Version: 1.32.3 [2013-06-29]
o BUG FIX: Since affxparser v1.30.2/1.31.2 (r72352; 2013-01-08),
writeCdf() would incorrectly encode the unit types, iff the input
'cdf' argument specified them as integers, e.g. as done by
writeCdf() for AffyGenePDInfo in aroma.affymetrix. More
specifically, the unit type index would be off by one, e.g. an
'expression' unit (1) would be encoded as an 'unknown' unit (0)
and so on. On the other hand, if they were specified by their
unit-type names (e.g. 'expression') the encoding should still be
correct, e.g. if input is constructed from readCdf() of affxparser.
Thanks to Guido Hooiveld at Wageningen UR (The Netherlands) for
reporting on this.
o BUG FIX: Similarily, writeCdf() has "always", at least affxparser
v1.7.4 since (r21888; 2007-01-09), encoded unit directions and
QC unit types incorrectly, iff they were specified as integers.
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@78016 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -158,23 +158,27 @@ |
158 | 158 |
|
159 | 159 |
.writeCdfUnit <- function(unit, con, unitname=NULL) { |
160 | 160 |
## 3. Write the unit |
161 |
- unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
|
162 |
- unitDirection <- unitDirections[unit$unitdirection]; |
|
163 |
- unitType <- switch(unit$unittype, |
|
164 |
- unknown = 0, |
|
165 |
- expression = 1, |
|
166 |
- genotyping = 2, |
|
167 |
- resequencing = 3, |
|
168 |
- tag = 4, |
|
169 |
- copynumber = 5, |
|
170 |
- genotypingcontrol = 6, |
|
171 |
- expressioncontrol = 7) |
|
172 |
- |
|
173 |
- unitDirection <- switch(unit$unitdirection, |
|
174 |
- nodirection = 0, |
|
175 |
- sense = 1, |
|
176 |
- antisense = 2, |
|
177 |
- unknown = 3) |
|
161 |
+ unitType <- unit$unittype |
|
162 |
+ if (!is.numeric(unitType)) { |
|
163 |
+ unitType <- switch(unitType, |
|
164 |
+ unknown = 0, |
|
165 |
+ expression = 1, |
|
166 |
+ genotyping = 2, |
|
167 |
+ resequencing = 3, |
|
168 |
+ tag = 4, |
|
169 |
+ copynumber = 5, |
|
170 |
+ genotypingcontrol = 6, |
|
171 |
+ expressioncontrol = 7) |
|
172 |
+ } |
|
173 |
+ |
|
174 |
+ unitDirection <- unit$unitdirection |
|
175 |
+ if (!is.numeric(unitDirection)) { |
|
176 |
+ unitDirection <- switch(unitDirection, |
|
177 |
+ nodirection = 0, |
|
178 |
+ sense = 1, |
|
179 |
+ antisense = 2, |
|
180 |
+ unknown = 3) |
|
181 |
+ } |
|
178 | 182 |
|
179 | 183 |
unitInfo <- as.integer(c(unitType, unitDirection, |
180 | 184 |
unit$natoms, length(unit$groups), |
... | ... |
@@ -244,26 +248,29 @@ |
244 | 248 |
|
245 | 249 |
.writeCdfQcUnit <- function(qcunit, con) { |
246 | 250 |
## 2. Actually write the qcunit |
247 |
- type <- switch(qcunit$type, |
|
248 |
- unknown = 0, |
|
249 |
- checkerboardNegative = 1, |
|
250 |
- checkerboardPositive = 2, |
|
251 |
- hybeNegative = 3, |
|
252 |
- hybePositive = 4, |
|
253 |
- textFeaturesNegative = 5, |
|
254 |
- textFeaturesPositive = 6, |
|
255 |
- centralNegative = 7, |
|
256 |
- centralPositive = 8, |
|
257 |
- geneExpNegative = 9, |
|
258 |
- geneExpPositive = 10, |
|
259 |
- cycleFidelityNegative = 11, |
|
260 |
- cycleFidelityPositive = 12, |
|
261 |
- centralCrossNegative = 13, |
|
262 |
- centralCrossPositive = 14, |
|
263 |
- crossHybeNegative = 15, |
|
264 |
- crossHybePositive = 16, |
|
265 |
- SpatialNormNegative = 17, |
|
266 |
- SpatialNormPositive = 18) |
|
251 |
+ type <- qcunit$type; |
|
252 |
+ if (!is.numeric(type)) { |
|
253 |
+ type <- switch(type, |
|
254 |
+ unknown = 0, |
|
255 |
+ checkerboardNegative = 1, |
|
256 |
+ checkerboardPositive = 2, |
|
257 |
+ hybeNegative = 3, |
|
258 |
+ hybePositive = 4, |
|
259 |
+ textFeaturesNegative = 5, |
|
260 |
+ textFeaturesPositive = 6, |
|
261 |
+ centralNegative = 7, |
|
262 |
+ centralPositive = 8, |
|
263 |
+ geneExpNegative = 9, |
|
264 |
+ geneExpPositive = 10, |
|
265 |
+ cycleFidelityNegative = 11, |
|
266 |
+ cycleFidelityPositive = 12, |
|
267 |
+ centralCrossNegative = 13, |
|
268 |
+ centralCrossPositive = 14, |
|
269 |
+ crossHybeNegative = 15, |
|
270 |
+ crossHybePositive = 16, |
|
271 |
+ SpatialNormNegative = 17, |
|
272 |
+ SpatialNormPositive = 18) |
|
273 |
+ } |
|
267 | 274 |
|
268 | 275 |
# Write 2 + 4 bytes |
269 | 276 |
nbrOfBytes <- 6; |
... | ... |
@@ -286,6 +293,12 @@ |
286 | 293 |
|
287 | 294 |
############################################################################ |
288 | 295 |
# HISTORY: |
296 |
+# 2013-06-29 |
|
297 |
+# o BUG FIX: Since affxparser 1.30.2/1.31.2, .writeCdfUnit() encoded unit |
|
298 |
+# types incorrectly, iff specified as integers. |
|
299 |
+# o BUG FIX: Likewise, .writeCdfUnit() has always encoded unit directions |
|
300 |
+# incorrectly, iff specified as integers. Moreover, .writeCdfQcUnit() |
|
301 |
+# has always encoded unit types incorrectly, iff specified as integers. |
|
289 | 302 |
# 2013-05-25 /HB |
290 | 303 |
# o Removed all gc() in .initializeCdf(). |
291 | 304 |
# 2013-01-07 /HB |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@76889 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,306 +1,293 @@ |
1 | 1 |
.initializeCdf <- function(con, nRows = 1, nCols = 1, |
2 |
- nUnits = 1, nQcUnits = 0, |
|
3 |
- refSeq = "", |
|
4 |
- unitnames = rep("", nUnits), |
|
5 |
- qcUnitLengths = rep(0, nQcUnits), |
|
6 |
- unitLengths = rep(0, nUnits), |
|
7 |
- ...) { |
|
8 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
9 |
- # Validate arguments |
|
10 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
11 |
- if(length(qcUnitLengths) != nQcUnits) { |
|
12 |
- stop("Number of elements in argument 'qcUnitLengths' does not match 'nQcUnits'"); |
|
2 |
+ nUnits = 1, nQcUnits = 0, |
|
3 |
+ refSeq = "", |
|
4 |
+ unitnames = rep("", nUnits), |
|
5 |
+ qcUnitLengths = rep(0, nQcUnits), |
|
6 |
+ unitLengths = rep(0, nUnits), |
|
7 |
+ ...) { |
|
8 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
9 |
+ # Validate arguments |
|
10 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
11 |
+ if(length(qcUnitLengths) != nQcUnits) { |
|
12 |
+ stop("Number of elements in argument 'qcUnitLengths' does not match 'nQcUnits'"); |
|
13 |
+ } |
|
14 |
+ |
|
15 |
+ if(length(unitLengths) != nUnits) { |
|
16 |
+ stop("Number of elements in argument 'unitLengths' does not match 'nUnits'"); |
|
17 |
+ } |
|
18 |
+ |
|
19 |
+ if(length(refSeq) != 1) { |
|
20 |
+ stop("Argument 'refSeq' should be a single character."); |
|
21 |
+ } |
|
22 |
+ |
|
23 |
+ lrefSeq <- nchar(refSeq); |
|
24 |
+ |
|
25 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
26 |
+ # CDF header |
|
27 |
+ # |
|
28 |
+ # 1 Magic number. Always set to 67. [integer] |
|
29 |
+ # 2 Version number. [integer] |
|
30 |
+ # 3 The number of columns of cells on the array. [unsigned short] |
|
31 |
+ # 4 The number of rows of cells on the array. [unsigned short] |
|
32 |
+ # 5 The number of units in the array not including QC units. The term |
|
33 |
+ # unit is an internal term which means probe set. [integer] |
|
34 |
+ # 6 The number of QC units. [integer] |
|
35 |
+ # 7 The length of the resequencing reference sequence. [integer] |
|
36 |
+ # 8 The resequencing reference sequence. [char[len]] |
|
37 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
38 |
+ offset <- 0; |
|
39 |
+ |
|
40 |
+ ## Magic number and version number |
|
41 |
+ writeBin(object = as.integer(c(67, 1)), |
|
42 |
+ con = con, size = 4, endian = "little") |
|
43 |
+ ## NCols, NRows |
|
44 |
+ writeBin(object = as.integer(c(nCols, nRows)), |
|
45 |
+ con = con, size = 2, endian = "little") |
|
46 |
+ ## NumberUnits, NumberQCUnits |
|
47 |
+ writeBin(object = as.integer(c(nUnits, nQcUnits)), |
|
48 |
+ con = con, size = 4, endian = "little") |
|
49 |
+ ## Length of refSeqsequence |
|
50 |
+ writeBin(object = as.integer(lrefSeq), |
|
51 |
+ con = con, size = 4, endian = "little") |
|
52 |
+ offset <- 24; |
|
53 |
+ |
|
54 |
+ fOffset <- seek(con=con, origin="start", rw="write"); |
|
55 |
+ if (offset != fOffset) { |
|
56 |
+ stop("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
57 |
+ } |
|
58 |
+ |
|
59 |
+ ## RefSeqsequece |
|
60 |
+ if(lrefSeq > 0) |
|
61 |
+ writeChar(as.character(refSeq), con=con, eos=NULL); |
|
62 |
+ |
|
63 |
+ # Current offset |
|
64 |
+ offset <- offset + lrefSeq; |
|
65 |
+ |
|
66 |
+ fOffset <- seek(con=con, origin="start", rw="write"); |
|
67 |
+ if (offset != fOffset) { |
|
68 |
+ stop("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
69 |
+ } |
|
70 |
+ |
|
71 |
+ |
|
72 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
73 |
+ # Unit names |
|
74 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
75 |
+ # Write to raw vector (2*10^6 units => 122Mb; should be ok for now) |
|
76 |
+ # Since we can't create strings with '\0':s, we use '\xFF', |
|
77 |
+ # write to raw and then replace '\xFF' with '\0'. Thus, unit names with |
|
78 |
+ # '\xFF' are invalid, but this should not be a real problem. |
|
79 |
+ pads <- sapply(0:64, FUN=function(x) paste(rep("\xFF", x), collapse="")); |
|
80 |
+ |
|
81 |
+ # Write the unit names in chunks to save memory |
|
82 |
+ nbrOfUnits <- length(unitnames); |
|
83 |
+ chunkSize <- 100000; |
|
84 |
+ nbrOfChunks <- ceiling(nbrOfUnits / chunkSize); |
|
85 |
+ |
|
86 |
+ # Allocate raw vector |
|
87 |
+ raw <- raw(64*chunkSize); |
|
88 |
+ |
|
89 |
+ for (kk in 1:nbrOfChunks) { |
|
90 |
+ # Units for this chunk |
|
91 |
+ from <- (kk-1)*chunkSize+1; |
|
92 |
+ to <- min(from+chunkSize-1, nbrOfUnits); |
|
93 |
+ unitnamesFF <- unitnames[from:to]; |
|
94 |
+ |
|
95 |
+ # Pad the unit names |
|
96 |
+ unitnamesFF <- paste(unitnamesFF, pads[64-nchar(unitnamesFF)], sep=""); |
|
97 |
+ |
|
98 |
+ # Truncate last chunk? |
|
99 |
+ if (chunkSize > length(unitnamesFF)) { |
|
100 |
+ raw <- raw[1:(64*length(unitnamesFF))]; |
|
13 | 101 |
} |
14 | 102 |
|
15 |
- if(length(unitLengths) != nUnits) { |
|
16 |
- stop("Number of elements in argument 'unitLengths' does not match 'nUnits'"); |
|
17 |
- } |
|
18 |
- |
|
19 |
- if(length(refSeq) != 1) |
|
20 |
- stop("Argument 'refSeq' should be a single character."); |
|
21 |
- |
|
22 |
- lrefSeq <- nchar(refSeq); |
|
23 |
- |
|
24 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
25 |
- # CDF header |
|
26 |
- # |
|
27 |
- # 1 Magic number. Always set to 67. [integer] |
|
28 |
- # 2 Version number. [integer] |
|
29 |
- # 3 The number of columns of cells on the array. [unsigned short] |
|
30 |
- # 4 The number of rows of cells on the array. [unsigned short] |
|
31 |
- # 5 The number of units in the array not including QC units. The term |
|
32 |
- # unit is an internal term which means probe set. [integer] |
|
33 |
- # 6 The number of QC units. [integer] |
|
34 |
- # 7 The length of the resequencing reference sequence. [integer] |
|
35 |
- # 8 The resequencing reference sequence. [char[len]] |
|
36 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
37 |
- offset <- 0; |
|
38 |
- |
|
39 |
- ## Magic number and version number |
|
40 |
- writeBin(object = as.integer(c(67, 1)), |
|
41 |
- con = con, size = 4, endian = "little") |
|
42 |
- ## NCols, NRows |
|
43 |
- writeBin(object = as.integer(c(nCols, nRows)), |
|
44 |
- con = con, size = 2, endian = "little") |
|
45 |
- ## NumberUnits, NumberQCUnits |
|
46 |
- writeBin(object = as.integer(c(nUnits, nQcUnits)), |
|
47 |
- con = con, size = 4, endian = "little") |
|
48 |
- ## Length of refSeqsequence |
|
49 |
- writeBin(object = as.integer(lrefSeq), |
|
50 |
- con = con, size = 4, endian = "little") |
|
51 |
- offset <- 24; |
|
52 |
- |
|
53 |
- fOffset <- seek(con=con, origin="start", rw="write"); |
|
54 |
- if (offset != fOffset) { |
|
55 |
- stop("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
56 |
- } |
|
57 |
- |
|
58 |
- ## RefSeqsequece |
|
59 |
- if(lrefSeq > 0) |
|
60 |
- writeChar(as.character(refSeq), con=con, eos=NULL); |
|
61 |
- |
|
62 |
- # Current offset |
|
63 |
- offset <- offset + lrefSeq; |
|
64 |
- |
|
65 |
- fOffset <- seek(con=con, origin="start", rw="write"); |
|
66 |
- if (offset != fOffset) { |
|
67 |
- stop("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
68 |
- } |
|
69 |
- |
|
70 |
- |
|
71 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
72 |
- # Unit names |
|
73 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
74 |
- # Write to raw vector (2*10^6 units => 122Mb; should be ok for now) |
|
75 |
- # Since we can't create strings with '\0':s, we use '\xFF', |
|
76 |
- # write to raw and then replace '\xFF' with '\0'. Thus, unit names with |
|
77 |
- # '\xFF' are invalid, but this should not be a real problem. |
|
78 |
- pads <- sapply(0:64, FUN=function(x) paste(rep("\xFF", x), collapse="")); |
|
79 |
- |
|
80 |
- # Write the unit names in chunks to save memory |
|
81 |
- nbrOfUnits <- length(unitnames); |
|
82 |
- chunkSize <- 100000; |
|
83 |
- nbrOfChunks <- ceiling(nbrOfUnits / chunkSize); |
|
84 |
- |
|
85 |
- # Allocate raw vector |
|
86 |
- raw <- raw(64*chunkSize); |
|
87 |
- |
|
88 |
- for (kk in 1:nbrOfChunks) { |
|
89 |
- # Units for this chunk |
|
90 |
- from <- (kk-1)*chunkSize+1; |
|
91 |
- to <- min(from+chunkSize-1, nbrOfUnits); |
|
92 |
- unitnamesFF <- unitnames[from:to]; |
|
93 |
- |
|
94 |
- # Pad the unit names |
|
95 |
- unitnamesFF <- paste(unitnamesFF, pads[64-nchar(unitnamesFF)], sep=""); |
|
96 |
- |
|
97 |
- # Truncate last chunk? |
|
98 |
- if (chunkSize > length(unitnamesFF)) { |
|
99 |
- raw <- raw[1:(64*length(unitnamesFF))]; |
|
100 |
- } |
|
101 |
- |
|
102 |
- # Write unit names to raw vector |
|
103 |
- raw <- writeBin(con=raw, unitnamesFF, size=1); |
|
104 |
- |
|
105 |
- rm(unitnamesFF); |
|
106 |
- |
|
107 |
- # Garbage collect |
|
108 |
-# gc <- gc(); |
|
109 |
-# print(gc); |
|
110 |
- |
|
111 |
- # Replace all '\xFF' with '\0'. |
|
112 |
- idxs <- which(raw == as.raw(255)); |
|
113 |
- raw[idxs] <- as.raw(0); |
|
114 |
- rm(idxs); |
|
115 |
- |
|
116 |
- writeBin(con=con, raw); |
|
117 |
- } # for (kk in ...) |
|
118 |
- |
|
119 |
- rm(raw); |
|
120 |
- # Garbage collect |
|
121 |
- gc <- gc(); |
|
122 |
- |
|
123 |
-# writeChar(con=con, as.character(unitnames), nchars=rep(64, nUnits), eos=NULL) |
|
124 |
- |
|
125 |
- bytesOfUnitNames <- 64 * nUnits; |
|
126 |
- offset <- offset + bytesOfUnitNames; |
|
127 |
- |
|
128 |
- fOffset <- seek(con=con, origin="start", rw="write"); |
|
129 |
- if (offset != fOffset) { |
|
130 |
- stop("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
131 |
- } |
|
132 |
- |
|
133 |
- bytesOfQcUnits <- 4 * nQcUnits; |
|
134 |
- offset <- offset + bytesOfQcUnits; |
|
135 |
- |
|
136 |
- bytesOfUnits <- 4 * nUnits; |
|
137 |
- offset <- offset + bytesOfUnits; |
|
138 |
- |
|
139 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
140 |
- # QC units file positions |
|
141 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
142 |
- if (nQcUnits > 0) { |
|
143 |
- csum <- cumsum(qcUnitLengths); |
|
144 |
- nextOffset <- csum[nQcUnits]; |
|
145 |
- starts <- c(0, csum[-nQcUnits]); |
|
146 |
- starts <- as.integer(offset + starts); |
|
147 |
- writeBin(starts, con = con, size = 4, endian = "little") |
|
148 |
- } else { |
|
149 |
- nextOffset <- 0; |
|
150 |
-# starts <- 0; |
|
151 |
-# starts <- as.integer(offset + starts); |
|
152 |
-# writeBin(starts, con = con, size = 4, endian = "little") |
|
153 |
- } |
|
154 |
- |
|
155 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
156 |
- # Units file positions |
|
157 |
- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
158 |
- offset <- offset + nextOffset; |
|
159 |
- if (nUnits > 0) { |
|
160 |
- csum <- cumsum(unitLengths); |
|
161 |
- nextOffset <- csum[nUnits]; |
|
162 |
- starts <- c(0, csum[-nUnits]); |
|
163 |
- starts <- as.integer(offset + starts); |
|
164 |
- writeBin(starts, con = con, size = 4, endian = "little"); |
|
165 |
- } else { |
|
166 |
- nextOffset <- 0; |
|
167 |
- } |
|
103 |
+ # Write unit names to raw vector |
|
104 |
+ raw <- writeBin(con=raw, unitnamesFF, size=1); |
|
105 |
+ unitnamesFF <- NULL; # Not needed anymore |
|
106 |
+ |
|
107 |
+ # Replace all '\xFF' with '\0'. |
|
108 |
+ idxs <- which(raw == as.raw(255)); |
|
109 |
+ raw[idxs] <- as.raw(0); |
|
110 |
+ idxs <- NULL; # Not needed anymore |
|
111 |
+ |
|
112 |
+ writeBin(con=con, raw); |
|
113 |
+ } # for (kk in ...) |
|
114 |
+ raw <- NULL; # Not needed anymore |
|
115 |
+ |
|
116 |
+ bytesOfUnitNames <- 64 * nUnits; |
|
117 |
+ offset <- offset + bytesOfUnitNames; |
|
118 |
+ |
|
119 |
+ fOffset <- seek(con=con, origin="start", rw="write"); |
|
120 |
+ if (offset != fOffset) { |
|
121 |
+ stop("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
122 |
+ } |
|
123 |
+ |
|
124 |
+ bytesOfQcUnits <- 4 * nQcUnits; |
|
125 |
+ offset <- offset + bytesOfQcUnits; |
|
126 |
+ |
|
127 |
+ bytesOfUnits <- 4 * nUnits; |
|
128 |
+ offset <- offset + bytesOfUnits; |
|
129 |
+ |
|
130 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
131 |
+ # QC units file positions |
|
132 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
133 |
+ if (nQcUnits > 0) { |
|
134 |
+ csum <- cumsum(qcUnitLengths); |
|
135 |
+ nextOffset <- csum[nQcUnits]; |
|
136 |
+ starts <- c(0, csum[-nQcUnits]); |
|
137 |
+ starts <- as.integer(offset + starts); |
|
138 |
+ writeBin(starts, con = con, size = 4, endian = "little") |
|
139 |
+ } else { |
|
140 |
+ nextOffset <- 0; |
|
141 |
+ } |
|
142 |
+ |
|
143 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
144 |
+ # Units file positions |
|
145 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
146 |
+ offset <- offset + nextOffset; |
|
147 |
+ if (nUnits > 0) { |
|
148 |
+ csum <- cumsum(unitLengths); |
|
149 |
+ nextOffset <- csum[nUnits]; |
|
150 |
+ starts <- c(0, csum[-nUnits]); |
|
151 |
+ starts <- as.integer(offset + starts); |
|
152 |
+ writeBin(starts, con = con, size = 4, endian = "little"); |
|
153 |
+ } else { |
|
154 |
+ nextOffset <- 0; |
|
155 |
+ } |
|
168 | 156 |
} # .initializeCdf() |
169 | 157 |
|
170 | 158 |
|
171 | 159 |
.writeCdfUnit <- function(unit, con, unitname=NULL) { |
172 |
- ## 3. Write the unit |
|
173 |
- unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
|
174 |
- unitDirection <- unitDirections[unit$unitdirection]; |
|
175 |
- unitType <- switch(unit$unittype, |
|
176 |
- unknown = 0, |
|
177 |
- expression = 1, |
|
178 |
- genotyping = 2, |
|
179 |
- resequencing = 3, |
|
180 |
- tag = 4, |
|
181 |
- copynumber = 5, |
|
182 |
- genotypingcontrol = 6, |
|
183 |
- expressioncontrol = 7) |
|
184 |
- |
|
185 |
- unitDirection <- switch(unit$unitdirection, |
|
186 |
- nodirection = 0, |
|
187 |
- sense = 1, |
|
188 |
- antisense = 2, |
|
189 |
- unknown = 3) |
|
190 |
- |
|
191 |
- unitInfo <- as.integer(c(unitType, unitDirection, |
|
192 |
- unit$natoms, length(unit$groups), |
|
193 |
- unit$ncells, unit$unitnumber, |
|
194 |
- unit$ncellsperatom)) |
|
195 |
- |
|
196 |
- # Number of bytes: 2+1+4*4+1=20 bytes |
|
197 |
- writeBin(unitInfo[1], |
|
198 |
- con = con, size = 2, endian = "little") |
|
199 |
- writeBin(unitInfo[2], |
|
200 |
- con = con, size = 1, endian = "little") |
|
201 |
- writeBin(unitInfo[3:6], |
|
160 |
+ ## 3. Write the unit |
|
161 |
+ unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
|
162 |
+ unitDirection <- unitDirections[unit$unitdirection]; |
|
163 |
+ unitType <- switch(unit$unittype, |
|
164 |
+ unknown = 0, |
|
165 |
+ expression = 1, |
|
166 |
+ genotyping = 2, |
|
167 |
+ resequencing = 3, |
|
168 |
+ tag = 4, |
|
169 |
+ copynumber = 5, |
|
170 |
+ genotypingcontrol = 6, |
|
171 |
+ expressioncontrol = 7) |
|
172 |
+ |
|
173 |
+ unitDirection <- switch(unit$unitdirection, |
|
174 |
+ nodirection = 0, |
|
175 |
+ sense = 1, |
|
176 |
+ antisense = 2, |
|
177 |
+ unknown = 3) |
|
178 |
+ |
|
179 |
+ unitInfo <- as.integer(c(unitType, unitDirection, |
|
180 |
+ unit$natoms, length(unit$groups), |
|
181 |
+ unit$ncells, unit$unitnumber, |
|
182 |
+ unit$ncellsperatom)) |
|
183 |
+ |
|
184 |
+ # Number of bytes: 2+1+4*4+1=20 bytes |
|
185 |
+ writeBin(unitInfo[1], |
|
186 |
+ con = con, size = 2, endian = "little") |
|
187 |
+ writeBin(unitInfo[2], |
|
188 |
+ con = con, size = 1, endian = "little") |
|
189 |
+ writeBin(unitInfo[3:6], |
|
190 |
+ con = con, size = 4, endian = "little") |
|
191 |
+ writeBin(unitInfo[7], |
|
192 |
+ con = con, size = 1, endian = "little") |
|
193 |
+ |
|
194 |
+ ## Writing each group in turn |
|
195 |
+ # Number of bytes: (18+64)*nbrOfGroups + 14*totalNbrOfCells bytes |
|
196 |
+ groupDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
|
197 |
+ for(igroup in seq(along.with = unit$groups)) { |
|
198 |
+ group <- unit$groups[[igroup]] |
|
199 |
+ groupDirection <- groupDirections[group$groupdirection]; |
|
200 |
+ groupDirection <- switch(group$groupdirection, |
|
201 |
+ nodirection = 0, |
|
202 |
+ sense = 1, |
|
203 |
+ antisense = 2, |
|
204 |
+ unknown = 3) |
|
205 |
+ groupInfo <- as.integer(c(group$natoms, length(group$x), |
|
206 |
+ group$ncellsperatom, |
|
207 |
+ groupDirection, min(group$atoms, 0))) |
|
208 |
+ # Number of bytes: 2*4+2*1+2*4=18 bytes |
|
209 |
+ writeBin(groupInfo[1:2], |
|
202 | 210 |
con = con, size = 4, endian = "little") |
203 |
- writeBin(unitInfo[7], |
|
211 |
+ writeBin(groupInfo[3:4], |
|
204 | 212 |
con = con, size = 1, endian = "little") |
213 |
+ writeBin(groupInfo[5:6], |
|
214 |
+ con = con, size = 4, endian = "little") |
|
205 | 215 |
|
206 |
- ## Writing each group in turn |
|
207 |
- # Number of bytes: (18+64)*nbrOfGroups + 14*totalNbrOfCells bytes |
|
208 |
- groupDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
|
209 |
- for(igroup in seq(along.with = unit$groups)) { |
|
210 |
- group <- unit$groups[[igroup]] |
|
211 |
- groupDirection <- groupDirections[group$groupdirection]; |
|
212 |
- groupDirection <- switch(group$groupdirection, |
|
213 |
- nodirection = 0, |
|
214 |
- sense = 1, |
|
215 |
- antisense = 2, |
|
216 |
- unknown = 3) |
|
217 |
- groupInfo <- as.integer(c(group$natoms, length(group$x), |
|
218 |
- group$ncellsperatom, |
|
219 |
- groupDirection, min(group$atoms, 0))) |
|
220 |
- # Number of bytes: 2*4+2*1+2*4=18 bytes |
|
221 |
- writeBin(groupInfo[1:2], |
|
222 |
- con = con, size = 4, endian = "little") |
|
223 |
- writeBin(groupInfo[3:4], |
|
224 |
- con = con, size = 1, endian = "little") |
|
225 |
- writeBin(groupInfo[5:6], |
|
226 |
- con = con, size = 4, endian = "little") |
|
227 |
- |
|
228 |
- # Number of bytes: 64 bytes |
|
229 |
- suppressWarnings({ |
|
230 |
- writeChar(as.character(names(unit$groups)[igroup]), |
|
231 |
- con = con, nchars = 64, eos = NULL) |
|
232 |
- }) |
|
233 |
- |
|
234 |
- ## Writing each cell in turn |
|
235 |
-# cells <- matrix(as.integer(c(group$atom, group$x, |
|
236 |
-# group$y, group$indexpos)), |
|
237 |
-# ncol = 4) |
|
238 |
- cells <- matrix(as.integer(c(group$indexpos, group$x, |
|
239 |
- group$y, group$atom)), |
|
240 |
- ncol = 4) |
|
241 |
- |
|
242 |
- # Number of bytes: 14*nbrOfCells bytes |
|
243 |
- for(icell in seq(along.with = group$x)) { |
|
244 |
- # Number of bytes: 1*4+2*2+1*4+1*2=14 bytes |
|
245 |
- writeBin(cells[icell, 1], |
|
246 |
- con = con, size = 4, endian = "little") |
|
247 |
- writeBin(cells[icell, 2:3], |
|
248 |
- con = con, size = 2, endian = "little") |
|
249 |
- writeBin(cells[icell, 4], |
|
250 |
- con = con, size = 4, endian = "little") |
|
251 |
- writeChar(as.character(c(group$pbase[icell], |
|
252 |
- group$tbase[icell])), |
|
253 |
- con = con, nchars = c(1,1), eos = NULL) |
|
254 |
- } |
|
255 |
- } |
|
216 |
+ # Number of bytes: 64 bytes |
|
217 |
+ suppressWarnings({ |
|
218 |
+ writeChar(as.character(names(unit$groups)[igroup]), |
|
219 |
+ con = con, nchars = 64, eos = NULL) |
|
220 |
+ }) |
|
221 |
+ |
|
222 |
+ ## Writing each cell in turn |
|
223 |
+ cells <- matrix(as.integer(c(group$indexpos, group$x, |
|
224 |
+ group$y, group$atom)), |
|
225 |
+ ncol = 4) |
|
226 |
+ |
|
227 |
+ # Number of bytes: 14*nbrOfCells bytes |
|
228 |
+ for(icell in seq(along.with = group$x)) { |
|
229 |
+ # Number of bytes: 1*4+2*2+1*4+1*2=14 bytes |
|
230 |
+ writeBin(cells[icell, 1], |
|
231 |
+ con = con, size = 4, endian = "little") |
|
232 |
+ writeBin(cells[icell, 2:3], |
|
233 |
+ con = con, size = 2, endian = "little") |
|
234 |
+ writeBin(cells[icell, 4], |
|
235 |
+ con = con, size = 4, endian = "little") |
|
236 |
+ writeChar(as.character(c(group$pbase[icell], |
|
237 |
+ group$tbase[icell])), |
|
238 |
+ con = con, nchars = c(1,1), eos = NULL) |
|
239 |
+ } # for (icell ...) |
|
240 |
+ } # for (igroup ...) |
|
256 | 241 |
} # .writeCdfUnit() |
257 | 242 |
|
258 | 243 |
|
259 | 244 |
|
260 | 245 |
.writeCdfQcUnit <- function(qcunit, con) { |
261 |
- ## 2. Actually write the qcunit |
|
262 |
- type <- switch(qcunit$type, |
|
263 |
- unknown = 0, |
|
264 |
- checkerboardNegative = 1, |
|
265 |
- checkerboardPositive = 2, |
|
266 |
- hybeNegative = 3, |
|
267 |
- hybePositive = 4, |
|
268 |
- textFeaturesNegative = 5, |
|
269 |
- textFeaturesPositive = 6, |
|
270 |
- centralNegative = 7, |
|
271 |
- centralPositive = 8, |
|
272 |
- geneExpNegative = 9, |
|
273 |
- geneExpPositive = 10, |
|
274 |
- cycleFidelityNegative = 11, |
|
275 |
- cycleFidelityPositive = 12, |
|
276 |
- centralCrossNegative = 13, |
|
277 |
- centralCrossPositive = 14, |
|
278 |
- crossHybeNegative = 15, |
|
279 |
- crossHybePositive = 16, |
|
280 |
- SpatialNormNegative = 17, |
|
281 |
- SpatialNormPositive = 18) |
|
282 |
- |
|
283 |
- # Write 2 + 4 bytes |
|
284 |
- nbrOfBytes <- 6; |
|
285 |
- qcunitInfo <- as.integer(c(type, qcunit$ncells)) |
|
286 |
- writeBin(qcunitInfo[1], con = con, size = 2, endian = "little") |
|
287 |
- writeBin(qcunitInfo[2], con = con, size = 4, endian = "little") |
|
288 |
- |
|
289 |
- # Write 2 + 4 bytes |
|
290 |
- nCells <- length(qcunit$x); |
|
291 |
- nbrOfBytes <- 7*nCells; |
|
292 |
- cells <- matrix(as.integer(c(qcunit$x, qcunit$y, qcunit$length, |
|
293 |
- qcunit$pm, qcunit$background)), |
|
294 |
- ncol = 5) |
|
295 |
- for(icell in seq(along.with = qcunit$x)) { |
|
296 |
- writeBin(cells[icell, 1:2], con = con, size = 2, endian = "little") |
|
297 |
- writeBin(cells[icell, 3:5], con = con, size = 1, endian = "little") |
|
298 |
- } |
|
246 |
+ ## 2. Actually write the qcunit |
|
247 |
+ type <- switch(qcunit$type, |
|
248 |
+ unknown = 0, |
|
249 |
+ checkerboardNegative = 1, |
|
250 |
+ checkerboardPositive = 2, |
|
251 |
+ hybeNegative = 3, |
|
252 |
+ hybePositive = 4, |
|
253 |
+ textFeaturesNegative = 5, |
|
254 |
+ textFeaturesPositive = 6, |
|
255 |
+ centralNegative = 7, |
|
256 |
+ centralPositive = 8, |
|
257 |
+ geneExpNegative = 9, |
|
258 |
+ geneExpPositive = 10, |
|
259 |
+ cycleFidelityNegative = 11, |
|
260 |
+ cycleFidelityPositive = 12, |
|
261 |
+ centralCrossNegative = 13, |
|
262 |
+ centralCrossPositive = 14, |
|
263 |
+ crossHybeNegative = 15, |
|
264 |
+ crossHybePositive = 16, |
|
265 |
+ SpatialNormNegative = 17, |
|
266 |
+ SpatialNormPositive = 18) |
|
267 |
+ |
|
268 |
+ # Write 2 + 4 bytes |
|
269 |
+ nbrOfBytes <- 6; |
|
270 |
+ qcunitInfo <- as.integer(c(type, qcunit$ncells)) |
|
271 |
+ writeBin(qcunitInfo[1], con = con, size = 2, endian = "little") |
|
272 |
+ writeBin(qcunitInfo[2], con = con, size = 4, endian = "little") |
|
273 |
+ |
|
274 |
+ # Write 2 + 4 bytes |
|
275 |
+ nCells <- length(qcunit$x); |
|
276 |
+ nbrOfBytes <- 7*nCells; |
|
277 |
+ cells <- matrix(as.integer(c(qcunit$x, qcunit$y, qcunit$length, |
|
278 |
+ qcunit$pm, qcunit$background)), |
|
279 |
+ ncol = 5) |
|
280 |
+ for(icell in seq(along.with = qcunit$x)) { |
|
281 |
+ writeBin(cells[icell, 1:2], con = con, size = 2, endian = "little") |
|
282 |
+ writeBin(cells[icell, 3:5], con = con, size = 1, endian = "little") |
|
283 |
+ } |
|
299 | 284 |
} # .writeCdfQcUnit() |
300 | 285 |
|
301 | 286 |
|
302 | 287 |
############################################################################ |
303 | 288 |
# HISTORY: |
289 |
+# 2013-05-25 /HB |
|
290 |
+# o Removed all gc() in .initializeCdf(). |
|
304 | 291 |
# 2013-01-07 /HB |
305 | 292 |
# o GENERALIZATION: .writeCdfUnit() now also encodes unit types |
306 | 293 |
# 'genotypingcontrol' and 'expressioncontrol'. |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@72352 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -170,30 +170,17 @@ |
170 | 170 |
|
171 | 171 |
.writeCdfUnit <- function(unit, con, unitname=NULL) { |
172 | 172 |
## 3. Write the unit |
173 |
-## unitTypes <- c(expression=1, genotyping=2, tag=3, |
|
174 |
-## resequencing=4, unknown=5); |
|
175 |
-## |
|
176 |
-## unitType <- unitTypes[unit$unittype]; |
|
177 | 173 |
unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
178 | 174 |
unitDirection <- unitDirections[unit$unitdirection]; |
179 |
- |
|
180 |
-## unitType <- switch(unit$unittype, |
|
181 |
-## expression = 1, |
|
182 |
-## genotyping = 2, |
|
183 |
-## tag = 3, |
|
184 |
-## resequencing = 4, |
|
185 |
-## unknown = 5) |
|
186 |
- |
|
187 |
- # In some version of the Fusion SDK documentation, the unit type |
|
188 |
- # with value 5 (five) was labelled "unknown". For backward |
|
189 |
- # compatibility we recognize input value "unknown" as well. |
|
190 | 175 |
unitType <- switch(unit$unittype, |
176 |
+ unknown = 0, |
|
191 | 177 |
expression = 1, |
192 | 178 |
genotyping = 2, |
193 | 179 |
resequencing = 3, |
194 | 180 |
tag = 4, |
195 | 181 |
copynumber = 5, |
196 |
- unknown = 5) |
|
182 |
+ genotypingcontrol = 6, |
|
183 |
+ expressioncontrol = 7) |
|
197 | 184 |
|
198 | 185 |
unitDirection <- switch(unit$unitdirection, |
199 | 186 |
nodirection = 0, |
... | ... |
@@ -314,6 +301,11 @@ |
314 | 301 |
|
315 | 302 |
############################################################################ |
316 | 303 |
# HISTORY: |
304 |
+# 2013-01-07 /HB |
|
305 |
+# o GENERALIZATION: .writeCdfUnit() now also encodes unit types |
|
306 |
+# 'genotypingcontrol' and 'expressioncontrol'. |
|
307 |
+# o BUG FIX: .writeCdfUnit() incorrectly encoded the 'unknown' unit type |
|
308 |
+# as 5 and not 0. |
|
317 | 309 |
# 2008-08-09 /HB |
318 | 310 |
# o BUG FIX: .writeCdfUnit() did output unit type 'resequencing' and 'tag' |
319 | 311 |
# as 4 and 3, and not 3 and 4, respectively. |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@66038 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -52,7 +52,7 @@ |
52 | 52 |
|
53 | 53 |
fOffset <- seek(con=con, origin="start", rw="write"); |
54 | 54 |
if (offset != fOffset) { |
55 |
- throw("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
55 |
+ stop("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
56 | 56 |
} |
57 | 57 |
|
58 | 58 |
## RefSeqsequece |
... | ... |
@@ -64,7 +64,7 @@ |
64 | 64 |
|
65 | 65 |
fOffset <- seek(con=con, origin="start", rw="write"); |
66 | 66 |
if (offset != fOffset) { |
67 |
- throw("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
67 |
+ stop("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
68 | 68 |
} |
69 | 69 |
|
70 | 70 |
|
... | ... |
@@ -127,7 +127,7 @@ |
127 | 127 |
|
128 | 128 |
fOffset <- seek(con=con, origin="start", rw="write"); |
129 | 129 |
if (offset != fOffset) { |
130 |
- throw("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
130 |
+ stop("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
131 | 131 |
} |
132 | 132 |
|
133 | 133 |
bytesOfQcUnits <- 4 * nQcUnits; |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@33157 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -170,18 +170,31 @@ |
170 | 170 |
|
171 | 171 |
.writeCdfUnit <- function(unit, con, unitname=NULL) { |
172 | 172 |
## 3. Write the unit |
173 |
- unitTypes <- c(expression=1, genotyping=2, tag=3, |
|
174 |
- resequencing=4, unknown=5); |
|
175 |
- unitType <- unitTypes[unit$unittype]; |
|
173 |
+## unitTypes <- c(expression=1, genotyping=2, tag=3, |
|
174 |
+## resequencing=4, unknown=5); |
|
175 |
+## |
|
176 |
+## unitType <- unitTypes[unit$unittype]; |
|
176 | 177 |
unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
177 | 178 |
unitDirection <- unitDirections[unit$unitdirection]; |
178 | 179 |
|
180 |
+## unitType <- switch(unit$unittype, |
|
181 |
+## expression = 1, |
|
182 |
+## genotyping = 2, |
|
183 |
+## tag = 3, |
|
184 |
+## resequencing = 4, |
|
185 |
+## unknown = 5) |
|
186 |
+ |
|
187 |
+ # In some version of the Fusion SDK documentation, the unit type |
|
188 |
+ # with value 5 (five) was labelled "unknown". For backward |
|
189 |
+ # compatibility we recognize input value "unknown" as well. |
|
179 | 190 |
unitType <- switch(unit$unittype, |
180 | 191 |
expression = 1, |
181 | 192 |
genotyping = 2, |
182 |
- tag = 3, |
|
183 |
- resequencing = 4, |
|
193 |
+ resequencing = 3, |
|
194 |
+ tag = 4, |
|
195 |
+ copynumber = 5, |
|
184 | 196 |
unknown = 5) |
197 |
+ |
|
185 | 198 |
unitDirection <- switch(unit$unitdirection, |
186 | 199 |
nodirection = 0, |
187 | 200 |
sense = 1, |
... | ... |
@@ -301,8 +314,11 @@ |
301 | 314 |
|
302 | 315 |
############################################################################ |
303 | 316 |
# HISTORY: |
317 |
+# 2008-08-09 /HB |
|
318 |
+# o BUG FIX: .writeCdfUnit() did output unit type 'resequencing' and 'tag' |
|
319 |
+# as 4 and 3, and not 3 and 4, respectively. |
|
304 | 320 |
# 2007-11-13 /KH |
305 |
-# o BUG FIX: The rrror message in internal .initializeCdf() would mention |
|
321 |
+# o BUG FIX: The error message in internal .initializeCdf() would mention |
|
306 | 322 |
# 'qcUnitLengths' when it was meant to say 'unitLengths'. |
307 | 323 |
# 2007-07-13 /HB |
308 | 324 |
# o While writing unit names in .initializeCdf(), quite a few copies were |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@29108 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -301,6 +301,9 @@ |
301 | 301 |
|
302 | 302 |
############################################################################ |
303 | 303 |
# HISTORY: |
304 |
+# 2007-11-13 /KH |
|
305 |
+# o BUG FIX: The rrror message in internal .initializeCdf() would mention |
|
306 |
+# 'qcUnitLengths' when it was meant to say 'unitLengths'. |
|
304 | 307 |
# 2007-07-13 /HB |
305 | 308 |
# o While writing unit names in .initializeCdf(), quite a few copies were |
306 | 309 |
# created using up a lot of memory. By removing unused objects and |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@28691 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -13,7 +13,7 @@ |
13 | 13 |
} |
14 | 14 |
|
15 | 15 |
if(length(unitLengths) != nUnits) { |
16 |
- stop("Number of elements in argument 'qcUnitLengths' does not match 'nUnits'"); |
|
16 |
+ stop("Number of elements in argument 'unitLengths' does not match 'nUnits'"); |
|
17 | 17 |
} |
18 | 18 |
|
19 | 19 |
if(length(refSeq) != 1) |
... | ... |
@@ -28,7 +28,7 @@ |
28 | 28 |
# 2 Version number. [integer] |
29 | 29 |
# 3 The number of columns of cells on the array. [unsigned short] |
30 | 30 |
# 4 The number of rows of cells on the array. [unsigned short] |
31 |
- # 5 The number of units in the array not including QC units. The term |
|
31 |
+ # 5 The number of units in the array not including QC units. The term |
|
32 | 32 |
# unit is an internal term which means probe set. [integer] |
33 | 33 |
# 6 The number of QC units. [integer] |
34 | 34 |
# 7 The length of the resequencing reference sequence. [integer] |
... | ... |
@@ -53,8 +53,8 @@ |
53 | 53 |
fOffset <- seek(con=con, origin="start", rw="write"); |
54 | 54 |
if (offset != fOffset) { |
55 | 55 |
throw("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset); |
56 |
- } |
|
57 |
- |
|
56 |
+ } |
|
57 |
+ |
|
58 | 58 |
## RefSeqsequece |
59 | 59 |
if(lrefSeq > 0) |
60 | 60 |
writeChar(as.character(refSeq), con=con, eos=NULL); |
... | ... |
@@ -65,7 +65,7 @@ |
65 | 65 |
fOffset <- seek(con=con, origin="start", rw="write"); |
66 | 66 |
if (offset != fOffset) { |
67 | 67 |
throw("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset); |
68 |
- } |
|
68 |
+ } |
|
69 | 69 |
|
70 | 70 |
|
71 | 71 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
... | ... |
@@ -128,7 +128,7 @@ |
128 | 128 |
fOffset <- seek(con=con, origin="start", rw="write"); |
129 | 129 |
if (offset != fOffset) { |
130 | 130 |
throw("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset); |
131 |
- } |
|
131 |
+ } |
|
132 | 132 |
|
133 | 133 |
bytesOfQcUnits <- 4 * nQcUnits; |
134 | 134 |
offset <- offset + bytesOfQcUnits; |
... | ... |
@@ -170,7 +170,7 @@ |
170 | 170 |
|
171 | 171 |
.writeCdfUnit <- function(unit, con, unitname=NULL) { |
172 | 172 |
## 3. Write the unit |
173 |
- unitTypes <- c(expression=1, genotyping=2, tag=3, |
|
173 |
+ unitTypes <- c(expression=1, genotyping=2, tag=3, |
|
174 | 174 |
resequencing=4, unknown=5); |
175 | 175 |
unitType <- unitTypes[unit$unittype]; |
176 | 176 |
unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
... | ... |
@@ -193,7 +193,7 @@ |
193 | 193 |
unit$ncells, unit$unitnumber, |
194 | 194 |
unit$ncellsperatom)) |
195 | 195 |
|
196 |
- # Number of bytes: 2+1+4*4+1=20 bytes |
|
196 |
+ # Number of bytes: 2+1+4*4+1=20 bytes |
|
197 | 197 |
writeBin(unitInfo[1], |
198 | 198 |
con = con, size = 2, endian = "little") |
199 | 199 |
writeBin(unitInfo[2], |
... | ... |
@@ -228,7 +228,7 @@ |
228 | 228 |
# Number of bytes: 64 bytes |
229 | 229 |
suppressWarnings({ |
230 | 230 |
writeChar(as.character(names(unit$groups)[igroup]), |
231 |
- con = con, nchars = 64, eos = NULL) |
|
231 |
+ con = con, nchars = 64, eos = NULL) |
|
232 | 232 |
}) |
233 | 233 |
|
234 | 234 |
## Writing each cell in turn |
... | ... |
@@ -302,7 +302,7 @@ |
302 | 302 |
############################################################################ |
303 | 303 |
# HISTORY: |
304 | 304 |
# 2007-07-13 /HB |
305 |
-# o While writing unit names in .initializeCdf(), quite a few copies were |
|
305 |
+# o While writing unit names in .initializeCdf(), quite a few copies were |
|
306 | 306 |
# created using up a lot of memory. By removing unused objects and |
307 | 307 |
# writing unit names in chunks memory usage is now stable and < 200MB. |
308 | 308 |
# 2007-02-01 /HB |
... | ... |
@@ -314,7 +314,7 @@ |
314 | 314 |
# o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits(). With |
315 | 315 |
# these it is now possible to build up the CDF in chunks. |
316 | 316 |
# o Removed obsolete arguments 'addName' and 'addPositions' and all related |
317 |
-# code. Internal variable 'positions' is not needed anymore. |
|
317 |
+# code. Internal variable 'positions' is not needed anymore. |
|
318 | 318 |
# There are no more seek():s in the code. |
319 | 319 |
# o Removed obsolete .writeCdfUnit2(). |
320 | 320 |
# o Now only every 1000th unit (instead of 100th) is reported. It is now |
Version: 1.8.1 [2007-07-26]
o Now affxparser install on OSX with PPC.
Version: 1.7.6 [2007-03-28] (never committed until v1.9.2)
o Modified findCdf() such that it is possible to set an alternative
function for how CDFs are located.
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@26016 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -34,6 +34,8 @@ |
34 | 34 |
# 7 The length of the resequencing reference sequence. [integer] |
35 | 35 |
# 8 The resequencing reference sequence. [char[len]] |
36 | 36 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
37 |
+ offset <- 0; |
|
38 |
+ |
|
37 | 39 |
## Magic number and version number |
38 | 40 |
writeBin(object = as.integer(c(67, 1)), |
39 | 41 |
con = con, size = 4, endian = "little") |
... | ... |
@@ -46,12 +48,25 @@ |
46 | 48 |
## Length of refSeqsequence |
47 | 49 |
writeBin(object = as.integer(lrefSeq), |
48 | 50 |
con = con, size = 4, endian = "little") |
51 |
+ offset <- 24; |
|
52 |
+ |
|
53 |
+ fOffset <- seek(con=con, origin="start", rw="write"); |
|
54 |
+ if (offset != fOffset) { |
|
55 |
+ throw("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
56 |
+ } |
|
57 |
+ |
|
49 | 58 |
## RefSeqsequece |
50 | 59 |
if(lrefSeq > 0) |
51 | 60 |
writeChar(as.character(refSeq), con=con, eos=NULL); |
52 | 61 |
|
53 | 62 |
# Current offset |
54 |
- offset <- 24 + lrefSeq; |
|
63 |
+ offset <- offset + lrefSeq; |
|
64 |
+ |
|
65 |
+ fOffset <- seek(con=con, origin="start", rw="write"); |
|
66 |
+ if (offset != fOffset) { |
|
67 |
+ throw("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
68 |
+ } |
|
69 |
+ |
|
55 | 70 |
|
56 | 71 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
57 | 72 |
# Unit names |
... | ... |
@@ -61,17 +76,60 @@ |
61 | 76 |
# write to raw and then replace '\xFF' with '\0'. Thus, unit names with |
62 | 77 |
# '\xFF' are invalid, but this should not be a real problem. |
63 | 78 |
pads <- sapply(0:64, FUN=function(x) paste(rep("\xFF", x), collapse="")); |
64 |
- unitnames <- paste(unitnames, pads[64-nchar(unitnames)], sep=""); |
|
65 |
- raw <- raw(64*length(unitnames)); |
|
66 |
- raw <- writeBin(con=raw, unitnames, size=1); |
|
67 |
- raw[raw == as.raw(255)] <- as.raw(0); |
|
68 |
- writeBin(con=con, raw); |
|
69 |
- rm(raw); |
|
79 |
+ |
|
80 |
+ # Write the unit names in chunks to save memory |
|
81 |
+ nbrOfUnits <- length(unitnames); |
|
82 |
+ chunkSize <- 100000; |
|
83 |
+ nbrOfChunks <- ceiling(nbrOfUnits / chunkSize); |
|
84 |
+ |
|
85 |
+ # Allocate raw vector |
|
86 |
+ raw <- raw(64*chunkSize); |
|
87 |
+ |
|
88 |
+ for (kk in 1:nbrOfChunks) { |
|
89 |
+ # Units for this chunk |
|
90 |
+ from <- (kk-1)*chunkSize+1; |
|
91 |
+ to <- min(from+chunkSize-1, nbrOfUnits); |
|
92 |
+ unitnamesFF <- unitnames[from:to]; |
|
93 |
+ |
|
94 |
+ # Pad the unit names |
|
95 |
+ unitnamesFF <- paste(unitnamesFF, pads[64-nchar(unitnamesFF)], sep=""); |
|
96 |
+ |
|
97 |
+ # Truncate last chunk? |
|
98 |
+ if (chunkSize > length(unitnamesFF)) { |
|
99 |
+ raw <- raw[1:(64*length(unitnamesFF))]; |
|
100 |
+ } |
|
101 |
+ |
|
102 |
+ # Write unit names to raw vector |
|
103 |
+ raw <- writeBin(con=raw, unitnamesFF, size=1); |
|
104 |
+ |
|
105 |
+ rm(unitnamesFF); |
|
106 |
+ |
|
107 |
+ # Garbage collect |
|
108 |
+# gc <- gc(); |
|
109 |
+# print(gc); |
|
110 |
+ |
|
111 |
+ # Replace all '\xFF' with '\0'. |
|
112 |
+ idxs <- which(raw == as.raw(255)); |
|
113 |
+ raw[idxs] <- as.raw(0); |
|
114 |
+ rm(idxs); |
|
115 |
+ |
|
116 |
+ writeBin(con=con, raw); |
|
117 |
+ } # for (kk in ...) |
|
118 |
+ |
|
119 |
+ rm(raw); |
|
120 |
+ # Garbage collect |
|
121 |
+ gc <- gc(); |
|
122 |
+ |
|
70 | 123 |
# writeChar(con=con, as.character(unitnames), nchars=rep(64, nUnits), eos=NULL) |
71 | 124 |
|
72 | 125 |
bytesOfUnitNames <- 64 * nUnits; |
73 | 126 |
offset <- offset + bytesOfUnitNames; |
74 | 127 |
|
128 |
+ fOffset <- seek(con=con, origin="start", rw="write"); |
|
129 |
+ if (offset != fOffset) { |
|
130 |
+ throw("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset); |
|
131 |
+ } |
|
132 |
+ |
|
75 | 133 |
bytesOfQcUnits <- 4 * nQcUnits; |
76 | 134 |
offset <- offset + bytesOfQcUnits; |
77 | 135 |
|
... | ... |
@@ -243,6 +301,10 @@ |
243 | 301 |
|
244 | 302 |
############################################################################ |
245 | 303 |
# HISTORY: |
304 |
+# 2007-07-13 /HB |
|
305 |
+# o While writing unit names in .initializeCdf(), quite a few copies were |
|
306 |
+# created using up a lot of memory. By removing unused objects and |
|
307 |
+# writing unit names in chunks memory usage is now stable and < 200MB. |
|
246 | 308 |
# 2007-02-01 /HB |
247 | 309 |
# o Updated to camel case as much as possible to match JBs updates in the |
248 | 310 |
# branch. |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@22355 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,35 +1,25 @@ |
1 |
-.initializeCdf <- function(con, nrows = 1, ncols = 1, |
|
2 |
- nunits = 1, nqcunits = 0, |
|
3 |
- refseq = "", |
|
4 |
- unitnames = rep("", nunits), |
|
5 |
- qcunitpositions = rep(1, nqcunits), |
|
6 |
- unitpositions = rep(2, nunits), |
|
7 |
- qcUnitLengths = rep(0, nqcunits), |
|
8 |
- unitLengths = rep(0, nunits), |
|
1 |
+.initializeCdf <- function(con, nRows = 1, nCols = 1, |
|
2 |
+ nUnits = 1, nQcUnits = 0, |
|
3 |
+ refSeq = "", |
|
4 |
+ unitnames = rep("", nUnits), |
|
5 |
+ qcUnitLengths = rep(0, nQcUnits), |
|
6 |
+ unitLengths = rep(0, nUnits), |
|
9 | 7 |
...) { |
10 | 8 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
11 | 9 |
# Validate arguments |
12 | 10 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
13 |
- if(length(qcunitpositions) != nqcunits) { |
|
14 |
- stop("Number of elements in argument 'qcunitpositions' does not match 'nqcunits'"); |
|
11 |
+ if(length(qcUnitLengths) != nQcUnits) { |
|
12 |
+ stop("Number of elements in argument 'qcUnitLengths' does not match 'nQcUnits'"); |
|
15 | 13 |
} |
16 | 14 |
|
17 |
- if(length(unitpositions) != nunits) { |
|
18 |
- stop("Number of elements in argument 'unitpositions' does not match 'nunits'"); |
|
15 |
+ if(length(unitLengths) != nUnits) { |
|
16 |
+ stop("Number of elements in argument 'qcUnitLengths' does not match 'nUnits'"); |
|
19 | 17 |
} |
20 | 18 |
|
21 |
- if(length(qcUnitLengths) != nqcunits) { |
|
22 |
- stop("Number of elements in argument 'qcUnitLengths' does not match 'nqcunits'"); |
|
23 |
- } |
|
24 |
- |
|
25 |
- if(length(unitLengths) != nunits) { |
|
26 |
- stop("Number of elements in argument 'qcUnitLengths' does not match 'nunits'"); |
|
27 |
- } |
|
28 |
- |
|
29 |
- if(length(refseq) != 1) |
|
30 |
- stop("Argument 'refseq' should be a single character."); |
|
19 |
+ if(length(refSeq) != 1) |
|
20 |
+ stop("Argument 'refSeq' should be a single character."); |
|
31 | 21 |
|
32 |
- lrefseq <- nchar(refseq); |
|
22 |
+ lrefSeq <- nchar(refSeq); |
|
33 | 23 |
|
34 | 24 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
35 | 25 |
# CDF header |
... | ... |
@@ -47,21 +37,21 @@ |
47 | 37 |
## Magic number and version number |
48 | 38 |
writeBin(object = as.integer(c(67, 1)), |
49 | 39 |
con = con, size = 4, endian = "little") |
50 |
- ## Ncols, Nrows |
|
51 |
- writeBin(object = as.integer(c(ncols, nrows)), |
|
40 |
+ ## NCols, NRows |
|
41 |
+ writeBin(object = as.integer(c(nCols, nRows)), |
|
52 | 42 |
con = con, size = 2, endian = "little") |
53 | 43 |
## NumberUnits, NumberQCUnits |
54 |
- writeBin(object = as.integer(c(nunits, nqcunits)), |
|
44 |
+ writeBin(object = as.integer(c(nUnits, nQcUnits)), |
|
55 | 45 |
con = con, size = 4, endian = "little") |
56 |
- ## Length of refseqsequence |
|
57 |
- writeBin(object = as.integer(lrefseq), |
|
46 |
+ ## Length of refSeqsequence |
|
47 |
+ writeBin(object = as.integer(lrefSeq), |
|
58 | 48 |
con = con, size = 4, endian = "little") |
59 |
- ## Refseqsequece |
|
60 |
- if(lrefseq > 0) |
|
61 |
- writeChar(as.character(refseq), con=con, eos=NULL); |
|
49 |
+ ## RefSeqsequece |
|
50 |
+ if(lrefSeq > 0) |
|
51 |
+ writeChar(as.character(refSeq), con=con, eos=NULL); |
|
62 | 52 |
|
63 | 53 |
# Current offset |
64 |
- offset <- 24 + lrefseq; |
|
54 |
+ offset <- 24 + lrefSeq; |
|
65 | 55 |
|
66 | 56 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
67 | 57 |
# Unit names |
... | ... |
@@ -77,24 +67,24 @@ |
77 | 67 |
raw[raw == as.raw(255)] <- as.raw(0); |
78 | 68 |
writeBin(con=con, raw); |
79 | 69 |
rm(raw); |
80 |
-# writeChar(con=con, as.character(unitnames), nchars=rep(64, nunits), eos=NULL) |
|
70 |
+# writeChar(con=con, as.character(unitnames), nchars=rep(64, nUnits), eos=NULL) |
|
81 | 71 |
|
82 |
- bytesOfUnitNames <- 64 * nunits; |
|
72 |
+ bytesOfUnitNames <- 64 * nUnits; |
|
83 | 73 |
offset <- offset + bytesOfUnitNames; |
84 | 74 |
|
85 |
- bytesOfQcUnits <- 4 * nqcunits; |
|
75 |
+ bytesOfQcUnits <- 4 * nQcUnits; |
|
86 | 76 |
offset <- offset + bytesOfQcUnits; |
87 | 77 |
|
88 |
- bytesOfUnits <- 4 * nunits; |
|
78 |
+ bytesOfUnits <- 4 * nUnits; |
|
89 | 79 |
offset <- offset + bytesOfUnits; |
90 | 80 |
|
91 | 81 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
92 | 82 |
# QC units file positions |
93 | 83 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
94 |
- if (nqcunits > 0) { |
|
84 |
+ if (nQcUnits > 0) { |
|
95 | 85 |
csum <- cumsum(qcUnitLengths); |
96 |
- nextOffset <- csum[nqcunits]; |
|
97 |
- starts <- c(0, csum[-nqcunits]); |
|
86 |
+ nextOffset <- csum[nQcUnits]; |
|
87 |
+ starts <- c(0, csum[-nQcUnits]); |
|
98 | 88 |
starts <- as.integer(offset + starts); |
99 | 89 |
writeBin(starts, con = con, size = 4, endian = "little") |
100 | 90 |
} else { |
... | ... |
@@ -108,10 +98,10 @@ |
108 | 98 |
# Units file positions |
109 | 99 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
110 | 100 |
offset <- offset + nextOffset; |
111 |
- if (nunits > 0) { |
|
101 |
+ if (nUnits > 0) { |
|
112 | 102 |
csum <- cumsum(unitLengths); |
113 |
- nextOffset <- csum[nunits]; |
|
114 |
- starts <- c(0, csum[-nunits]); |
|
103 |
+ nextOffset <- csum[nUnits]; |
|
104 |
+ starts <- c(0, csum[-nUnits]); |
|
115 | 105 |
starts <- as.integer(offset + starts); |
116 | 106 |
writeBin(starts, con = con, size = 4, endian = "little"); |
117 | 107 |
} else { |
... | ... |
@@ -124,23 +114,23 @@ |
124 | 114 |
## 3. Write the unit |
125 | 115 |
unitTypes <- c(expression=1, genotyping=2, tag=3, |
126 | 116 |
resequencing=4, unknown=5); |
127 |
- unittype <- unitTypes[unit$unittype]; |
|
117 |
+ unitType <- unitTypes[unit$unittype]; |
|
128 | 118 |
unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
129 |
- unitdirection <- unitDirections[unit$unitdirection]; |
|
119 |
+ unitDirection <- unitDirections[unit$unitdirection]; |
|
130 | 120 |
|
131 |
- unittype <- switch(unit$unittype, |
|
121 |
+ unitType <- switch(unit$unittype, |
|
132 | 122 |
expression = 1, |
133 | 123 |
genotyping = 2, |
134 | 124 |
tag = 3, |
135 | 125 |
resequencing = 4, |
136 | 126 |
unknown = 5) |
137 |
- unitdirection <- switch(unit$unitdirection, |
|
127 |
+ unitDirection <- switch(unit$unitdirection, |
|
138 | 128 |
nodirection = 0, |
139 | 129 |
sense = 1, |
140 | 130 |
antisense = 2, |
141 | 131 |
unknown = 3) |
142 | 132 |
|
143 |
- unitInfo <- as.integer(c(unittype, unitdirection, |
|
133 |
+ unitInfo <- as.integer(c(unitType, unitDirection, |
|
144 | 134 |
unit$natoms, length(unit$groups), |
145 | 135 |
unit$ncells, unit$unitnumber, |
146 | 136 |
unit$ncellsperatom)) |
... | ... |
@@ -160,15 +150,15 @@ |
160 | 150 |
groupDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
161 | 151 |
for(igroup in seq(along.with = unit$groups)) { |
162 | 152 |
group <- unit$groups[[igroup]] |
163 |
- groupdirection <- groupDirections[group$groupdirection]; |
|
164 |
- groupdirection <- switch(group$groupdirection, |
|
153 |
+ groupDirection <- groupDirections[group$groupdirection]; |
|
154 |
+ groupDirection <- switch(group$groupdirection, |
|
165 | 155 |
nodirection = 0, |
166 | 156 |
sense = 1, |
167 | 157 |
antisense = 2, |
168 | 158 |
unknown = 3) |
169 | 159 |
groupInfo <- as.integer(c(group$natoms, length(group$x), |
170 | 160 |
group$ncellsperatom, |
171 |
- groupdirection, min(group$atoms, 0))) |
|
161 |
+ groupDirection, min(group$atoms, 0))) |
|
172 | 162 |
# Number of bytes: 2*4+2*1+2*4=18 bytes |
173 | 163 |
writeBin(groupInfo[1:2], |
174 | 164 |
con = con, size = 4, endian = "little") |
... | ... |
@@ -239,8 +229,8 @@ |
239 | 229 |
writeBin(qcunitInfo[2], con = con, size = 4, endian = "little") |
240 | 230 |
|
241 | 231 |
# Write 2 + 4 bytes |
242 |
- ncells <- length(qcunit$x); |
|
243 |
- nbrOfBytes <- 7*ncells; |
|
232 |
+ nCells <- length(qcunit$x); |
|
233 |
+ nbrOfBytes <- 7*nCells; |
|
244 | 234 |
cells <- matrix(as.integer(c(qcunit$x, qcunit$y, qcunit$length, |
245 | 235 |
qcunit$pm, qcunit$background)), |
246 | 236 |
ncol = 5) |
... | ... |
@@ -253,6 +243,11 @@ |
253 | 243 |
|
254 | 244 |
############################################################################ |
255 | 245 |
# HISTORY: |
246 |
+# 2007-02-01 /HB |
|
247 |
+# o Updated to camel case as much as possible to match JBs updates in the |
|
248 |
+# branch. |
|
249 |
+# o Removed non-used arguments 'unitpositions' and 'qcunitpositions' from |
|
250 |
+# .initializeCdf(). |
|
256 | 251 |
# 2007-01-10 /HB |
257 | 252 |
# o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits(). With |
258 | 253 |
# these it is now possible to build up the CDF in chunks. |
... | ... |
@@ -267,13 +262,13 @@ |
267 | 262 |
# with other code, pursuant to communication from KH. |
268 | 263 |
# 2006-10-25 /HB (+KS) |
269 | 264 |
# o BUG FIX: .initializeCdf() was writing false file offset for QC units |
270 |
-# when the number QC nunits were zero. This would core dump readCdfNnn(). |
|
265 |
+# when the number QC nUnits were zero. This would core dump readCdfNnn(). |
|
271 | 266 |
# 2006-09-21 /HB |
272 | 267 |
# o BUG FIX: The 'atom' and 'indexpos' fields were swapped. |
273 | 268 |
# o Now suppressing warnings "writeChar: more characters requested..." in |
274 | 269 |
# writeCdf(). |
275 | 270 |
# 2006-09-11 /HB |
276 |
-# o BUG FIX: nrows & ncols were swapped in the CDF header. |
|
271 |
+# o BUG FIX: nRows & nCols were swapped in the CDF header. |
|
277 | 272 |
# 2006-09-09 /HB |
278 | 273 |
# o Updated writeCdf() has been validate with compareCdfs() on a few arrays. |
279 | 274 |
# o With the below "optimizations" writeCdf() now writes Hu6800.CDF with |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@21888 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,295 @@ |
1 |
+.initializeCdf <- function(con, nrows = 1, ncols = 1, |
|
2 |
+ nunits = 1, nqcunits = 0, |
|
3 |
+ refseq = "", |
|
4 |
+ unitnames = rep("", nunits), |
|
5 |
+ qcunitpositions = rep(1, nqcunits), |
|
6 |
+ unitpositions = rep(2, nunits), |
|
7 |
+ qcUnitLengths = rep(0, nqcunits), |
|
8 |
+ unitLengths = rep(0, nunits), |
|
9 |
+ ...) { |
|
10 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
11 |
+ # Validate arguments |
|
12 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
13 |
+ if(length(qcunitpositions) != nqcunits) { |
|
14 |
+ stop("Number of elements in argument 'qcunitpositions' does not match 'nqcunits'"); |
|
15 |
+ } |
|
16 |
+ |
|
17 |
+ if(length(unitpositions) != nunits) { |
|
18 |
+ stop("Number of elements in argument 'unitpositions' does not match 'nunits'"); |
|
19 |
+ } |
|
20 |
+ |
|
21 |
+ if(length(qcUnitLengths) != nqcunits) { |
|
22 |
+ stop("Number of elements in argument 'qcUnitLengths' does not match 'nqcunits'"); |
|
23 |
+ } |
|
24 |
+ |
|
25 |
+ if(length(unitLengths) != nunits) { |
|
26 |
+ stop("Number of elements in argument 'qcUnitLengths' does not match 'nunits'"); |
|
27 |
+ } |
|
28 |
+ |
|
29 |
+ if(length(refseq) != 1) |
|
30 |
+ stop("Argument 'refseq' should be a single character."); |
|
31 |
+ |
|
32 |
+ lrefseq <- nchar(refseq); |
|
33 |
+ |
|
34 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
35 |
+ # CDF header |
|
36 |
+ # |
|
37 |
+ # 1 Magic number. Always set to 67. [integer] |
|
38 |
+ # 2 Version number. [integer] |
|
39 |
+ # 3 The number of columns of cells on the array. [unsigned short] |
|
40 |
+ # 4 The number of rows of cells on the array. [unsigned short] |
|
41 |
+ # 5 The number of units in the array not including QC units. The term |
|
42 |
+ # unit is an internal term which means probe set. [integer] |
|
43 |
+ # 6 The number of QC units. [integer] |
|
44 |
+ # 7 The length of the resequencing reference sequence. [integer] |
|
45 |
+ # 8 The resequencing reference sequence. [char[len]] |
|
46 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
47 |
+ ## Magic number and version number |
|
48 |
+ writeBin(object = as.integer(c(67, 1)), |
|
49 |
+ con = con, size = 4, endian = "little") |
|
50 |
+ ## Ncols, Nrows |
|
51 |
+ writeBin(object = as.integer(c(ncols, nrows)), |
|
52 |
+ con = con, size = 2, endian = "little") |
|
53 |
+ ## NumberUnits, NumberQCUnits |
|
54 |
+ writeBin(object = as.integer(c(nunits, nqcunits)), |
|
55 |
+ con = con, size = 4, endian = "little") |
|
56 |
+ ## Length of refseqsequence |
|
57 |
+ writeBin(object = as.integer(lrefseq), |
|
58 |
+ con = con, size = 4, endian = "little") |
|
59 |
+ ## Refseqsequece |
|
60 |
+ if(lrefseq > 0) |
|
61 |
+ writeChar(as.character(refseq), con=con, eos=NULL); |
|
62 |
+ |
|
63 |
+ # Current offset |
|
64 |
+ offset <- 24 + lrefseq; |
|
65 |
+ |
|
66 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
67 |
+ # Unit names |
|
68 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
69 |
+ # Write to raw vector (2*10^6 units => 122Mb; should be ok for now) |
|
70 |
+ # Since we can't create strings with '\0':s, we use '\xFF', |
|
71 |
+ # write to raw and then replace '\xFF' with '\0'. Thus, unit names with |
|
72 |
+ # '\xFF' are invalid, but this should not be a real problem. |
|
73 |
+ pads <- sapply(0:64, FUN=function(x) paste(rep("\xFF", x), collapse="")); |
|
74 |
+ unitnames <- paste(unitnames, pads[64-nchar(unitnames)], sep=""); |
|
75 |
+ raw <- raw(64*length(unitnames)); |
|
76 |
+ raw <- writeBin(con=raw, unitnames, size=1); |
|
77 |
+ raw[raw == as.raw(255)] <- as.raw(0); |
|
78 |
+ writeBin(con=con, raw); |
|
79 |
+ rm(raw); |
|
80 |
+# writeChar(con=con, as.character(unitnames), nchars=rep(64, nunits), eos=NULL) |
|
81 |
+ |
|
82 |
+ bytesOfUnitNames <- 64 * nunits; |
|
83 |
+ offset <- offset + bytesOfUnitNames; |
|
84 |
+ |
|
85 |
+ bytesOfQcUnits <- 4 * nqcunits; |
|
86 |
+ offset <- offset + bytesOfQcUnits; |
|
87 |
+ |
|
88 |
+ bytesOfUnits <- 4 * nunits; |
|
89 |
+ offset <- offset + bytesOfUnits; |
|
90 |
+ |
|
91 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
92 |
+ # QC units file positions |
|
93 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
94 |
+ if (nqcunits > 0) { |
|
95 |
+ csum <- cumsum(qcUnitLengths); |
|
96 |
+ nextOffset <- csum[nqcunits]; |
|
97 |
+ starts <- c(0, csum[-nqcunits]); |
|
98 |
+ starts <- as.integer(offset + starts); |
|
99 |
+ writeBin(starts, con = con, size = 4, endian = "little") |
|
100 |
+ } else { |
|
101 |
+ nextOffset <- 0; |
|
102 |
+# starts <- 0; |
|
103 |
+# starts <- as.integer(offset + starts); |
|
104 |
+# writeBin(starts, con = con, size = 4, endian = "little") |
|
105 |
+ } |
|
106 |
+ |
|
107 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
108 |
+ # Units file positions |
|
109 |
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
110 |
+ offset <- offset + nextOffset; |
|
111 |
+ if (nunits > 0) { |
|
112 |
+ csum <- cumsum(unitLengths); |
|
113 |
+ nextOffset <- csum[nunits]; |
|
114 |
+ starts <- c(0, csum[-nunits]); |
|
115 |
+ starts <- as.integer(offset + starts); |
|
116 |
+ writeBin(starts, con = con, size = 4, endian = "little"); |
|
117 |
+ } else { |
|
118 |
+ nextOffset <- 0; |
|
119 |
+ } |
|
120 |
+} # .initializeCdf() |
|
121 |
+ |
|
122 |
+ |
|
123 |
+.writeCdfUnit <- function(unit, con, unitname=NULL) { |
|
124 |
+ ## 3. Write the unit |
|
125 |
+ unitTypes <- c(expression=1, genotyping=2, tag=3, |
|
126 |
+ resequencing=4, unknown=5); |
|
127 |
+ unittype <- unitTypes[unit$unittype]; |
|
128 |
+ unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
|
129 |
+ unitdirection <- unitDirections[unit$unitdirection]; |
|
130 |
+ |
|
131 |
+ unittype <- switch(unit$unittype, |
|
132 |
+ expression = 1, |
|
133 |
+ genotyping = 2, |
|
134 |
+ tag = 3, |
|
135 |
+ resequencing = 4, |
|
136 |
+ unknown = 5) |
|
137 |
+ unitdirection <- switch(unit$unitdirection, |
|
138 |
+ nodirection = 0, |
|
139 |
+ sense = 1, |
|
140 |
+ antisense = 2, |
|
141 |
+ unknown = 3) |
|
142 |
+ |
|
143 |
+ unitInfo <- as.integer(c(unittype, unitdirection, |
|
144 |
+ unit$natoms, length(unit$groups), |
|
145 |
+ unit$ncells, unit$unitnumber, |
|
146 |
+ unit$ncellsperatom)) |
|
147 |
+ |
|
148 |
+ # Number of bytes: 2+1+4*4+1=20 bytes |
|
149 |
+ writeBin(unitInfo[1], |
|
150 |
+ con = con, size = 2, endian = "little") |
|
151 |
+ writeBin(unitInfo[2], |
|
152 |
+ con = con, size = 1, endian = "little") |
|
153 |
+ writeBin(unitInfo[3:6], |
|
154 |
+ con = con, size = 4, endian = "little") |
|
155 |
+ writeBin(unitInfo[7], |
|
156 |
+ con = con, size = 1, endian = "little") |
|
157 |
+ |
|
158 |
+ ## Writing each group in turn |
|
159 |
+ # Number of bytes: (18+64)*nbrOfGroups + 14*totalNbrOfCells bytes |
|
160 |
+ groupDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3); |
|
161 |
+ for(igroup in seq(along.with = unit$groups)) { |
|
162 |
+ group <- unit$groups[[igroup]] |
|
163 |
+ groupdirection <- groupDirections[group$groupdirection]; |
|
164 |
+ groupdirection <- switch(group$groupdirection, |
|
165 |
+ nodirection = 0, |
|
166 |
+ sense = 1, |
|
167 |
+ antisense = 2, |
|
168 |
+ unknown = 3) |
|
169 |
+ groupInfo <- as.integer(c(group$natoms, length(group$x), |
|
170 |
+ group$ncellsperatom, |
|
171 |
+ groupdirection, min(group$atoms, 0))) |
|
172 |
+ # Number of bytes: 2*4+2*1+2*4=18 bytes |
|
173 |
+ writeBin(groupInfo[1:2], |
|
174 |
+ con = con, size = 4, endian = "little") |
|
175 |
+ writeBin(groupInfo[3:4], |
|
176 |
+ con = con, size = 1, endian = "little") |
|
177 |
+ writeBin(groupInfo[5:6], |
|
178 |
+ con = con, size = 4, endian = "little") |
|
179 |
+ |
|
180 |
+ # Number of bytes: 64 bytes |
|
181 |
+ suppressWarnings({ |
|
182 |
+ writeChar(as.character(names(unit$groups)[igroup]), |
|
183 |
+ con = con, nchars = 64, eos = NULL) |
|
184 |
+ }) |
|
185 |
+ |
|
186 |
+ ## Writing each cell in turn |
|
187 |
+# cells <- matrix(as.integer(c(group$atom, group$x, |
|
188 |
+# group$y, group$indexpos)), |
|
189 |
+# ncol = 4) |
|
190 |
+ cells <- matrix(as.integer(c(group$indexpos, group$x, |
|
191 |
+ group$y, group$atom)), |
|
192 |
+ ncol = 4) |
|
193 |
+ |
|
194 |
+ # Number of bytes: 14*nbrOfCells bytes |
|
195 |
+ for(icell in seq(along.with = group$x)) { |
|
196 |
+ # Number of bytes: 1*4+2*2+1*4+1*2=14 bytes |
|
197 |
+ writeBin(cells[icell, 1], |
|
198 |
+ con = con, size = 4, endian = "little") |
|
199 |
+ writeBin(cells[icell, 2:3], |
|
200 |
+ con = con, size = 2, endian = "little") |
|
201 |
+ writeBin(cells[icell, 4], |
|
202 |
+ con = con, size = 4, endian = "little") |
|
203 |
+ writeChar(as.character(c(group$pbase[icell], |
|
204 |
+ group$tbase[icell])), |
|
205 |
+ con = con, nchars = c(1,1), eos = NULL) |
|
206 |
+ } |
|
207 |
+ } |
|
208 |
+} # .writeCdfUnit() |
|
209 |
+ |
|
210 |
+ |
|
211 |
+ |
|
212 |
+.writeCdfQcUnit <- function(qcunit, con) { |
|
213 |
+ ## 2. Actually write the qcunit |
|
214 |
+ type <- switch(qcunit$type, |
|
215 |
+ unknown = 0, |
|
216 |
+ checkerboardNegative = 1, |
|
217 |
+ checkerboardPositive = 2, |
|
218 |
+ hybeNegative = 3, |
|
219 |
+ hybePositive = 4, |
|
220 |
+ textFeaturesNegative = 5, |
|
221 |
+ textFeaturesPositive = 6, |
|
222 |
+ centralNegative = 7, |
|
223 |
+ centralPositive = 8, |
|
224 |
+ geneExpNegative = 9, |
|
225 |
+ geneExpPositive = 10, |
|
226 |
+ cycleFidelityNegative = 11, |
|
227 |
+ cycleFidelityPositive = 12, |
|
228 |
+ centralCrossNegative = 13, |
|
229 |
+ centralCrossPositive = 14, |
|
230 |
+ crossHybeNegative = 15, |
|
231 |
+ crossHybePositive = 16, |
|
232 |
+ SpatialNormNegative = 17, |
|
233 |
+ SpatialNormPositive = 18) |
|
234 |
+ |
|
235 |
+ # Write 2 + 4 bytes |
|
236 |
+ nbrOfBytes <- 6; |
|
237 |
+ qcunitInfo <- as.integer(c(type, qcunit$ncells)) |
|
238 |
+ writeBin(qcunitInfo[1], con = con, size = 2, endian = "little") |
|
239 |
+ writeBin(qcunitInfo[2], con = con, size = 4, endian = "little") |
|
240 |
+ |
|
241 |
+ # Write 2 + 4 bytes |
|
242 |
+ ncells <- length(qcunit$x); |
|
243 |
+ nbrOfBytes <- 7*ncells; |
|
244 |
+ cells <- matrix(as.integer(c(qcunit$x, qcunit$y, qcunit$length, |
|
245 |
+ qcunit$pm, qcunit$background)), |
|
246 |
+ ncol = 5) |
|
247 |
+ for(icell in seq(along.with = qcunit$x)) { |
|
248 |
+ writeBin(cells[icell, 1:2], con = con, size = 2, endian = "little") |
|
249 |
+ writeBin(cells[icell, 3:5], con = con, size = 1, endian = "little") |
|
250 |
+ } |
|
251 |
+} # .writeCdfQcUnit() |
|
252 |
+ |
|
253 |
+ |
|
254 |
+############################################################################ |
|
255 |
+# HISTORY: |
|
256 |
+# 2007-01-10 /HB |
|
257 |
+# o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits(). With |
|
258 |
+# these it is now possible to build up the CDF in chunks. |
|
259 |
+# o Removed obsolete arguments 'addName' and 'addPositions' and all related |
|
260 |
+# code. Internal variable 'positions' is not needed anymore. |
|
261 |
+# There are no more seek():s in the code. |
|
262 |
+# o Removed obsolete .writeCdfUnit2(). |
|
263 |
+# o Now only every 1000th unit (instead of 100th) is reported. It is now |
|
264 |
+# also a count down. |
|
265 |
+# 2006-12-18 /KS |
|
266 |
+# o Make global replacement "block" -> "group" to maintain consistency |
|
267 |
+# with other code, pursuant to communication from KH. |
|
268 |
+# 2006-10-25 /HB (+KS) |
|
269 |
+# o BUG FIX: .initializeCdf() was writing false file offset for QC units |
|
270 |
+# when the number QC nunits were zero. This would core dump readCdfNnn(). |
|
271 |
+# 2006-09-21 /HB |
|
272 |
+# o BUG FIX: The 'atom' and 'indexpos' fields were swapped. |
|
273 |
+# o Now suppressing warnings "writeChar: more characters requested..." in |
|
274 |
+# writeCdf(). |
|
275 |
+# 2006-09-11 /HB |
|
276 |
+# o BUG FIX: nrows & ncols were swapped in the CDF header. |
|
277 |
+# 2006-09-09 /HB |
|
278 |
+# o Updated writeCdf() has been validate with compareCdfs() on a few arrays. |
|
279 |
+# o With the below "optimizations" writeCdf() now writes Hu6800.CDF with |
|
280 |
+# units in 130s compared to 140s. |
|
281 |
+# o Now initializeCdf() dumps all unit names at once by first building a |
|
282 |
+# raw vector. This is now much faster than before. |
|
283 |
+# o Now writeCdf() does not seek() around in the file anymore. This should |
|
284 |
+# speed up writing at least a bit. |
|
285 |
+# o Made some optimization, which speeds up the writing a bit. Jumping |
|
286 |
+# around in the file with seek() is expensive and should be avoided. |
|
287 |
+# o Rename writeUnit() to writeCdfUnit() and same for the QC function. |
|
288 |
+# o Added more verbose output and better errror messages for writeCdf(). |
|
289 |
+# 2006-09-07 /HB |
|
290 |
+# o Maybe initalizeCdf(), writeUnit(), and writeQcUnit() should be made |
|
291 |
+# private functions of this package. |
|
292 |
+# o Removed textCdf2binCdf() skeleton. See convertCdf() instead. |
|
293 |
+# o Updated writeCdf() such that the connection is guaranteed to be closed |
|
294 |
+# regardless. |
|
295 |
+############################################################################ |