Browse code

git-bioc: Copied updates from Git master branch to Bioconductor SVN

From: hb <[email protected]>

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@121021 bc3139a8-67e5-0310-9ffc-ced21a209358

H Bengtsson authored on 16/09/2016 22:01:49
Showing 1 changed files
... ...
@@ -198,7 +198,7 @@
198 198
   ## Writing each group in turn
199 199
   # Number of bytes: (18+64)*nbrOfGroups + 14*totalNbrOfCells bytes
200 200
   groupDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
201
-  for(igroup in seq(along.with = unit$groups)) {
201
+  for(igroup in seq_along(unit$groups)) {
202 202
     group <- unit$groups[[igroup]]
203 203
     groupDirection <- groupDirections[group$groupdirection];
204 204
     groupDirection <- switch(group$groupdirection,
... ...
@@ -229,7 +229,7 @@
229 229
                     ncol = 4)
230 230
 
231 231
     # Number of bytes: 14*nbrOfCells bytes
232
-    for(icell in seq(along.with = group$x)) {
232
+    for(icell in seq_along(group$x)) {
233 233
       # Number of bytes: 1*4+2*2+1*4+1*2=14 bytes
234 234
       writeBin(cells[icell, 1],
235 235
                con = con, size = 4, endian = "little")
... ...
@@ -284,7 +284,7 @@
284 284
   cells <- matrix(as.integer(c(qcunit$x, qcunit$y, qcunit$length,
285 285
                                qcunit$pm, qcunit$background)),
286 286
                   ncol = 5)
287
-  for(icell in seq(along.with = qcunit$x)) {
287
+  for(icell in seq_along(qcunit$x)) {
288 288
     writeBin(cells[icell, 1:2], con = con, size = 2, endian = "little")
289 289
     writeBin(cells[icell, 3:5], con = con, size = 1, endian = "little")
290 290
   }
Browse code

Version: 1.33.3 [2013-06-29] o Same updates as in release v1.32.3.

...

Version: 1.32.3 [2013-06-29]
o BUG FIX: Since affxparser v1.30.2/1.31.2 (r72352; 2013-01-08),
writeCdf() would incorrectly encode the unit types, iff the input
'cdf' argument specified them as integers, e.g. as done by
writeCdf() for AffyGenePDInfo in aroma.affymetrix. More
specifically, the unit type index would be off by one, e.g. an
'expression' unit (1) would be encoded as an 'unknown' unit (0)
and so on. On the other hand, if they were specified by their
unit-type names (e.g. 'expression') the encoding should still be
correct, e.g. if input is constructed from readCdf() of affxparser.
Thanks to Guido Hooiveld at Wageningen UR (The Netherlands) for
reporting on this.
o BUG FIX: Similarily, writeCdf() has "always", at least affxparser
v1.7.4 since (r21888; 2007-01-09), encoded unit directions and
QC unit types incorrectly, iff they were specified as integers.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@78016 bc3139a8-67e5-0310-9ffc-ced21a209358

H Bengtsson authored on 29/06/2013 10:45:00
Showing 1 changed files
... ...
@@ -158,23 +158,27 @@
158 158
 
159 159
 .writeCdfUnit <- function(unit, con, unitname=NULL) {
160 160
   ## 3. Write the unit
161
-  unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
162
-  unitDirection <- unitDirections[unit$unitdirection];
163
-  unitType <- switch(unit$unittype,
164
-                     unknown = 0,
165
-                     expression = 1,
166
-                     genotyping = 2,
167
-                     resequencing = 3,
168
-                     tag = 4,
169
-                     copynumber = 5,
170
-                     genotypingcontrol = 6,
171
-                     expressioncontrol = 7)
172
-
173
-  unitDirection <- switch(unit$unitdirection,
174
-                          nodirection = 0,
175
-                          sense = 1,
176
-                          antisense = 2,
177
-                          unknown = 3)
161
+  unitType <- unit$unittype
162
+  if (!is.numeric(unitType)) {
163
+    unitType <- switch(unitType,
164
+                       unknown = 0,
165
+                       expression = 1,
166
+                       genotyping = 2,
167
+                       resequencing = 3,
168
+                       tag = 4,
169
+                       copynumber = 5,
170
+                       genotypingcontrol = 6,
171
+                       expressioncontrol = 7)
172
+  }
173
+
174
+  unitDirection <- unit$unitdirection
175
+  if (!is.numeric(unitDirection)) {
176
+    unitDirection <- switch(unitDirection,
177
+                            nodirection = 0,
178
+                            sense = 1,
179
+                            antisense = 2,
180
+                            unknown = 3)
181
+  }
178 182
 
179 183
   unitInfo <- as.integer(c(unitType, unitDirection,
180 184
                            unit$natoms, length(unit$groups),
... ...
@@ -244,26 +248,29 @@
244 248
 
245 249
 .writeCdfQcUnit <- function(qcunit, con) {
246 250
   ## 2. Actually write the qcunit
247
-  type <- switch(qcunit$type,
248
-                 unknown = 0,
249
-                 checkerboardNegative = 1,
250
-                 checkerboardPositive = 2,
251
-                 hybeNegative = 3,
252
-                 hybePositive = 4,
253
-                 textFeaturesNegative = 5,
254
-                 textFeaturesPositive = 6,
255
-                 centralNegative = 7,
256
-                 centralPositive = 8,
257
-                 geneExpNegative = 9,
258
-                 geneExpPositive = 10,
259
-                 cycleFidelityNegative = 11,
260
-                 cycleFidelityPositive = 12,
261
-                 centralCrossNegative = 13,
262
-                 centralCrossPositive = 14,
263
-                 crossHybeNegative = 15,
264
-                 crossHybePositive = 16,
265
-                 SpatialNormNegative = 17,
266
-                 SpatialNormPositive = 18)
251
+  type <- qcunit$type;
252
+  if (!is.numeric(type)) {
253
+    type <- switch(type,
254
+                   unknown = 0,
255
+                   checkerboardNegative = 1,
256
+                   checkerboardPositive = 2,
257
+                   hybeNegative = 3,
258
+                   hybePositive = 4,
259
+                   textFeaturesNegative = 5,
260
+                   textFeaturesPositive = 6,
261
+                   centralNegative = 7,
262
+                   centralPositive = 8,
263
+                   geneExpNegative = 9,
264
+                   geneExpPositive = 10,
265
+                   cycleFidelityNegative = 11,
266
+                   cycleFidelityPositive = 12,
267
+                   centralCrossNegative = 13,
268
+                   centralCrossPositive = 14,
269
+                   crossHybeNegative = 15,
270
+                   crossHybePositive = 16,
271
+                   SpatialNormNegative = 17,
272
+                   SpatialNormPositive = 18)
273
+  }
267 274
 
268 275
   # Write 2 + 4 bytes
269 276
   nbrOfBytes <- 6;
... ...
@@ -286,6 +293,12 @@
286 293
 
287 294
 ############################################################################
288 295
 # HISTORY:
296
+# 2013-06-29
297
+# o BUG FIX: Since affxparser 1.30.2/1.31.2, .writeCdfUnit() encoded unit
298
+#   types incorrectly, iff specified as integers.
299
+# o BUG FIX: Likewise, .writeCdfUnit() has always encoded unit directions
300
+#   incorrectly, iff specified as integers.  Moreover, .writeCdfQcUnit()
301
+#   has always encoded unit types incorrectly, iff specified as integers.
289 302
 # 2013-05-25 /HB
290 303
 # o Removed all gc() in .initializeCdf().
291 304
 # 2013-01-07 /HB
Browse code

Version: 1.33.2 [2013-05-25] o SPEEDUP: Removed all remaining gc() calls. o SPEEDUP: Replaced all rm() calls with NULL assignments.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@76889 bc3139a8-67e5-0310-9ffc-ced21a209358

H Bengtsson authored on 25/05/2013 22:34:42
Showing 1 changed files
... ...
@@ -1,306 +1,293 @@
1 1
 .initializeCdf <- function(con, nRows = 1, nCols = 1,
2
-                          nUnits = 1, nQcUnits = 0,
3
-                          refSeq = "",
4
-                          unitnames = rep("", nUnits),
5
-                          qcUnitLengths = rep(0, nQcUnits),
6
-                          unitLengths = rep(0, nUnits),
7
-                          ...) {
8
-    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9
-    # Validate arguments
10
-    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
11
-    if(length(qcUnitLengths) != nQcUnits) {
12
-      stop("Number of elements in argument 'qcUnitLengths' does not match 'nQcUnits'");
2
+                           nUnits = 1, nQcUnits = 0,
3
+                           refSeq = "",
4
+                           unitnames = rep("", nUnits),
5
+                           qcUnitLengths = rep(0, nQcUnits),
6
+                           unitLengths = rep(0, nUnits),
7
+                           ...) {
8
+  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9
+  # Validate arguments
10
+  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
11
+  if(length(qcUnitLengths) != nQcUnits) {
12
+    stop("Number of elements in argument 'qcUnitLengths' does not match 'nQcUnits'");
13
+  }
14
+
15
+  if(length(unitLengths) != nUnits) {
16
+    stop("Number of elements in argument 'unitLengths' does not match 'nUnits'");
17
+  }
18
+
19
+  if(length(refSeq) != 1) {
20
+    stop("Argument 'refSeq' should be a single character.");
21
+  }
22
+
23
+  lrefSeq <- nchar(refSeq);
24
+
25
+  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
26
+  # CDF header
27
+  #
28
+  # 1 Magic number. Always set to 67.                           [integer]
29
+  # 2 Version number.                                           [integer]
30
+  # 3 The number of columns of cells on the array.       [unsigned short]
31
+  # 4 The number of rows of cells on the array.          [unsigned short]
32
+  # 5 The number of units in the array not including QC units. The term
33
+  #   unit is an internal term which means probe set.           [integer]
34
+  # 6 The number of QC units.                                   [integer]
35
+  # 7 The length of the resequencing reference sequence.        [integer]
36
+  # 8 The resequencing reference sequence.                    [char[len]]
37
+  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
38
+  offset <- 0;
39
+
40
+  ## Magic number and version number
41
+  writeBin(object = as.integer(c(67, 1)),
42
+           con = con, size = 4, endian = "little")
43
+  ## NCols, NRows
44
+  writeBin(object = as.integer(c(nCols, nRows)),
45
+           con = con, size = 2, endian = "little")
46
+  ## NumberUnits, NumberQCUnits
47
+  writeBin(object = as.integer(c(nUnits, nQcUnits)),
48
+           con = con, size = 4, endian = "little")
49
+  ## Length of refSeqsequence
50
+  writeBin(object = as.integer(lrefSeq),
51
+           con = con, size = 4, endian = "little")
52
+  offset <- 24;
53
+
54
+  fOffset <- seek(con=con, origin="start", rw="write");
55
+  if (offset != fOffset) {
56
+    stop("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset);
57
+  }
58
+
59
+  ## RefSeqsequece
60
+  if(lrefSeq > 0)
61
+    writeChar(as.character(refSeq), con=con, eos=NULL);
62
+
63
+  # Current offset
64
+  offset <- offset + lrefSeq;
65
+
66
+  fOffset <- seek(con=con, origin="start", rw="write");
67
+  if (offset != fOffset) {
68
+    stop("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset);
69
+  }
70
+
71
+
72
+  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
73
+  # Unit names
74
+  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
75
+  # Write to raw vector (2*10^6 units => 122Mb; should be ok for now)
76
+  # Since we can't create strings with '\0':s, we use '\xFF',
77
+  # write to raw and then replace '\xFF' with '\0'. Thus, unit names with
78
+  # '\xFF' are invalid, but this should not be a real problem.
79
+  pads <- sapply(0:64, FUN=function(x) paste(rep("\xFF", x), collapse=""));
80
+
81
+  # Write the unit names in chunks to save memory
82
+  nbrOfUnits <- length(unitnames);
83
+  chunkSize <- 100000;
84
+  nbrOfChunks <- ceiling(nbrOfUnits / chunkSize);
85
+
86
+  # Allocate raw vector
87
+  raw <- raw(64*chunkSize);
88
+
89
+  for (kk in 1:nbrOfChunks) {
90
+    # Units for this chunk
91
+    from <- (kk-1)*chunkSize+1;
92
+    to <- min(from+chunkSize-1, nbrOfUnits);
93
+    unitnamesFF <- unitnames[from:to];
94
+
95
+    # Pad the unit names
96
+    unitnamesFF <- paste(unitnamesFF, pads[64-nchar(unitnamesFF)], sep="");
97
+
98
+    # Truncate last chunk?
99
+    if (chunkSize > length(unitnamesFF)) {
100
+      raw <- raw[1:(64*length(unitnamesFF))];
13 101
     }
14 102
 
15
-    if(length(unitLengths) != nUnits) {
16
-      stop("Number of elements in argument 'unitLengths' does not match 'nUnits'");
17
-    }
18
-
19
-    if(length(refSeq) != 1)
20
-        stop("Argument 'refSeq' should be a single character.");
21
-
22
-    lrefSeq <- nchar(refSeq);
23
-
24
-    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
25
-    # CDF header
26
-    #
27
-    # 1 Magic number. Always set to 67.                           [integer]
28
-    # 2 Version number.                                           [integer]
29
-    # 3 The number of columns of cells on the array.       [unsigned short]
30
-    # 4 The number of rows of cells on the array.          [unsigned short]
31
-    # 5 The number of units in the array not including QC units. The term
32
-    #   unit is an internal term which means probe set.           [integer]
33
-    # 6 The number of QC units.                                   [integer]
34
-    # 7 The length of the resequencing reference sequence.        [integer]
35
-    # 8 The resequencing reference sequence.                    [char[len]]
36
-    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
37
-    offset <- 0;
38
-
39
-    ## Magic number and version number
40
-    writeBin(object = as.integer(c(67, 1)),
41
-             con = con, size = 4, endian = "little")
42
-    ## NCols, NRows
43
-    writeBin(object = as.integer(c(nCols, nRows)),
44
-             con = con, size = 2, endian = "little")
45
-    ## NumberUnits, NumberQCUnits
46
-    writeBin(object = as.integer(c(nUnits, nQcUnits)),
47
-             con = con, size = 4, endian = "little")
48
-    ## Length of refSeqsequence
49
-    writeBin(object = as.integer(lrefSeq),
50
-             con = con, size = 4, endian = "little")
51
-    offset <- 24;
52
-
53
-    fOffset <- seek(con=con, origin="start", rw="write");
54
-    if (offset != fOffset) {
55
-      stop("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset);
56
-    }
57
-
58
-    ## RefSeqsequece
59
-    if(lrefSeq > 0)
60
-      writeChar(as.character(refSeq), con=con, eos=NULL);
61
-
62
-    # Current offset
63
-    offset <- offset + lrefSeq;
64
-
65
-    fOffset <- seek(con=con, origin="start", rw="write");
66
-    if (offset != fOffset) {
67
-      stop("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset);
68
-    }
69
-
70
-
71
-    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
72
-    # Unit names
73
-    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
74
-    # Write to raw vector (2*10^6 units => 122Mb; should be ok for now)
75
-    # Since we can't create strings with '\0':s, we use '\xFF',
76
-    # write to raw and then replace '\xFF' with '\0'. Thus, unit names with
77
-    # '\xFF' are invalid, but this should not be a real problem.
78
-    pads <- sapply(0:64, FUN=function(x) paste(rep("\xFF", x), collapse=""));
79
-
80
-    # Write the unit names in chunks to save memory
81
-    nbrOfUnits <- length(unitnames);
82
-    chunkSize <- 100000;
83
-    nbrOfChunks <- ceiling(nbrOfUnits / chunkSize);
84
-
85
-    # Allocate raw vector
86
-    raw <- raw(64*chunkSize);
87
-
88
-    for (kk in 1:nbrOfChunks) {
89
-      # Units for this chunk
90
-      from <- (kk-1)*chunkSize+1;
91
-      to <- min(from+chunkSize-1, nbrOfUnits);
92
-      unitnamesFF <- unitnames[from:to];
93
-
94
-      # Pad the unit names
95
-      unitnamesFF <- paste(unitnamesFF, pads[64-nchar(unitnamesFF)], sep="");
96
-
97
-      # Truncate last chunk?
98
-      if (chunkSize > length(unitnamesFF)) {
99
-        raw <- raw[1:(64*length(unitnamesFF))];
100
-      }
101
-
102
-      # Write unit names to raw vector
103
-      raw <- writeBin(con=raw, unitnamesFF, size=1);
104
-
105
-      rm(unitnamesFF);
106
-
107
-      # Garbage collect
108
-#      gc <- gc();
109
-#      print(gc);
110
-
111
-      # Replace all '\xFF' with '\0'.
112
-      idxs <- which(raw == as.raw(255));
113
-      raw[idxs] <- as.raw(0);
114
-      rm(idxs);
115
-
116
-      writeBin(con=con, raw);
117
-   } # for (kk in ...)
118
-
119
-   rm(raw);
120
-   # Garbage collect
121
-   gc <- gc();
122
-
123
-#    writeChar(con=con, as.character(unitnames), nchars=rep(64, nUnits), eos=NULL)
124
-
125
-    bytesOfUnitNames <- 64 * nUnits;
126
-    offset <- offset + bytesOfUnitNames;
127
-
128
-    fOffset <- seek(con=con, origin="start", rw="write");
129
-    if (offset != fOffset) {
130
-      stop("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset);
131
-    }
132
-
133
-    bytesOfQcUnits <- 4 * nQcUnits;
134
-    offset <- offset + bytesOfQcUnits;
135
-
136
-    bytesOfUnits <- 4 * nUnits;
137
-    offset <- offset + bytesOfUnits;
138
-
139
-    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
140
-    # QC units file positions
141
-    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
142
-    if (nQcUnits > 0) {
143
-      csum <- cumsum(qcUnitLengths);
144
-      nextOffset <- csum[nQcUnits];
145
-      starts <- c(0, csum[-nQcUnits]);
146
-      starts <- as.integer(offset + starts);
147
-      writeBin(starts, con = con, size = 4, endian = "little")
148
-    } else {
149
-      nextOffset <- 0;
150
-#      starts <- 0;
151
-#      starts <- as.integer(offset + starts);
152
-#      writeBin(starts, con = con, size = 4, endian = "little")
153
-    }
154
-
155
-    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
156
-    # Units file positions
157
-    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
158
-    offset <- offset + nextOffset;
159
-    if (nUnits > 0) {
160
-      csum <- cumsum(unitLengths);
161
-      nextOffset <- csum[nUnits];
162
-      starts <- c(0, csum[-nUnits]);
163
-      starts <- as.integer(offset + starts);
164
-      writeBin(starts, con = con, size = 4, endian = "little");
165
-    } else {
166
-      nextOffset <- 0;
167
-    }
103
+    # Write unit names to raw vector
104
+    raw <- writeBin(con=raw, unitnamesFF, size=1);
105
+    unitnamesFF <- NULL; # Not needed anymore
106
+
107
+    # Replace all '\xFF' with '\0'.
108
+    idxs <- which(raw == as.raw(255));
109
+    raw[idxs] <- as.raw(0);
110
+    idxs <- NULL; # Not needed anymore
111
+
112
+    writeBin(con=con, raw);
113
+  } # for (kk in ...)
114
+  raw <- NULL; # Not needed anymore
115
+
116
+  bytesOfUnitNames <- 64 * nUnits;
117
+  offset <- offset + bytesOfUnitNames;
118
+
119
+  fOffset <- seek(con=con, origin="start", rw="write");
120
+  if (offset != fOffset) {
121
+    stop("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset);
122
+  }
123
+
124
+  bytesOfQcUnits <- 4 * nQcUnits;
125
+  offset <- offset + bytesOfQcUnits;
126
+
127
+  bytesOfUnits <- 4 * nUnits;
128
+  offset <- offset + bytesOfUnits;
129
+
130
+  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
131
+  # QC units file positions
132
+  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
133
+  if (nQcUnits > 0) {
134
+    csum <- cumsum(qcUnitLengths);
135
+    nextOffset <- csum[nQcUnits];
136
+    starts <- c(0, csum[-nQcUnits]);
137
+    starts <- as.integer(offset + starts);
138
+    writeBin(starts, con = con, size = 4, endian = "little")
139
+  } else {
140
+    nextOffset <- 0;
141
+  }
142
+
143
+  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
144
+  # Units file positions
145
+  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
146
+  offset <- offset + nextOffset;
147
+  if (nUnits > 0) {
148
+    csum <- cumsum(unitLengths);
149
+    nextOffset <- csum[nUnits];
150
+    starts <- c(0, csum[-nUnits]);
151
+    starts <- as.integer(offset + starts);
152
+    writeBin(starts, con = con, size = 4, endian = "little");
153
+  } else {
154
+    nextOffset <- 0;
155
+  }
168 156
 } # .initializeCdf()
169 157
 
170 158
 
171 159
 .writeCdfUnit <- function(unit, con, unitname=NULL) {
172
-    ## 3. Write the unit
173
-    unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
174
-    unitDirection <- unitDirections[unit$unitdirection];
175
-    unitType <- switch(unit$unittype,
176
-                       unknown = 0,
177
-                       expression = 1,
178
-                       genotyping = 2,
179
-                       resequencing = 3,
180
-                       tag = 4,
181
-                       copynumber = 5,
182
-                       genotypingcontrol = 6,
183
-                       expressioncontrol = 7)
184
-
185
-    unitDirection <- switch(unit$unitdirection,
186
-                            nodirection = 0,
187
-                            sense = 1,
188
-                            antisense = 2,
189
-                            unknown = 3)
190
-
191
-    unitInfo <- as.integer(c(unitType, unitDirection,
192
-                             unit$natoms, length(unit$groups),
193
-                             unit$ncells, unit$unitnumber,
194
-                             unit$ncellsperatom))
195
-
196
-    # Number of bytes: 2+1+4*4+1=20 bytes
197
-    writeBin(unitInfo[1],
198
-             con = con, size = 2, endian = "little")
199
-    writeBin(unitInfo[2],
200
-             con = con, size = 1, endian = "little")
201
-    writeBin(unitInfo[3:6],
160
+  ## 3. Write the unit
161
+  unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
162
+  unitDirection <- unitDirections[unit$unitdirection];
163
+  unitType <- switch(unit$unittype,
164
+                     unknown = 0,
165
+                     expression = 1,
166
+                     genotyping = 2,
167
+                     resequencing = 3,
168
+                     tag = 4,
169
+                     copynumber = 5,
170
+                     genotypingcontrol = 6,
171
+                     expressioncontrol = 7)
172
+
173
+  unitDirection <- switch(unit$unitdirection,
174
+                          nodirection = 0,
175
+                          sense = 1,
176
+                          antisense = 2,
177
+                          unknown = 3)
178
+
179
+  unitInfo <- as.integer(c(unitType, unitDirection,
180
+                           unit$natoms, length(unit$groups),
181
+                           unit$ncells, unit$unitnumber,
182
+                           unit$ncellsperatom))
183
+
184
+  # Number of bytes: 2+1+4*4+1=20 bytes
185
+  writeBin(unitInfo[1],
186
+           con = con, size = 2, endian = "little")
187
+  writeBin(unitInfo[2],
188
+           con = con, size = 1, endian = "little")
189
+  writeBin(unitInfo[3:6],
190
+           con = con, size = 4, endian = "little")
191
+  writeBin(unitInfo[7],
192
+           con = con, size = 1, endian = "little")
193
+
194
+  ## Writing each group in turn
195
+  # Number of bytes: (18+64)*nbrOfGroups + 14*totalNbrOfCells bytes
196
+  groupDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
197
+  for(igroup in seq(along.with = unit$groups)) {
198
+    group <- unit$groups[[igroup]]
199
+    groupDirection <- groupDirections[group$groupdirection];
200
+    groupDirection <- switch(group$groupdirection,
201
+                             nodirection = 0,
202
+                             sense = 1,
203
+                             antisense = 2,
204
+                             unknown = 3)
205
+    groupInfo <- as.integer(c(group$natoms, length(group$x),
206
+                              group$ncellsperatom,
207
+                              groupDirection, min(group$atoms, 0)))
208
+    # Number of bytes: 2*4+2*1+2*4=18 bytes
209
+    writeBin(groupInfo[1:2],
202 210
              con = con, size = 4, endian = "little")
203
-    writeBin(unitInfo[7],
211
+    writeBin(groupInfo[3:4],
204 212
              con = con, size = 1, endian = "little")
213
+    writeBin(groupInfo[5:6],
214
+             con = con, size = 4, endian = "little")
205 215
 
206
-    ## Writing each group in turn
207
-    # Number of bytes: (18+64)*nbrOfGroups + 14*totalNbrOfCells bytes
208
-    groupDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
209
-    for(igroup in seq(along.with = unit$groups)) {
210
-        group <- unit$groups[[igroup]]
211
-        groupDirection <- groupDirections[group$groupdirection];
212
-        groupDirection <- switch(group$groupdirection,
213
-                                 nodirection = 0,
214
-                                 sense = 1,
215
-                                 antisense = 2,
216
-                                 unknown = 3)
217
-        groupInfo <- as.integer(c(group$natoms, length(group$x),
218
-                                  group$ncellsperatom,
219
-                                  groupDirection, min(group$atoms, 0)))
220
-       # Number of bytes: 2*4+2*1+2*4=18 bytes
221
-        writeBin(groupInfo[1:2],
222
-                 con = con, size = 4, endian = "little")
223
-        writeBin(groupInfo[3:4],
224
-                 con = con, size = 1, endian = "little")
225
-        writeBin(groupInfo[5:6],
226
-                 con = con, size = 4, endian = "little")
227
-
228
-        # Number of bytes: 64 bytes
229
-        suppressWarnings({
230
-          writeChar(as.character(names(unit$groups)[igroup]),
231
-                    con = con, nchars = 64, eos = NULL)
232
-        })
233
-
234
-        ## Writing each cell in turn
235
-#        cells <- matrix(as.integer(c(group$atom, group$x,
236
-#                                     group$y, group$indexpos)),
237
-#                        ncol = 4)
238
-        cells <- matrix(as.integer(c(group$indexpos, group$x,
239
-                                     group$y, group$atom)),
240
-                        ncol = 4)
241
-
242
-        # Number of bytes: 14*nbrOfCells bytes
243
-        for(icell in seq(along.with = group$x)) {
244
-            # Number of bytes: 1*4+2*2+1*4+1*2=14 bytes
245
-            writeBin(cells[icell, 1],
246
-                     con = con, size = 4, endian = "little")
247
-            writeBin(cells[icell, 2:3],
248
-                     con = con, size = 2, endian = "little")
249
-            writeBin(cells[icell, 4],
250
-                     con = con, size = 4, endian = "little")
251
-            writeChar(as.character(c(group$pbase[icell],
252
-                                     group$tbase[icell])),
253
-                      con = con, nchars = c(1,1), eos = NULL)
254
-        }
255
-    }
216
+    # Number of bytes: 64 bytes
217
+    suppressWarnings({
218
+      writeChar(as.character(names(unit$groups)[igroup]),
219
+                con = con, nchars = 64, eos = NULL)
220
+    })
221
+
222
+    ## Writing each cell in turn
223
+    cells <- matrix(as.integer(c(group$indexpos, group$x,
224
+                                 group$y, group$atom)),
225
+                    ncol = 4)
226
+
227
+    # Number of bytes: 14*nbrOfCells bytes
228
+    for(icell in seq(along.with = group$x)) {
229
+      # Number of bytes: 1*4+2*2+1*4+1*2=14 bytes
230
+      writeBin(cells[icell, 1],
231
+               con = con, size = 4, endian = "little")
232
+      writeBin(cells[icell, 2:3],
233
+               con = con, size = 2, endian = "little")
234
+      writeBin(cells[icell, 4],
235
+               con = con, size = 4, endian = "little")
236
+      writeChar(as.character(c(group$pbase[icell],
237
+                               group$tbase[icell])),
238
+                con = con, nchars = c(1,1), eos = NULL)
239
+    } # for (icell ...)
240
+  } # for (igroup ...)
256 241
 } # .writeCdfUnit()
257 242
 
258 243
 
259 244
 
260 245
 .writeCdfQcUnit <- function(qcunit, con) {
261
-    ## 2. Actually write the qcunit
262
-    type <- switch(qcunit$type,
263
-                   unknown = 0,
264
-                   checkerboardNegative = 1,
265
-                   checkerboardPositive = 2,
266
-                   hybeNegative = 3,
267
-                   hybePositive = 4,
268
-                   textFeaturesNegative = 5,
269
-                   textFeaturesPositive = 6,
270
-                   centralNegative = 7,
271
-                   centralPositive = 8,
272
-                   geneExpNegative = 9,
273
-                   geneExpPositive = 10,
274
-                   cycleFidelityNegative = 11,
275
-                   cycleFidelityPositive = 12,
276
-                   centralCrossNegative = 13,
277
-                   centralCrossPositive = 14,
278
-                   crossHybeNegative = 15,
279
-                   crossHybePositive = 16,
280
-                   SpatialNormNegative = 17,
281
-                   SpatialNormPositive = 18)
282
-
283
-    # Write 2 + 4 bytes
284
-    nbrOfBytes <- 6;
285
-    qcunitInfo <- as.integer(c(type, qcunit$ncells))
286
-    writeBin(qcunitInfo[1], con = con, size = 2, endian = "little")
287
-    writeBin(qcunitInfo[2], con = con, size = 4, endian = "little")
288
-
289
-    # Write 2 + 4 bytes
290
-    nCells <- length(qcunit$x);
291
-    nbrOfBytes <- 7*nCells;
292
-    cells <- matrix(as.integer(c(qcunit$x, qcunit$y, qcunit$length,
293
-                                 qcunit$pm, qcunit$background)),
294
-                    ncol = 5)
295
-    for(icell in seq(along.with = qcunit$x)) {
296
-        writeBin(cells[icell, 1:2], con = con, size = 2, endian = "little")
297
-        writeBin(cells[icell, 3:5], con = con, size = 1, endian = "little")
298
-    }
246
+  ## 2. Actually write the qcunit
247
+  type <- switch(qcunit$type,
248
+                 unknown = 0,
249
+                 checkerboardNegative = 1,
250
+                 checkerboardPositive = 2,
251
+                 hybeNegative = 3,
252
+                 hybePositive = 4,
253
+                 textFeaturesNegative = 5,
254
+                 textFeaturesPositive = 6,
255
+                 centralNegative = 7,
256
+                 centralPositive = 8,
257
+                 geneExpNegative = 9,
258
+                 geneExpPositive = 10,
259
+                 cycleFidelityNegative = 11,
260
+                 cycleFidelityPositive = 12,
261
+                 centralCrossNegative = 13,
262
+                 centralCrossPositive = 14,
263
+                 crossHybeNegative = 15,
264
+                 crossHybePositive = 16,
265
+                 SpatialNormNegative = 17,
266
+                 SpatialNormPositive = 18)
267
+
268
+  # Write 2 + 4 bytes
269
+  nbrOfBytes <- 6;
270
+  qcunitInfo <- as.integer(c(type, qcunit$ncells))
271
+  writeBin(qcunitInfo[1], con = con, size = 2, endian = "little")
272
+  writeBin(qcunitInfo[2], con = con, size = 4, endian = "little")
273
+
274
+  # Write 2 + 4 bytes
275
+  nCells <- length(qcunit$x);
276
+  nbrOfBytes <- 7*nCells;
277
+  cells <- matrix(as.integer(c(qcunit$x, qcunit$y, qcunit$length,
278
+                               qcunit$pm, qcunit$background)),
279
+                  ncol = 5)
280
+  for(icell in seq(along.with = qcunit$x)) {
281
+    writeBin(cells[icell, 1:2], con = con, size = 2, endian = "little")
282
+    writeBin(cells[icell, 3:5], con = con, size = 1, endian = "little")
283
+  }
299 284
 } # .writeCdfQcUnit()
300 285
 
301 286
 
302 287
 ############################################################################
303 288
 # HISTORY:
289
+# 2013-05-25 /HB
290
+# o Removed all gc() in .initializeCdf().
304 291
 # 2013-01-07 /HB
305 292
 # o GENERALIZATION: .writeCdfUnit() now also encodes unit types
306 293
 #   'genotypingcontrol' and 'expressioncontrol'.
Browse code

Version: 1.31.2 [2013-01-07] o BUG FIX: writeCdf() did not encode unit types as decoded by readCdf(). Unit type 'unknown' was incorrectly encoded such that readCdf() would decode it as 'copynumber'. Also, unit types 'genotypingcontrol' and 'expressioncontrol' where not encoded at all.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@72352 bc3139a8-67e5-0310-9ffc-ced21a209358

H Bengtsson authored on 08/01/2013 02:58:16
Showing 1 changed files
... ...
@@ -170,30 +170,17 @@
170 170
 
171 171
 .writeCdfUnit <- function(unit, con, unitname=NULL) {
172 172
     ## 3. Write the unit
173
-##    unitTypes <- c(expression=1, genotyping=2, tag=3,
174
-##                                             resequencing=4, unknown=5);
175
-##
176
-##    unitType <- unitTypes[unit$unittype];
177 173
     unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
178 174
     unitDirection <- unitDirections[unit$unitdirection];
179
-
180
-##    unitType <- switch(unit$unittype,
181
-##                       expression = 1,
182
-##                       genotyping = 2,
183
-##                       tag = 3,
184
-##                       resequencing = 4,
185
-##                       unknown = 5)
186
-
187
-    # In some version of the Fusion SDK documentation, the unit type 
188
-    # with value 5 (five) was labelled "unknown".  For backward 
189
-    # compatibility we recognize input value "unknown" as well.
190 175
     unitType <- switch(unit$unittype,
176
+                       unknown = 0,
191 177
                        expression = 1,
192 178
                        genotyping = 2,
193 179
                        resequencing = 3,
194 180
                        tag = 4,
195 181
                        copynumber = 5,
196
-                       unknown = 5)
182
+                       genotypingcontrol = 6,
183
+                       expressioncontrol = 7)
197 184
 
198 185
     unitDirection <- switch(unit$unitdirection,
199 186
                             nodirection = 0,
... ...
@@ -314,6 +301,11 @@
314 301
 
315 302
 ############################################################################
316 303
 # HISTORY:
304
+# 2013-01-07 /HB
305
+# o GENERALIZATION: .writeCdfUnit() now also encodes unit types
306
+#   'genotypingcontrol' and 'expressioncontrol'.
307
+# o BUG FIX: .writeCdfUnit() incorrectly encoded the 'unknown' unit type
308
+#   as 5 and not 0.
317 309
 # 2008-08-09 /HB
318 310
 # o BUG FIX: .writeCdfUnit() did output unit type 'resequencing' and 'tag'
319 311
 #   as 4 and 3, and not 3 and 4, respectively.
Browse code

Version: 1.29.1 [2012-05-18] o Replaced several throw() with stop(), because the former assumes that R.methodsS3 is loaded, which it may not be. o ROBUSTNESS: Added a system test forvalidating that the package can write and read a CDF. The test is spawning of another R process so that the test is robust against core dumps.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@66038 bc3139a8-67e5-0310-9ffc-ced21a209358

Henrik Bengtsson authored on 19/05/2012 06:18:26
Showing 1 changed files
... ...
@@ -52,7 +52,7 @@
52 52
 
53 53
     fOffset <- seek(con=con, origin="start", rw="write");
54 54
     if (offset != fOffset) {
55
-      throw("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset);
55
+      stop("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset);
56 56
     }
57 57
 
58 58
     ## RefSeqsequece
... ...
@@ -64,7 +64,7 @@
64 64
 
65 65
     fOffset <- seek(con=con, origin="start", rw="write");
66 66
     if (offset != fOffset) {
67
-      throw("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset);
67
+      stop("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset);
68 68
     }
69 69
 
70 70
 
... ...
@@ -127,7 +127,7 @@
127 127
 
128 128
     fOffset <- seek(con=con, origin="start", rw="write");
129 129
     if (offset != fOffset) {
130
-      throw("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset);
130
+      stop("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset);
131 131
     }
132 132
 
133 133
     bytesOfQcUnits <- 4 * nQcUnits;
Browse code

## Will wait to bump/rebuild this until further validated: ## ## Version: 1.13.5 [2008-08-09] ## o BUG FIX: writeCdf() would write 'CustomSeq' units ## as 'Tag' units, and vice versa. This means that ## *ASCII* CDFs containing such units and converted with ## convertCdf() would be have an incorrect unit type for ## these units. Also, unit type 'Copy Number' is ## reported as "copynumber" and no longer as "unknown". ## o BUG FIX: The increase of the internal buffer for ## reading the 'refseq' header field of ASCII CDFs that ## was done in v1.11.2 was mistakenly undone in v1.13.3. ## o Made readCdf() recognize more unit types.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@33157 bc3139a8-67e5-0310-9ffc-ced21a209358

Henrik Bengtsson authored on 10/08/2008 09:06:09
Showing 1 changed files
... ...
@@ -170,18 +170,31 @@
170 170
 
171 171
 .writeCdfUnit <- function(unit, con, unitname=NULL) {
172 172
     ## 3. Write the unit
173
-    unitTypes <- c(expression=1, genotyping=2, tag=3,
174
-                                             resequencing=4, unknown=5);
175
-    unitType <- unitTypes[unit$unittype];
173
+##    unitTypes <- c(expression=1, genotyping=2, tag=3,
174
+##                                             resequencing=4, unknown=5);
175
+##
176
+##    unitType <- unitTypes[unit$unittype];
176 177
     unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
177 178
     unitDirection <- unitDirections[unit$unitdirection];
178 179
 
180
+##    unitType <- switch(unit$unittype,
181
+##                       expression = 1,
182
+##                       genotyping = 2,
183
+##                       tag = 3,
184
+##                       resequencing = 4,
185
+##                       unknown = 5)
186
+
187
+    # In some version of the Fusion SDK documentation, the unit type 
188
+    # with value 5 (five) was labelled "unknown".  For backward 
189
+    # compatibility we recognize input value "unknown" as well.
179 190
     unitType <- switch(unit$unittype,
180 191
                        expression = 1,
181 192
                        genotyping = 2,
182
-                       tag = 3,
183
-                       resequencing = 4,
193
+                       resequencing = 3,
194
+                       tag = 4,
195
+                       copynumber = 5,
184 196
                        unknown = 5)
197
+
185 198
     unitDirection <- switch(unit$unitdirection,
186 199
                             nodirection = 0,
187 200
                             sense = 1,
... ...
@@ -301,8 +314,11 @@
301 314
 
302 315
 ############################################################################
303 316
 # HISTORY:
317
+# 2008-08-09 /HB
318
+# o BUG FIX: .writeCdfUnit() did output unit type 'resequencing' and 'tag'
319
+#   as 4 and 3, and not 3 and 4, respectively.
304 320
 # 2007-11-13 /KH
305
-# o BUG FIX: The rrror message in internal .initializeCdf() would mention
321
+# o BUG FIX: The error message in internal .initializeCdf() would mention
306 322
 #   'qcUnitLengths' when it was meant to say 'unitLengths'.
307 323
 # 2007-07-13 /HB
308 324
 # o While writing unit names in .initializeCdf(), quite a few copies were
Browse code

# Added HISTORY comment to writeCdf.private.R. # Corrected JHB comment CDFFileData.cpp. # Updated inst/HISTORY file with correct updates. # Migrated all BUG FIXES/TYPO FIXES to the release version as well.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@29108 bc3139a8-67e5-0310-9ffc-ced21a209358

Henrik Bengtsson authored on 08/12/2007 19:20:14
Showing 1 changed files
... ...
@@ -301,6 +301,9 @@
301 301
 
302 302
 ############################################################################
303 303
 # HISTORY:
304
+# 2007-11-13 /KH
305
+# o BUG FIX: The rrror message in internal .initializeCdf() would mention
306
+#   'qcUnitLengths' when it was meant to say 'unitLengths'.
304 307
 # 2007-07-13 /HB
305 308
 # o While writing unit names in .initializeCdf(), quite a few copies were
306 309
 #   created using up a lot of memory.  By removing unused objects and
Browse code

Bugfix in .initializeCdf thanks to bug report from E. Purdum

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@28691 bc3139a8-67e5-0310-9ffc-ced21a209358

Kasper D. Hansen authored on 13/11/2007 23:42:35
Showing 1 changed files
... ...
@@ -13,7 +13,7 @@
13 13
     }
14 14
 
15 15
     if(length(unitLengths) != nUnits) {
16
-      stop("Number of elements in argument 'qcUnitLengths' does not match 'nUnits'");
16
+      stop("Number of elements in argument 'unitLengths' does not match 'nUnits'");
17 17
     }
18 18
 
19 19
     if(length(refSeq) != 1)
... ...
@@ -28,7 +28,7 @@
28 28
     # 2 Version number.                                           [integer]
29 29
     # 3 The number of columns of cells on the array.       [unsigned short]
30 30
     # 4 The number of rows of cells on the array.          [unsigned short]
31
-    # 5 The number of units in the array not including QC units. The term 
31
+    # 5 The number of units in the array not including QC units. The term
32 32
     #   unit is an internal term which means probe set.           [integer]
33 33
     # 6 The number of QC units.                                   [integer]
34 34
     # 7 The length of the resequencing reference sequence.        [integer]
... ...
@@ -53,8 +53,8 @@
53 53
     fOffset <- seek(con=con, origin="start", rw="write");
54 54
     if (offset != fOffset) {
55 55
       throw("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset);
56
-    }   
57
- 
56
+    }
57
+
58 58
     ## RefSeqsequece
59 59
     if(lrefSeq > 0)
60 60
       writeChar(as.character(refSeq), con=con, eos=NULL);
... ...
@@ -65,7 +65,7 @@
65 65
     fOffset <- seek(con=con, origin="start", rw="write");
66 66
     if (offset != fOffset) {
67 67
       throw("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset);
68
-    }   
68
+    }
69 69
 
70 70
 
71 71
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
... ...
@@ -128,7 +128,7 @@
128 128
     fOffset <- seek(con=con, origin="start", rw="write");
129 129
     if (offset != fOffset) {
130 130
       throw("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset);
131
-    }   
131
+    }
132 132
 
133 133
     bytesOfQcUnits <- 4 * nQcUnits;
134 134
     offset <- offset + bytesOfQcUnits;
... ...
@@ -170,7 +170,7 @@
170 170
 
171 171
 .writeCdfUnit <- function(unit, con, unitname=NULL) {
172 172
     ## 3. Write the unit
173
-    unitTypes <- c(expression=1, genotyping=2, tag=3, 
173
+    unitTypes <- c(expression=1, genotyping=2, tag=3,
174 174
                                              resequencing=4, unknown=5);
175 175
     unitType <- unitTypes[unit$unittype];
176 176
     unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
... ...
@@ -193,7 +193,7 @@
193 193
                              unit$ncells, unit$unitnumber,
194 194
                              unit$ncellsperatom))
195 195
 
196
-    # Number of bytes: 2+1+4*4+1=20 bytes    
196
+    # Number of bytes: 2+1+4*4+1=20 bytes
197 197
     writeBin(unitInfo[1],
198 198
              con = con, size = 2, endian = "little")
199 199
     writeBin(unitInfo[2],
... ...
@@ -228,7 +228,7 @@
228 228
         # Number of bytes: 64 bytes
229 229
         suppressWarnings({
230 230
           writeChar(as.character(names(unit$groups)[igroup]),
231
-                    con = con, nchars = 64, eos = NULL) 
231
+                    con = con, nchars = 64, eos = NULL)
232 232
         })
233 233
 
234 234
         ## Writing each cell in turn
... ...
@@ -302,7 +302,7 @@
302 302
 ############################################################################
303 303
 # HISTORY:
304 304
 # 2007-07-13 /HB
305
-# o While writing unit names in .initializeCdf(), quite a few copies were 
305
+# o While writing unit names in .initializeCdf(), quite a few copies were
306 306
 #   created using up a lot of memory.  By removing unused objects and
307 307
 #   writing unit names in chunks memory usage is now stable and < 200MB.
308 308
 # 2007-02-01 /HB
... ...
@@ -314,7 +314,7 @@
314 314
 # o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits().  With
315 315
 #   these it is now possible to build up the CDF in chunks.
316 316
 # o Removed obsolete arguments 'addName' and 'addPositions' and all related
317
-#   code.  Internal variable 'positions' is not needed anymore. 
317
+#   code.  Internal variable 'positions' is not needed anymore.
318 318
 #   There are no more seek():s in the code.
319 319
 # o Removed obsolete .writeCdfUnit2().
320 320
 # o Now only every 1000th unit (instead of 100th) is reported. It is now
Browse code

Version: 1.9.2 [2007-07-27] o Optimized writeCdfHeader() for memory. For a CDF with 1,200,000+ units just writing the unit names would consume 1-1.5GB RAM. Now it writes unit names in chunks keeping the memory overhead around 100-200MB. o Made convertCdf() more memory efficient. o BUG FIX: The error message in isCelFile() when the file was not found was broken. o Updated to v1.9.2 on BioC devel.

Version: 1.8.1 [2007-07-26]
o Now affxparser install on OSX with PPC.


Version: 1.7.6 [2007-03-28] (never committed until v1.9.2)
o Modified findCdf() such that it is possible to set an alternative
function for how CDFs are located.


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@26016 bc3139a8-67e5-0310-9ffc-ced21a209358

Henrik Bengtsson authored on 28/07/2007 10:06:17
Showing 1 changed files
... ...
@@ -34,6 +34,8 @@
34 34
     # 7 The length of the resequencing reference sequence.        [integer]
35 35
     # 8 The resequencing reference sequence.                    [char[len]]
36 36
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
37
+    offset <- 0;
38
+
37 39
     ## Magic number and version number
38 40
     writeBin(object = as.integer(c(67, 1)),
39 41
              con = con, size = 4, endian = "little")
... ...
@@ -46,12 +48,25 @@
46 48
     ## Length of refSeqsequence
47 49
     writeBin(object = as.integer(lrefSeq),
48 50
              con = con, size = 4, endian = "little")
51
+    offset <- 24;
52
+
53
+    fOffset <- seek(con=con, origin="start", rw="write");
54
+    if (offset != fOffset) {
55
+      throw("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset);
56
+    }   
57
+ 
49 58
     ## RefSeqsequece
50 59
     if(lrefSeq > 0)
51 60
       writeChar(as.character(refSeq), con=con, eos=NULL);
52 61
 
53 62
     # Current offset
54
-    offset <- 24 + lrefSeq;
63
+    offset <- offset + lrefSeq;
64
+
65
+    fOffset <- seek(con=con, origin="start", rw="write");
66
+    if (offset != fOffset) {
67
+      throw("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset);
68
+    }   
69
+
55 70
 
56 71
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
57 72
     # Unit names
... ...
@@ -61,17 +76,60 @@
61 76
     # write to raw and then replace '\xFF' with '\0'. Thus, unit names with
62 77
     # '\xFF' are invalid, but this should not be a real problem.
63 78
     pads <- sapply(0:64, FUN=function(x) paste(rep("\xFF", x), collapse=""));
64
-    unitnames <- paste(unitnames, pads[64-nchar(unitnames)], sep="");
65
-    raw <- raw(64*length(unitnames));
66
-    raw <- writeBin(con=raw, unitnames, size=1);
67
-    raw[raw == as.raw(255)] <- as.raw(0);
68
-    writeBin(con=con, raw);
69
-    rm(raw);
79
+
80
+    # Write the unit names in chunks to save memory
81
+    nbrOfUnits <- length(unitnames);
82
+    chunkSize <- 100000;
83
+    nbrOfChunks <- ceiling(nbrOfUnits / chunkSize);
84
+
85
+    # Allocate raw vector
86
+    raw <- raw(64*chunkSize);
87
+
88
+    for (kk in 1:nbrOfChunks) {
89
+      # Units for this chunk
90
+      from <- (kk-1)*chunkSize+1;
91
+      to <- min(from+chunkSize-1, nbrOfUnits);
92
+      unitnamesFF <- unitnames[from:to];
93
+
94
+      # Pad the unit names
95
+      unitnamesFF <- paste(unitnamesFF, pads[64-nchar(unitnamesFF)], sep="");
96
+
97
+      # Truncate last chunk?
98
+      if (chunkSize > length(unitnamesFF)) {
99
+        raw <- raw[1:(64*length(unitnamesFF))];
100
+      }
101
+
102
+      # Write unit names to raw vector
103
+      raw <- writeBin(con=raw, unitnamesFF, size=1);
104
+
105
+      rm(unitnamesFF);
106
+
107
+      # Garbage collect
108
+#      gc <- gc();
109
+#      print(gc);
110
+
111
+      # Replace all '\xFF' with '\0'.
112
+      idxs <- which(raw == as.raw(255));
113
+      raw[idxs] <- as.raw(0);
114
+      rm(idxs);
115
+
116
+      writeBin(con=con, raw);
117
+   } # for (kk in ...)
118
+
119
+   rm(raw);
120
+   # Garbage collect
121
+   gc <- gc();
122
+
70 123
 #    writeChar(con=con, as.character(unitnames), nchars=rep(64, nUnits), eos=NULL)
71 124
 
72 125
     bytesOfUnitNames <- 64 * nUnits;
73 126
     offset <- offset + bytesOfUnitNames;
74 127
 
128
+    fOffset <- seek(con=con, origin="start", rw="write");
129
+    if (offset != fOffset) {
130
+      throw("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset);
131
+    }   
132
+
75 133
     bytesOfQcUnits <- 4 * nQcUnits;
76 134
     offset <- offset + bytesOfQcUnits;
77 135
 
... ...
@@ -243,6 +301,10 @@
243 301
 
244 302
 ############################################################################
245 303
 # HISTORY:
304
+# 2007-07-13 /HB
305
+# o While writing unit names in .initializeCdf(), quite a few copies were 
306
+#   created using up a lot of memory.  By removing unused objects and
307
+#   writing unit names in chunks memory usage is now stable and < 200MB.
246 308
 # 2007-02-01 /HB
247 309
 # o Updated to camel case as much as possible to match JBs updates in the
248 310
 #   branch.
Browse code

o Added missing Rd files. o Converged the method API to the same camel case changes that James Bullard did in the devel branch for those methods that we happened to work on in parallel. o Will commit added/modified changes to the devel branch too.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@22355 bc3139a8-67e5-0310-9ffc-ced21a209358

Henrik Bengtsson authored on 01/02/2007 19:02:01
Showing 1 changed files
... ...
@@ -1,35 +1,25 @@
1
-.initializeCdf <- function(con, nrows = 1, ncols = 1,
2
-                          nunits = 1, nqcunits = 0,
3
-                          refseq = "",
4
-                          unitnames = rep("", nunits),
5
-                          qcunitpositions = rep(1, nqcunits),
6
-                          unitpositions = rep(2, nunits),
7
-                          qcUnitLengths = rep(0, nqcunits),
8
-                          unitLengths = rep(0, nunits),
1
+.initializeCdf <- function(con, nRows = 1, nCols = 1,
2
+                          nUnits = 1, nQcUnits = 0,
3
+                          refSeq = "",
4
+                          unitnames = rep("", nUnits),
5
+                          qcUnitLengths = rep(0, nQcUnits),
6
+                          unitLengths = rep(0, nUnits),
9 7
                           ...) {
10 8
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
11 9
     # Validate arguments
12 10
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
13
-    if(length(qcunitpositions) != nqcunits) {
14
-      stop("Number of elements in argument 'qcunitpositions' does not match 'nqcunits'");
11
+    if(length(qcUnitLengths) != nQcUnits) {
12
+      stop("Number of elements in argument 'qcUnitLengths' does not match 'nQcUnits'");
15 13
     }
16 14
 
17
-    if(length(unitpositions) != nunits) {
18
-      stop("Number of elements in argument 'unitpositions' does not match 'nunits'");
15
+    if(length(unitLengths) != nUnits) {
16
+      stop("Number of elements in argument 'qcUnitLengths' does not match 'nUnits'");
19 17
     }
20 18
 
21
-    if(length(qcUnitLengths) != nqcunits) {
22
-      stop("Number of elements in argument 'qcUnitLengths' does not match 'nqcunits'");
23
-    }
24
-
25
-    if(length(unitLengths) != nunits) {
26
-      stop("Number of elements in argument 'qcUnitLengths' does not match 'nunits'");
27
-    }
28
-
29
-    if(length(refseq) != 1)
30
-        stop("Argument 'refseq' should be a single character.");
19
+    if(length(refSeq) != 1)
20
+        stop("Argument 'refSeq' should be a single character.");
31 21
 
32
-    lrefseq <- nchar(refseq);
22
+    lrefSeq <- nchar(refSeq);
33 23
 
34 24
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
35 25
     # CDF header
... ...
@@ -47,21 +37,21 @@
47 37
     ## Magic number and version number
48 38
     writeBin(object = as.integer(c(67, 1)),
49 39
              con = con, size = 4, endian = "little")
50
-    ## Ncols, Nrows
51
-    writeBin(object = as.integer(c(ncols, nrows)),
40
+    ## NCols, NRows
41
+    writeBin(object = as.integer(c(nCols, nRows)),
52 42
              con = con, size = 2, endian = "little")
53 43
     ## NumberUnits, NumberQCUnits
54
-    writeBin(object = as.integer(c(nunits, nqcunits)),
44
+    writeBin(object = as.integer(c(nUnits, nQcUnits)),
55 45
              con = con, size = 4, endian = "little")
56
-    ## Length of refseqsequence
57
-    writeBin(object = as.integer(lrefseq),
46
+    ## Length of refSeqsequence
47
+    writeBin(object = as.integer(lrefSeq),
58 48
              con = con, size = 4, endian = "little")
59
-    ## Refseqsequece
60
-    if(lrefseq > 0)
61
-      writeChar(as.character(refseq), con=con, eos=NULL);
49
+    ## RefSeqsequece
50
+    if(lrefSeq > 0)
51
+      writeChar(as.character(refSeq), con=con, eos=NULL);
62 52
 
63 53
     # Current offset
64
-    offset <- 24 + lrefseq;
54
+    offset <- 24 + lrefSeq;
65 55
 
66 56
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
67 57
     # Unit names
... ...
@@ -77,24 +67,24 @@
77 67
     raw[raw == as.raw(255)] <- as.raw(0);
78 68
     writeBin(con=con, raw);
79 69
     rm(raw);
80
-#    writeChar(con=con, as.character(unitnames), nchars=rep(64, nunits), eos=NULL)
70
+#    writeChar(con=con, as.character(unitnames), nchars=rep(64, nUnits), eos=NULL)
81 71
 
82
-    bytesOfUnitNames <- 64 * nunits;
72
+    bytesOfUnitNames <- 64 * nUnits;
83 73
     offset <- offset + bytesOfUnitNames;
84 74
 
85
-    bytesOfQcUnits <- 4 * nqcunits;
75
+    bytesOfQcUnits <- 4 * nQcUnits;
86 76
     offset <- offset + bytesOfQcUnits;
87 77
 
88
-    bytesOfUnits <- 4 * nunits;
78
+    bytesOfUnits <- 4 * nUnits;
89 79
     offset <- offset + bytesOfUnits;
90 80
 
91 81
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
92 82
     # QC units file positions
93 83
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
94
-    if (nqcunits > 0) {
84
+    if (nQcUnits > 0) {
95 85
       csum <- cumsum(qcUnitLengths);
96
-      nextOffset <- csum[nqcunits];
97
-      starts <- c(0, csum[-nqcunits]);
86
+      nextOffset <- csum[nQcUnits];
87
+      starts <- c(0, csum[-nQcUnits]);
98 88
       starts <- as.integer(offset + starts);
99 89
       writeBin(starts, con = con, size = 4, endian = "little")
100 90
     } else {
... ...
@@ -108,10 +98,10 @@
108 98
     # Units file positions
109 99
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
110 100
     offset <- offset + nextOffset;
111
-    if (nunits > 0) {
101
+    if (nUnits > 0) {
112 102
       csum <- cumsum(unitLengths);
113
-      nextOffset <- csum[nunits];
114
-      starts <- c(0, csum[-nunits]);
103
+      nextOffset <- csum[nUnits];
104
+      starts <- c(0, csum[-nUnits]);
115 105
       starts <- as.integer(offset + starts);
116 106
       writeBin(starts, con = con, size = 4, endian = "little");
117 107
     } else {
... ...
@@ -124,23 +114,23 @@
124 114
     ## 3. Write the unit
125 115
     unitTypes <- c(expression=1, genotyping=2, tag=3, 
126 116
                                              resequencing=4, unknown=5);
127
-    unittype <- unitTypes[unit$unittype];
117
+    unitType <- unitTypes[unit$unittype];
128 118
     unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
129
-    unitdirection <- unitDirections[unit$unitdirection];
119
+    unitDirection <- unitDirections[unit$unitdirection];
130 120
 
131
-    unittype <- switch(unit$unittype,
121
+    unitType <- switch(unit$unittype,
132 122
                        expression = 1,
133 123
                        genotyping = 2,
134 124
                        tag = 3,
135 125
                        resequencing = 4,
136 126
                        unknown = 5)
137
-    unitdirection <- switch(unit$unitdirection,
127
+    unitDirection <- switch(unit$unitdirection,
138 128
                             nodirection = 0,
139 129
                             sense = 1,
140 130
                             antisense = 2,
141 131
                             unknown = 3)
142 132
 
143
-    unitInfo <- as.integer(c(unittype, unitdirection,
133
+    unitInfo <- as.integer(c(unitType, unitDirection,
144 134
                              unit$natoms, length(unit$groups),
145 135
                              unit$ncells, unit$unitnumber,
146 136
                              unit$ncellsperatom))
... ...
@@ -160,15 +150,15 @@
160 150
     groupDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
161 151
     for(igroup in seq(along.with = unit$groups)) {
162 152
         group <- unit$groups[[igroup]]
163
-        groupdirection <- groupDirections[group$groupdirection];
164
-        groupdirection <- switch(group$groupdirection,
153
+        groupDirection <- groupDirections[group$groupdirection];
154
+        groupDirection <- switch(group$groupdirection,
165 155
                                  nodirection = 0,
166 156
                                  sense = 1,
167 157
                                  antisense = 2,
168 158
                                  unknown = 3)
169 159
         groupInfo <- as.integer(c(group$natoms, length(group$x),
170 160
                                   group$ncellsperatom,
171
-                                  groupdirection, min(group$atoms, 0)))
161
+                                  groupDirection, min(group$atoms, 0)))
172 162
        # Number of bytes: 2*4+2*1+2*4=18 bytes
173 163
         writeBin(groupInfo[1:2],
174 164
                  con = con, size = 4, endian = "little")
... ...
@@ -239,8 +229,8 @@
239 229
     writeBin(qcunitInfo[2], con = con, size = 4, endian = "little")
240 230
 
241 231
     # Write 2 + 4 bytes
242
-    ncells <- length(qcunit$x);
243
-    nbrOfBytes <- 7*ncells;
232
+    nCells <- length(qcunit$x);
233
+    nbrOfBytes <- 7*nCells;
244 234
     cells <- matrix(as.integer(c(qcunit$x, qcunit$y, qcunit$length,
245 235
                                  qcunit$pm, qcunit$background)),
246 236
                     ncol = 5)
... ...
@@ -253,6 +243,11 @@
253 243
 
254 244
 ############################################################################
255 245
 # HISTORY:
246
+# 2007-02-01 /HB
247
+# o Updated to camel case as much as possible to match JBs updates in the
248
+#   branch.
249
+# o Removed non-used arguments 'unitpositions' and 'qcunitpositions' from
250
+#   .initializeCdf().
256 251
 # 2007-01-10 /HB
257 252
 # o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits().  With
258 253
 #   these it is now possible to build up the CDF in chunks.
... ...
@@ -267,13 +262,13 @@
267 262
 #   with other code, pursuant to communication from KH.
268 263
 # 2006-10-25 /HB (+KS)
269 264
 # o BUG FIX: .initializeCdf() was writing false file offset for QC units
270
-#   when the number QC nunits were zero.  This would core dump readCdfNnn().
265
+#   when the number QC nUnits were zero.  This would core dump readCdfNnn().
271 266
 # 2006-09-21 /HB
272 267
 # o BUG FIX: The 'atom' and 'indexpos' fields were swapped.
273 268
 # o Now suppressing warnings "writeChar: more characters requested..." in
274 269
 #   writeCdf().
275 270
 # 2006-09-11 /HB
276
-# o BUG FIX: nrows & ncols were swapped in the CDF header.
271
+# o BUG FIX: nRows & nCols were swapped in the CDF header.
277 272
 # 2006-09-09 /HB
278 273
 # o Updated writeCdf() has been validate with compareCdfs() on a few arrays.
279 274
 # o With the below "optimizations" writeCdf() now writes Hu6800.CDF with
Browse code

o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits(). These are all used by writeCdf(). They also make it possible to write a CDF in chunks in order to for instance convertCdf() in constant memory. These functions still need to be documented.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@21888 bc3139a8-67e5-0310-9ffc-ced21a209358

Henrik Bengtsson authored on 09/01/2007 08:44:30
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,295 @@
1
+.initializeCdf <- function(con, nrows = 1, ncols = 1,
2
+                          nunits = 1, nqcunits = 0,
3
+                          refseq = "",
4
+                          unitnames = rep("", nunits),
5
+                          qcunitpositions = rep(1, nqcunits),
6
+                          unitpositions = rep(2, nunits),
7
+                          qcUnitLengths = rep(0, nqcunits),
8
+                          unitLengths = rep(0, nunits),
9
+                          ...) {
10
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
11
+    # Validate arguments
12
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
13
+    if(length(qcunitpositions) != nqcunits) {
14
+      stop("Number of elements in argument 'qcunitpositions' does not match 'nqcunits'");
15
+    }
16
+
17
+    if(length(unitpositions) != nunits) {
18
+      stop("Number of elements in argument 'unitpositions' does not match 'nunits'");
19
+    }
20
+
21
+    if(length(qcUnitLengths) != nqcunits) {
22
+      stop("Number of elements in argument 'qcUnitLengths' does not match 'nqcunits'");
23
+    }
24
+
25
+    if(length(unitLengths) != nunits) {
26
+      stop("Number of elements in argument 'qcUnitLengths' does not match 'nunits'");
27
+    }
28
+
29
+    if(length(refseq) != 1)
30
+        stop("Argument 'refseq' should be a single character.");
31
+
32
+    lrefseq <- nchar(refseq);
33
+
34
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
35
+    # CDF header
36
+    #
37
+    # 1 Magic number. Always set to 67.                           [integer]
38
+    # 2 Version number.                                           [integer]
39
+    # 3 The number of columns of cells on the array.       [unsigned short]
40
+    # 4 The number of rows of cells on the array.          [unsigned short]
41
+    # 5 The number of units in the array not including QC units. The term 
42
+    #   unit is an internal term which means probe set.           [integer]
43
+    # 6 The number of QC units.                                   [integer]
44
+    # 7 The length of the resequencing reference sequence.        [integer]
45
+    # 8 The resequencing reference sequence.                    [char[len]]
46
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
47
+    ## Magic number and version number
48
+    writeBin(object = as.integer(c(67, 1)),
49
+             con = con, size = 4, endian = "little")
50
+    ## Ncols, Nrows
51
+    writeBin(object = as.integer(c(ncols, nrows)),
52
+             con = con, size = 2, endian = "little")
53
+    ## NumberUnits, NumberQCUnits
54
+    writeBin(object = as.integer(c(nunits, nqcunits)),
55
+             con = con, size = 4, endian = "little")
56
+    ## Length of refseqsequence
57
+    writeBin(object = as.integer(lrefseq),
58
+             con = con, size = 4, endian = "little")
59
+    ## Refseqsequece
60
+    if(lrefseq > 0)
61
+      writeChar(as.character(refseq), con=con, eos=NULL);
62
+
63
+    # Current offset
64
+    offset <- 24 + lrefseq;
65
+
66
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
67
+    # Unit names
68
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
69
+    # Write to raw vector (2*10^6 units => 122Mb; should be ok for now)
70
+    # Since we can't create strings with '\0':s, we use '\xFF',
71
+    # write to raw and then replace '\xFF' with '\0'. Thus, unit names with
72
+    # '\xFF' are invalid, but this should not be a real problem.
73
+    pads <- sapply(0:64, FUN=function(x) paste(rep("\xFF", x), collapse=""));
74
+    unitnames <- paste(unitnames, pads[64-nchar(unitnames)], sep="");
75
+    raw <- raw(64*length(unitnames));
76
+    raw <- writeBin(con=raw, unitnames, size=1);
77
+    raw[raw == as.raw(255)] <- as.raw(0);
78
+    writeBin(con=con, raw);
79
+    rm(raw);
80
+#    writeChar(con=con, as.character(unitnames), nchars=rep(64, nunits), eos=NULL)
81
+
82
+    bytesOfUnitNames <- 64 * nunits;
83
+    offset <- offset + bytesOfUnitNames;
84
+
85
+    bytesOfQcUnits <- 4 * nqcunits;
86
+    offset <- offset + bytesOfQcUnits;
87
+
88
+    bytesOfUnits <- 4 * nunits;
89
+    offset <- offset + bytesOfUnits;
90
+
91
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
92
+    # QC units file positions
93
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
94
+    if (nqcunits > 0) {
95
+      csum <- cumsum(qcUnitLengths);
96
+      nextOffset <- csum[nqcunits];
97
+      starts <- c(0, csum[-nqcunits]);
98
+      starts <- as.integer(offset + starts);
99
+      writeBin(starts, con = con, size = 4, endian = "little")
100
+    } else {
101
+      nextOffset <- 0;
102
+#      starts <- 0;
103
+#      starts <- as.integer(offset + starts);
104
+#      writeBin(starts, con = con, size = 4, endian = "little")
105
+    }
106
+
107
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
108
+    # Units file positions
109
+    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
110
+    offset <- offset + nextOffset;
111
+    if (nunits > 0) {
112
+      csum <- cumsum(unitLengths);
113
+      nextOffset <- csum[nunits];
114
+      starts <- c(0, csum[-nunits]);
115
+      starts <- as.integer(offset + starts);
116
+      writeBin(starts, con = con, size = 4, endian = "little");
117
+    } else {
118
+      nextOffset <- 0;
119
+    }
120
+} # .initializeCdf()
121
+
122
+
123
+.writeCdfUnit <- function(unit, con, unitname=NULL) {
124
+    ## 3. Write the unit
125
+    unitTypes <- c(expression=1, genotyping=2, tag=3, 
126
+                                             resequencing=4, unknown=5);
127
+    unittype <- unitTypes[unit$unittype];
128
+    unitDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
129
+    unitdirection <- unitDirections[unit$unitdirection];
130
+
131
+    unittype <- switch(unit$unittype,
132
+                       expression = 1,
133
+                       genotyping = 2,
134
+                       tag = 3,
135
+                       resequencing = 4,
136
+                       unknown = 5)
137
+    unitdirection <- switch(unit$unitdirection,
138
+                            nodirection = 0,
139
+                            sense = 1,
140
+                            antisense = 2,
141
+                            unknown = 3)
142
+
143
+    unitInfo <- as.integer(c(unittype, unitdirection,
144
+                             unit$natoms, length(unit$groups),
145
+                             unit$ncells, unit$unitnumber,
146
+                             unit$ncellsperatom))
147
+
148
+    # Number of bytes: 2+1+4*4+1=20 bytes    
149
+    writeBin(unitInfo[1],
150
+             con = con, size = 2, endian = "little")
151
+    writeBin(unitInfo[2],
152
+             con = con, size = 1, endian = "little")
153
+    writeBin(unitInfo[3:6],
154
+             con = con, size = 4, endian = "little")
155
+    writeBin(unitInfo[7],
156
+             con = con, size = 1, endian = "little")
157
+
158
+    ## Writing each group in turn
159
+    # Number of bytes: (18+64)*nbrOfGroups + 14*totalNbrOfCells bytes
160
+    groupDirections <- c(nodirection=0, sense=1, antisense=2, unknown=3);
161
+    for(igroup in seq(along.with = unit$groups)) {
162
+        group <- unit$groups[[igroup]]
163
+        groupdirection <- groupDirections[group$groupdirection];
164
+        groupdirection <- switch(group$groupdirection,
165
+                                 nodirection = 0,
166
+                                 sense = 1,
167
+                                 antisense = 2,
168
+                                 unknown = 3)
169
+        groupInfo <- as.integer(c(group$natoms, length(group$x),
170
+                                  group$ncellsperatom,
171
+                                  groupdirection, min(group$atoms, 0)))
172
+       # Number of bytes: 2*4+2*1+2*4=18 bytes
173
+        writeBin(groupInfo[1:2],
174
+                 con = con, size = 4, endian = "little")
175
+        writeBin(groupInfo[3:4],
176
+                 con = con, size = 1, endian = "little")
177
+        writeBin(groupInfo[5:6],
178
+                 con = con, size = 4, endian = "little")
179
+
180
+        # Number of bytes: 64 bytes
181
+        suppressWarnings({
182
+          writeChar(as.character(names(unit$groups)[igroup]),
183
+                    con = con, nchars = 64, eos = NULL) 
184
+        })
185
+
186
+        ## Writing each cell in turn
187
+#        cells <- matrix(as.integer(c(group$atom, group$x,
188
+#                                     group$y, group$indexpos)),
189
+#                        ncol = 4)
190
+        cells <- matrix(as.integer(c(group$indexpos, group$x,
191
+                                     group$y, group$atom)),
192
+                        ncol = 4)
193
+
194
+        # Number of bytes: 14*nbrOfCells bytes
195
+        for(icell in seq(along.with = group$x)) {
196
+            # Number of bytes: 1*4+2*2+1*4+1*2=14 bytes
197
+            writeBin(cells[icell, 1],
198
+                     con = con, size = 4, endian = "little")
199
+            writeBin(cells[icell, 2:3],
200
+                     con = con, size = 2, endian = "little")
201
+            writeBin(cells[icell, 4],
202
+                     con = con, size = 4, endian = "little")
203
+            writeChar(as.character(c(group$pbase[icell],
204
+                                     group$tbase[icell])),
205
+                      con = con, nchars = c(1,1), eos = NULL)
206
+        }
207
+    }
208
+} # .writeCdfUnit()
209
+
210
+
211
+
212
+.writeCdfQcUnit <- function(qcunit, con) {
213
+    ## 2. Actually write the qcunit
214
+    type <- switch(qcunit$type,
215
+                   unknown = 0,
216
+                   checkerboardNegative = 1,
217
+                   checkerboardPositive = 2,
218
+                   hybeNegative = 3,
219
+                   hybePositive = 4,
220
+                   textFeaturesNegative = 5,
221
+                   textFeaturesPositive = 6,
222
+                   centralNegative = 7,
223
+                   centralPositive = 8,
224
+                   geneExpNegative = 9,
225
+                   geneExpPositive = 10,
226
+                   cycleFidelityNegative = 11,
227
+                   cycleFidelityPositive = 12,
228
+                   centralCrossNegative = 13,
229
+                   centralCrossPositive = 14,
230
+                   crossHybeNegative = 15,
231
+                   crossHybePositive = 16,
232
+                   SpatialNormNegative = 17,
233
+                   SpatialNormPositive = 18)
234
+
235
+    # Write 2 + 4 bytes
236
+    nbrOfBytes <- 6;
237
+    qcunitInfo <- as.integer(c(type, qcunit$ncells))
238
+    writeBin(qcunitInfo[1], con = con, size = 2, endian = "little")
239
+    writeBin(qcunitInfo[2], con = con, size = 4, endian = "little")
240
+
241
+    # Write 2 + 4 bytes
242
+    ncells <- length(qcunit$x);
243
+    nbrOfBytes <- 7*ncells;
244
+    cells <- matrix(as.integer(c(qcunit$x, qcunit$y, qcunit$length,
245
+                                 qcunit$pm, qcunit$background)),
246
+                    ncol = 5)
247
+    for(icell in seq(along.with = qcunit$x)) {
248
+        writeBin(cells[icell, 1:2], con = con, size = 2, endian = "little")
249
+        writeBin(cells[icell, 3:5], con = con, size = 1, endian = "little")
250
+    }
251
+} # .writeCdfQcUnit()
252
+
253
+
254
+############################################################################
255
+# HISTORY:
256
+# 2007-01-10 /HB
257
+# o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits().  With
258
+#   these it is now possible to build up the CDF in chunks.
259
+# o Removed obsolete arguments 'addName' and 'addPositions' and all related
260
+#   code.  Internal variable 'positions' is not needed anymore. 
261
+#   There are no more seek():s in the code.
262
+# o Removed obsolete .writeCdfUnit2().
263
+# o Now only every 1000th unit (instead of 100th) is reported. It is now
264
+#   also a count down.
265
+# 2006-12-18 /KS
266
+# o Make global replacement "block" -> "group" to maintain consistency
267
+#   with other code, pursuant to communication from KH.
268
+# 2006-10-25 /HB (+KS)
269
+# o BUG FIX: .initializeCdf() was writing false file offset for QC units
270
+#   when the number QC nunits were zero.  This would core dump readCdfNnn().
271
+# 2006-09-21 /HB
272
+# o BUG FIX: The 'atom' and 'indexpos' fields were swapped.
273
+# o Now suppressing warnings "writeChar: more characters requested..." in
274
+#   writeCdf().
275
+# 2006-09-11 /HB
276
+# o BUG FIX: nrows & ncols were swapped in the CDF header.
277
+# 2006-09-09 /HB
278
+# o Updated writeCdf() has been validate with compareCdfs() on a few arrays.
279
+# o With the below "optimizations" writeCdf() now writes Hu6800.CDF with
280
+#   units in 130s compared to 140s.
281
+# o Now initializeCdf() dumps all unit names at once by first building a
282
+#   raw vector.  This is now much faster than before.
283
+# o Now writeCdf() does not seek() around in the file anymore.  This should
284
+#   speed up writing at least a bit.
285
+# o Made some optimization, which speeds up the writing a bit.  Jumping
286
+#   around in the file with seek() is expensive and should be avoided.
287
+# o Rename writeUnit() to writeCdfUnit() and same for the QC function.
288
+# o Added more verbose output and better errror messages for writeCdf().
289
+# 2006-09-07 /HB
290
+# o Maybe initalizeCdf(), writeUnit(), and writeQcUnit() should be made
291
+#   private functions of this package.
292
+# o Removed textCdf2binCdf() skeleton. See convertCdf() instead.
293
+# o Updated writeCdf() such that the connection is guaranteed to be closed
294
+#   regardless.
295
+############################################################################