git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@29522 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -96,6 +96,7 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
96 | 96 |
hasFilter <- TRUE; |
97 | 97 |
} |
98 | 98 |
|
99 |
+ |
|
99 | 100 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
100 | 101 |
# Open file |
101 | 102 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
... | ... |
@@ -198,6 +199,7 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
198 | 199 |
names <- character(dataGroupHeader$nbrOfDataSets); |
199 | 200 |
for (kk in seq(along=dss)) { |
200 | 201 |
ds <- .readCcgDataSet(con, fileOffset=offset); |
202 |
+ |
|
201 | 203 |
offset <- ds$nextDataSetStart; |
202 | 204 |
dss[[kk]] <- ds; |
203 | 205 |
names[kk] <- ds$name; |
... | ... |
@@ -208,7 +210,6 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
208 | 210 |
header = dataGroupHeader, |
209 | 211 |
dataSets = dss |
210 | 212 |
); |
211 |
- |
|
212 | 213 |
dataGroups <- c(dataGroups, list(dataGroup)); |
213 | 214 |
} # while (nextDataGroupStart != 0) |
214 | 215 |
names(dataGroups) <- unlist(lapply(dataGroups, FUN=function(dg) { |
... | ... |
@@ -238,8 +239,7 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
238 | 239 |
return(""); |
239 | 240 |
bfr <- readBin(con, what=raw(), n=2*nchars); |
240 | 241 |
bfr <- bfr[seq(from=2, to=length(bfr), by=2)]; |
241 |
- bfr <- as.integer(bfr); |
|
242 |
- bfr <- intToChar(bfr); |
|
242 |
+ bfr <- rawToChar(bfr, multiple=TRUE); |
|
243 | 243 |
bfr <- paste(bfr, collapse=""); |
244 | 244 |
bfr; |
245 | 245 |
} |
... | ... |
@@ -300,14 +300,13 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
300 | 300 |
# 4 INT |
301 | 301 |
# 5 UINT |
302 | 302 |
# 6 FLOAT |
303 |
- # 7 DOUBLE |
|
304 |
- # 8 STRING |
|
305 |
- # 9 WSTRING |
|
303 |
+ # 7 STRING |
|
304 |
+ # 8 WSTRING |
|
306 | 305 |
whats <- c("integer", "integer", "integer", "integer", "integer", |
307 |
- "integer", "double", "double", "character", "character"); |
|
308 |
- names(whats) <- c("BYTE", "UBYTE", "SHORT", "USHORT", "INT", "UINT", "FLOAT", "DOUBLE", "STRING", "WSTRING"); |
|
309 |
- signeds <- c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE); |
|
310 |
- sizes <- c(1, 1, 2, 2, 4, 4, 4, 8, 1, 2); |
|
306 |
+ "integer", "double", "character", "character"); |
|
307 |
+ names(whats) <- c("BYTE", "UBYTE", "SHORT", "USHORT", "INT", "UINT", "FLOAT", "STRING", "WSTRING"); |
|
308 |
+ signeds <- c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE); |
|
309 |
+ sizes <- c(1, 1, 2, 2, 4, 4, 4, 1, 2); |
|
311 | 310 |
|
312 | 311 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
313 | 312 |
# Local functions |
... | ... |
@@ -337,8 +336,7 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
337 | 336 |
return(""); |
338 | 337 |
bfr <- readBin(con, what=raw(), n=2*nchars); |
339 | 338 |
bfr <- bfr[seq(from=2, to=length(bfr), by=2)]; |
340 |
- bfr <- as.integer(bfr); |
|
341 |
- bfr <- intToChar(bfr); |
|
339 |
+ bfr <- rawToChar(bfr, multiple=TRUE); |
|
342 | 340 |
bfr <- paste(bfr, collapse=""); |
343 | 341 |
bfr; |
344 | 342 |
} |
... | ... |
@@ -352,7 +350,7 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
352 | 350 |
|
353 | 351 |
readWVT <- function(con, ...) { |
354 | 352 |
name <- readWString(con); |
355 |
- param <- readRaw(con); |
|
353 |
+ raw <- readRaw(con); |
|
356 | 354 |
type <- readWString(con); |
357 | 355 |
|
358 | 356 |
# Update data types |
... | ... |
@@ -364,7 +362,11 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
364 | 362 |
# * text/x-calvin-unsigned-integer-32 |
365 | 363 |
# * text/x-calvin-float |
366 | 364 |
# * text/plain |
365 |
+ |
|
367 | 366 |
n <- length(raw); |
367 |
+ |
|
368 |
+# cat(sprintf("Reading n=%d records of type '%s' named '%s'.\n", n, type, name)); |
|
369 |
+ |
|
368 | 370 |
value <- switch(type, |
369 | 371 |
"text/ascii" = { |
370 | 372 |
rawToChar(raw); |
... | ... |
@@ -387,23 +389,23 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
387 | 389 |
}, |
388 | 390 |
|
389 | 391 |
"text/x-calvin-integer-16" = { |
390 |
- readBin(raw, what=integer(0), endian="big", size=2, signed=TRUE, n=n/2); |
|
392 |
+ readBin(raw, what=integer(0), endian="big", size=2, signed=TRUE, n=n); |
|
391 | 393 |
}, |
392 | 394 |
|
393 | 395 |
"text/x-calvin-unsigned-integer-16" = { |
394 |
- readBin(raw, what=integer(0), endian="big", size=2, signed=FALSE, n=n/2); |
|
396 |
+ readBin(raw, what=integer(0), endian="big", size=2, signed=FALSE, n=n); |
|
395 | 397 |
}, |
396 | 398 |
|
397 | 399 |
"text/x-calvin-integer-32" = { |
398 |
- readBin(raw, what=integer(0), endian="big", size=4, signed=TRUE, n=n/4); |
|
400 |
+ readBin(raw, what=integer(0), endian="big", size=4, signed=TRUE, n=n); |
|
399 | 401 |
}, |
400 | 402 |
|
401 | 403 |
"text/x-calvin-unsigned-integer-32" = { |
402 |
- readBin(raw, what=integer(0), endian="big", size=4, signed=FALSE, n=n/4); |
|
404 |
+ readBin(raw, what=integer(0), endian="big", size=4, signed=FALSE, n=n); |
|
403 | 405 |
}, |
404 | 406 |
|
405 | 407 |
"text/x-calvin-float" = { |
406 |
- readBin(raw, what=double(0), endian="big", size=4, n=n/4); |
|
408 |
+ readBin(raw, what=double(0), endian="big", size=4, n=n); |
|
407 | 409 |
}, |
408 | 410 |
|
409 | 411 |
{ |
... | ... |
@@ -459,7 +461,6 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
459 | 461 |
nextDataSetStart=readUInt(con), |
460 | 462 |
name=readWString(con) |
461 | 463 |
) |
462 |
- |
|
463 | 464 |
# Reading parameters |
464 | 465 |
nbrOfParams <- readInt(con); |
465 | 466 |
params <- vector("list", nbrOfParams); |
... | ... |
@@ -492,6 +493,7 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
492 | 493 |
attr(what, "size") <- size; |
493 | 494 |
colWhats[[cc]] <- what; |
494 | 495 |
} |
496 |
+ names(colWhats) <- names; |
|
495 | 497 |
bytesPerRow <- as.integer(bytesPerRow); |
496 | 498 |
|
497 | 499 |
nbrOfRows <- readUInt(con); |
... | ... |
@@ -511,23 +513,59 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
511 | 513 |
size <- attr(what, "size"); |
512 | 514 |
|
513 | 515 |
if (what == "character") { |
514 |
- rawCol <- matrix(raw[1:4,], nrow=nbrOfRows, ncol=4); |
|
515 |
- nchars <- readInt(con=rawCol, n=nbrOfRows); |
|
516 |
- nchars <- nchars[1]; |
|
517 |
- ccs <- colsOffset + 4 + seq(from=2, to=2*nchars, by=2); |
|
518 |
- value <- intToChar(as.integer(raw[ccs,])); |
|
516 |
+ value <- matrix(raw[1:4,], nrow=nbrOfRows, ncol=4); |
|
517 |
+ raw <- raw[-c(1:4),,drop=FALSE]; |
|
518 |
+ |
|
519 |
+ # Get the number of characters per string (all equal) |
|
520 |
+## nchars <- readInt(con=value, n=nbrOfRows); |
|
521 |
+## nchars <- nchars[1]; |
|
522 |
+ nchars <- readInt(con=value, n=1); |
|
523 |
+ rm(value); |
|
524 |
+ gc <- gc(); |
|
525 |
+ |
|
526 |
+ ccs <- 1:(size-4); |
|
527 |
+ value <- raw[ccs,]; |
|
528 |
+ raw <- raw[-ccs,,drop=FALSE]; |
|
529 |
+ gc <- gc(); |
|
530 |
+ value <- rawToChar(value, multiple=TRUE); |
|
519 | 531 |
dim(value) <- c(nchars, nbrOfRows); |
520 |
- value <- apply(value, MARGIN=2, paste, collapse=""); |
|
532 |
+ gc <- gc(); |
|
533 |
+ |
|
534 |
+ # Build strings using vectorization (not apply()!) |
|
535 |
+ strs <- NULL; |
|
536 |
+ for (pp in seq(length=nrow(value))) { |
|
537 |
+ valuePP <- value[1,,drop=FALSE]; |
|
538 |
+ value <- value[-1,,drop=FALSE]; |
|
539 |
+# gc <- gc(); |
|
540 |
+ if (pp == 1) { |
|
541 |
+ strs <- valuePP; |
|
542 |
+ } else { |
|
543 |
+ strs <- paste(strs, valuePP, sep=""); |
|
544 |
+ } |
|
545 |
+ rm(valuePP); |
|
546 |
+ } |
|
547 |
+ value <- strs; |
|
548 |
+ rm(strs); |
|
521 | 549 |
} else { |
522 |
- ccs <- colsOffset + 1:size; |
|
523 |
- rawCol <- matrix(raw[ccs,], nrow=bytesPerRow, ncol=nbrOfRows, byrow=FALSE); |
|
524 |
- value <- readBin(con=rawCol, what=what, size=size, signed=signed, endian="big", n=nbrOfRows); |
|
550 |
+ ccs <- 1:size; |
|
551 |
+ value <- raw[ccs,,drop=FALSE]; |
|
552 |
+ raw <- raw[-ccs,,drop=FALSE]; |
|
553 |
+ gc <- gc(); |
|
554 |
+ value <- readBin(con=value, what=what, size=size, signed=signed, endian="big", n=nbrOfRows); |
|
525 | 555 |
} |
556 |
+ |
|
557 |
+ # Garbage collect |
|
558 |
+ gc <- gc(); |
|
559 |
+ |
|
526 | 560 |
table[[cc]] <- value; |
527 | 561 |
colsOffset <- colsOffset + size; |
528 | 562 |
} |
529 |
- table <- as.data.frame(table); |
|
530 |
- colnames(table) <- names; |
|
563 |
+ |
|
564 |
+ # Turn into a data frame |
|
565 |
+ attr(table, "row.names") <- .set_row_names(length(table[[1]])); |
|
566 |
+ attr(table, "names") <- names; |
|
567 |
+ class(table) <- "data.frame"; |
|
568 |
+ |
|
531 | 569 |
dataSet$table <- table; |
532 | 570 |
|
533 | 571 |
dataSet; |
... | ... |
@@ -536,6 +574,15 @@ readCcg <- function(pathname, verbose=0, .filter=NULL, ...) { |
536 | 574 |
|
537 | 575 |
############################################################################ |
538 | 576 |
# HISTORY: |
577 |
+# 2008-01-13 |
|
578 |
+# o Removed dependency on intToChar() in R.utils. |
|
579 |
+# o BUG FIX/UPDATE: The file format was updated between April 2006 and |
|
580 |
+# November 2007. More specifically, the so called "Value Types" were |
|
581 |
+# changed/corrected. Before values 7:9 were 'DOUBLE', 'STRING', and |
|
582 |
+# 'WSTRING'. Now 7:8 are 'STRING' and 'WSTRING' and there is no longer |
|
583 |
+# a 'DOUBLE'. |
|
584 |
+# This was detected while trying to read a CNCHP file outputted by the |
|
585 |
+# new Affymetrix Genotyping Console 2.0. We can now read these files. |
|
539 | 586 |
# 2007-08-16 |
540 | 587 |
# o Now it is only readCcg() and readCcgHeader() that are public. The |
541 | 588 |
# other readCcgNnn() functions are renamed to .readCcgNnn(). |