f1e18a54 |
#########################################################################/**
# @RdocFunction updateCel
#
# @title "Updates a CEL file"
#
|
95092fca |
# @synopsis
#
|
f1e18a54 |
# \description{
# @get "title".
# }
|
95092fca |
#
|
f1e18a54 |
# \arguments{
# \item{filename}{The filename of the CEL file.}
|
95092fca |
# \item{indices}{A @numeric @vector of cell (probe) indices specifying
|
8b5e38bd |
# which cells to updated. If @NULL, all indices are considered.}
|
f1e18a54 |
# \item{intensities}{A @numeric @vector of intensity values to be stored.
# Alternatively, it can also be a named @data.frame or @matrix (or @list)
# where the named columns (elements) are the fields to be updated.}
# \item{stdvs}{A optional @numeric @vector.}
# \item{pixels}{A optional @numeric @vector.}
|
9d93fc41 |
# \item{writeMap}{An optional write map.}
|
f1e18a54 |
# \item{...}{Not used.}
# \item{verbose}{An @integer specifying how much verbose details are
# outputted.}
# }
|
95092fca |
#
|
f1e18a54 |
# \value{
# Returns (invisibly) the pathname of the file updated.
# }
#
# \details{
# Currently only binary (v4) CEL files are supported.
# The current version of the method does not make use of the Fusion SDK,
# but its own code to navigate and update the CEL file.
# }
#
# @examples "../incl/updateCel.Rex"
#
|
76cf4b26 |
# @author "HB"
|
95092fca |
#
|
f1e18a54 |
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
|
63b4c964 |
updateCel <- function(filename, indices=NULL, intensities=NULL, stdvs=NULL, pixels=NULL, writeMap=NULL, ..., verbose=0) {
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
f1e18a54 |
# Validate arguments
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
f1e18a54 |
# Argument 'filename':
if (!file.exists(filename)) {
stop("Cannot update CEL file. File not found: ", filename);
}
header <- readCelHeader(filename);
version <- header$version;
if (!version %in% 4) {
stop("Updating CEL v", version, " files is not supported: ", filename);
}
nbrOfCells <- header$total;
# Argument 'indices':
if (is.null(indices)) {
nbrOfIndices <- nbrOfCells;
} else {
# A CEL file has one-based indices
|
63b4c964 |
r <- range(indices);
if (r[1] < 1 || r[2] > nbrOfCells) {
stop("Argument 'indices' is out of range [1,", nbrOfCells, "]: ",
"[", r[1], ",", r[2], "]");
}
|
f1e18a54 |
nbrOfIndices <- length(indices);
}
# Argument 'intensities':
if (is.matrix(intensities)) {
intensities <- as.data.frame(intensities);
}
if (is.list(intensities) || is.data.frame(intensities)) {
if (is.list(intensities)) {
fields <- names(intensities);
} else {
fields <- colnames(intensities);
}
if (is.null(stdvs) && ("stdvs" %in% fields)) {
stdvs <- intensities[["stdvs"]];
}
if (is.null(pixels) && ("pixels" %in% fields)) {
pixels <- intensities[["pixels"]];
}
if ("intensities" %in% fields) {
intensities <- intensities[["intensities"]];
}
}
# Argument 'intensities':
if (!is.null(intensities)) {
|
f33805a7 |
if (!is.double(intensities))
intensities <- as.double(intensities);
|
f1e18a54 |
if (length(intensities) != nbrOfIndices) {
stop("Number of 'intensities' values does not match the number of cell indices: ", length(intensities), " != ", nbrOfIndices);
}
}
# Argument 'stdvs':
if (!is.null(stdvs)) {
|
f33805a7 |
if (!is.double(stdvs))
stdvs <- as.double(stdvs);
|
f1e18a54 |
if (length(stdvs) != nbrOfIndices) {
stop("Number of 'stdvs' values does not match the number of cell indices: ", length(stdvs), " != ", nbrOfIndices);
}
}
# Argument 'pixels':
if (!is.null(pixels)) {
|
f33805a7 |
if (!is.integer(pixels))
pixels <- as.integer(pixels);
|
f1e18a54 |
if (length(pixels) != nbrOfIndices) {
stop("Number of 'pixels' values does not match the number of cell indices: ", length(pixels), " != ", nbrOfIndices);
}
}
|
63b4c964 |
# Argument 'writeMap':
if (!is.null(writeMap)) {
writeMap <- .assertMap(writeMap, nbrOfCells);
}
|
f1e18a54 |
# Argument 'verbose':
if (length(verbose) != 1)
|
2a5f6145 |
stop("Argument 'verbose' must be a single integer.");
|
f1e18a54 |
verbose <- as.integer(verbose);
if (!is.finite(verbose))
|
2a5f6145 |
stop("Argument 'verbose' must be an integer: ", verbose);
|
f1e18a54 |
# Nothing to do?
if (nbrOfIndices == 0) {
return(invisible(filename));
}
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
f1e18a54 |
# Reorder data such that it is written in optimal order
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
63b4c964 |
reorder <- TRUE;
|
8b5e38bd |
if (is.null(indices)) {
|
63b4c964 |
# Has write map?
if (is.null(writeMap)) {
indices <- 1:nbrOfIndices;
reorder <- FALSE;
} else {
indices <- writeMap;
}
}
if (reorder) {
|
f33805a7 |
if (verbose >= 2)
cat("Re-ordering data for optimal write order...");
|
8b5e38bd |
o <- order(indices);
indices <- indices[o];
if (!is.null(intensities))
intensities <- intensities[o];
if (!is.null(stdvs))
stdvs <- stdvs[o];
if (!is.null(pixels))
pixels <- pixels[o];
|
95092fca |
o <- NULL; # Not needed anymore
|
f33805a7 |
if (verbose >= 2)
cat("done.\n");
|
8b5e38bd |
}
|
f1e18a54 |
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
f1e18a54 |
# Write data to file
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
version <- header$version;
if (version == 4) {
# Open CEL file
con <- file(filename, open="r+b");
on.exit(close(con));
# Skip CEL header
|
f33805a7 |
if (verbose >= 2)
cat("Skipping to beginging of data section...");
|
5f7aff5e |
h <- .readCelHeaderV4(con);
|
f1e18a54 |
|
95092fca |
# "Cell entries - this consists of an intensity value, standard
|
f1e18a54 |
# deviation value and pixel count for each cell in the array.
|
95092fca |
# The values are stored by row then column starting with the X=0,
# Y=0 cell. As an example, the first five entries are for cells
|
f1e18a54 |
# defined by XY coordinates: (0,0), (1,0), (2,0), (3,0), (4,0).
# Type: (float, float, short) = 4 + 4 + 2 = 10 bytes / cell
# cellData <- c(readFloat(con), readFloat(con), readShort(con));
|
8b5e38bd |
sizeOfCell <- 10;
|
f1e18a54 |
# Current file position
|
c448bd63 |
dataOffset <- seek(con, origin="start", rw="read");
|
5f7aff5e |
|
f33805a7 |
if (verbose >= 2)
cat("done.\n");
|
f1e18a54 |
|
8b5e38bd |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
f33805a7 |
# Update in chunks
|
8b5e38bd |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
a92acde3 |
CHUNK.SIZE <- 2^19; # = 524288 indices == 5.2Mb
|
c448bd63 |
CHUNK.SIZE <- 2^20; # = 1048576 indices == 10.5Mb
|
8b5e38bd |
|
f33805a7 |
# Work with zero-based indices
indices <- indices - 1;
|
8b5e38bd |
|
f33805a7 |
count <- 1;
offset <- dataOffset;
|
ede2c2f5 |
nbrOfChunks <- ceiling(length(indices) / CHUNK.SIZE);
|
f33805a7 |
while (length(indices) > 0) {
if (verbose >= 1) {
|
ede2c2f5 |
cat(sprintf("Number of indices left: %d\n", length(indices)));
cat(sprintf("Updating chunk #%d of %d...\n", count, nbrOfChunks));
|
f33805a7 |
}
|
f1e18a54 |
|
f33805a7 |
# Recall: All indices are ordered!
|
f1e18a54 |
|
f33805a7 |
# Shift offset to the first index.
firstIndex <- indices[1];
offset <- offset + sizeOfCell*firstIndex;
|
f1e18a54 |
|
ddf7b0da |
# Shift indices such that first index is zero.
|
f33805a7 |
indices <- indices - firstIndex;
|
8b5e38bd |
|
f33805a7 |
# Get largest index
maxIndex <- indices[length(indices)];
|
a92acde3 |
# verbose && cat(verbose, "Largest index: ", maxIndex, "\n");
|
8b5e38bd |
|
f33805a7 |
# Identify the indices to update such no more than CHUNK.SIZE cells
# are read/updated.
n <- which.max(indices >= CHUNK.SIZE);
if (n == 1)
|
a92acde3 |
n <- length(indices);
|
f33805a7 |
subset <- 1:n;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Read the data section of the CEL file
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (verbose >= 1)
cat("Reading chunk data section...");
|
c448bd63 |
seek(con, origin="start", where=offset, rw="read");
|
a92acde3 |
rawAll <- readBin(con=con, what="raw", n=sizeOfCell*(indices[n]+1));
|
f33805a7 |
if (verbose >= 1)
cat("done.\n");
# Common to all fields
raw <- NULL;
|
95092fca |
|
f33805a7 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Update 'intensities'
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (!is.null(intensities)) {
if (verbose >= 1)
cat("Updating 'intensities'...");
# Write floats (size=4) to raw vector
raw <- raw(length=4*n);
raw <- writeBin(con=raw, intensities[subset], size=4, endian="little");
intensities <- intensities[-subset]; # Not needed anymore
# Updated 'rawAll' accordingly
idx <- rep(sizeOfCell*indices[subset], each=4) + 1:4;
rawAll[idx] <- raw;
|
95092fca |
idx <- NULL; # Not needed anymore
|
f33805a7 |
if (verbose >= 1)
cat("done.\n");
}
|
95092fca |
|
f33805a7 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Update 'stdvs'
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (!is.null(stdvs)) {
if (verbose >= 1)
cat("Updating 'stdvs'...");
# Write floats (size=4) to raw vector
if (length(raw) != 4*n)
raw <- raw(length=4*n);
raw <- writeBin(con=raw, stdvs[subset], size=4, endian="little");
stdvs <- stdvs[-subset]; # Not needed anymore
|
95092fca |
|
f33805a7 |
# Updated 'rawAll' accordingly
idx <- rep(sizeOfCell*indices[subset], each=4) + 5:8;
rawAll[idx] <- raw;
|
95092fca |
idx <- NULL; # Not needed anymore
|
f33805a7 |
if (verbose >= 1)
cat("done.\n");
}
|
95092fca |
|
f33805a7 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Update 'pixels'
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (!is.null(pixels)) {
|
95092fca |
raw <- NULL; # Not needed anymore
|
f33805a7 |
if (verbose >= 1)
cat("Updating 'pixels'...");
# Write short integers (size=2) to raw vector
raw <- raw(length=2*n);
raw <- writeBin(con=raw, pixels[subset], size=2, endian="little");
pixels <- pixels[-subset]; # Not needed anymore
|
95092fca |
|
f33805a7 |
# Updated 'rawAll' accordingly
idx <- rep(sizeOfCell*indices[subset], each=2) + 9:10;
rawAll[idx] <- raw;
|
95092fca |
idx <- NULL; # Not needed anymore
|
f33805a7 |
if (verbose >= 1)
cat("done.\n");
}
|
95092fca |
raw <- NULL; # Not needed anymore
|
a92acde3 |
|
f33805a7 |
# Remove updated indices
indices <- indices[-subset];
|
95092fca |
subset <- NULL; # Not needed anymore
|
f33805a7 |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Write raw data back to file
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (verbose >= 1)
cat("Writing chunk to data section...");
|
c448bd63 |
seek(con, origin="start", where=offset, rw="write");
|
f33805a7 |
writeBin(con=con, rawAll);
if (verbose >= 1)
cat("done.\n");
|
95092fca |
rawAll <- NULL; # Not needed anymore
|
f33805a7 |
if (verbose >= 1)
|
ede2c2f5 |
cat(sprintf("Updating chunk #%d of %d...done\n", count, nbrOfChunks));
|
f33805a7 |
count <- count + 1;
} # while (...)
|
f1e18a54 |
} # if (version ...)
invisible(filename);
}
############################################################################
# HISTORY:
|
63b4c964 |
# 2007-01-04
# o Added argument 'writeMap'.
|
c448bd63 |
# 2006-08-19
# o BUG FIX: Wow wow wow. This one was tricky to find. If not specifying
# the 'rw' argument in seek() it defaults to "", which is not "read" as
# I naively though (because I did not read the inner details of ?seek),
# but the latest call to seek. In other words, since I at the end of
# every "chunk" loop call seek(..., rw="write") the seek(..., [rw=""])
# was equal to a seek(..., rw="write"), but I wanted seek(..., rw="read")!
# That made updateCel() do funny things and write to the wrongs parts
# of the file etc.
|
a92acde3 |
# 2006-08-18
|
95092fca |
# o BUG FIX: The new implementation where data was written to raw vectors
|
a92acde3 |
# was incorrect. A lot of extra zeros was written. The "eastern egg" in
# the updated example contains an image from http://tpo.berkeley.edu/.
# 2006-08-14
|
95092fca |
# o BUG FIX: updateCel() would in some cases give "Error: subscript out of
|
a92acde3 |
# bounds" when writing the last chunk.
|
f33805a7 |
# 2006-07-22
|
95092fca |
# o Update updateCel() to update data in chunks, because updating the
|
f33805a7 |
# complete data section is expensive. For example, a 500K chip has
# 6553600 cells each of size 10 bytes, i.e. >65Mb or raw memory. With
# copying etc it costs >100-200Mb of memory to update a CEL file if only
# the first *and* the last cell is updated. Now it should only be of
# the order of 10-20Mb per chunk.
# o Added verbose output to updateCel().
# o Now updateCel() deallocates objects as soon as possible in order to
# free up as much memory as possible. Had memory problems with the 500K's.
|
8b5e38bd |
# 2006-07-21
# o updateCel() was really slow when updating a large number of cells.
# Now the idea is to write to raw vectors stored in memory. By reading
# the chunk of the CEL data section that is going to be updated as a raw
# data vector and then updating this in memory first, and the re-write
# that chuck of raw data to file, things are much faster.
# o BUG FIX: updateCel(..., indices=NULL) would generate an error, because
# we tried to reorder by order(indices).
|
f1e18a54 |
# 2006-06-19
# o Replace 'data' argument with arguments 'intensities', 'stdvs', and
# 'pixels'. /HB
# 2006-06-18
|
95092fca |
# o First version can update CEL v4 (binary) cell entries. Note that this
# code does not make use of the Fusion SDK library. This may updated in
|
f1e18a54 |
# the future, but for now we just want something that works.
# o Created. /HB
|
95092fca |
############################################################################
|