efd288a9 |
#########################################################################/**
# @RdocFunction readCdfUnitsWriteMap
#
# @title "Generates an Affymetrix cell-index write map from a CDF file"
#
|
95092fca |
# @synopsis
#
|
efd288a9 |
# \description{
# @get "title".
#
# The purpose of this method is to provide a re-ordering of cell elements
|
7ba76859 |
# such that cells in units (probesets) can be stored in contiguous blocks.
|
efd288a9 |
# When reading cell elements unit by unit, minimal file re-position is
# required resulting in a faster reading.
#
|
95092fca |
# Note: At the moment does this package not provide methods to
|
efd288a9 |
# write/reorder CEL files. In the meanwhile, you have to write
# and re-read using your own file format. That's not too hard using
# \code{writeBin()} and @see "base::readBin".
# }
|
95092fca |
#
|
efd288a9 |
# \arguments{
# \item{filename}{The pathname of the CDF file.}
# \item{units}{An @integer @vector of unit indices specifying which units
|
95092fca |
# to listed first. All other units are added in order at the end.
|
efd288a9 |
# If @NULL, units are in order.}
# \item{...}{Additional arguments passed to @see "readCdfUnits".}
# \item{verbose}{Either a @logical, a @numeric, or a @see "R.utils::Verbose"
# object specifying how much verbose/debug information is written to
# standard output. If a Verbose object, how detailed the information is
# is specified by the threshold level of the object. If a numeric, the
|
95092fca |
# value is used to set the threshold of a new Verbose object. If @TRUE,
|
efd288a9 |
# the threshold is set to -1 (minimal). If @FALSE, no output is written
# (and neither is the \pkg{R.utils} package required).}
# }
|
95092fca |
#
|
efd288a9 |
# \value{
# A @integer @vector which is a \emph{write} map.
# }
#
|
76cf4b26 |
# @author "HB"
|
95092fca |
#
|
efd288a9 |
# \examples{
# @include "../incl/readCdfUnitsWriteMap.Rex"
#
# @include "../incl/readCdfUnitsWriteMap.2.Rex"
# }
|
95092fca |
#
|
efd288a9 |
# \seealso{
# To invert maps, see @see "invertMap".
# @see "readCel" and @see "readCelUnits".
# }
|
95092fca |
#
|
efd288a9 |
# @keyword "file"
# @keyword "IO"
|
f1d6fcf0 |
# @keyword "internal"
|
efd288a9 |
#*/#########################################################################
readCdfUnitsWriteMap <- function(filename, units=NULL, ..., verbose=FALSE) {
|
0b615a31 |
# To please R CMD check
Arguments <- enter <- exit <- NULL;
rm(list=c("Arguments", "enter", "exit"));
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
efd288a9 |
# Validate arguments
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
efd288a9 |
# Argument 'filename':
# Replace '~':s
filename <- file.path(dirname(filename), basename(filename));
# Argument 'units':
if (!is.null(units)) {
units <- as.numeric(units);
if (any(is.na(units))) {
stop("Argument 'units' contains NAs");
}
nok <- (units < 1);
if (any(nok)) {
nok <- paste(units[nok], collapse=", ");
stop("Argument 'units' contains non-positive indices: ", nok);
}
nok <- duplicated(units);
if (any(nok)) {
nok <- paste(units[nok], collapse=", ");
stop("Argument 'units' contains duplicated indices: ", nok);
}
}
# Argument 'verbose':
if (!identical(verbose, FALSE)) {
|
0e455230 |
requireNamespace("R.utils") || stop("Package not loaded: R.utils");
Arguments <- R.utils::Arguments
enter <- R.utils::enter
exit <- R.utils::exit
|
efd288a9 |
verbose <- Arguments$getVerbose(verbose);
}
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
efd288a9 |
# Read CDF header and process 'units' further
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
efd288a9 |
header <- readCdfHeader(filename);
|
65301877 |
nbrOfCells <- header$ncols * header$nrows;
|
efd288a9 |
nbrOfUnits <- header$probesets;
nok <- (units > nbrOfUnits);
if (any(nok)) {
nok <- paste(units[nok], collapse=", ");
stop("Argument 'units' contains indices out of range [1,", nbrOfUnits,
"]: ", nok);
}
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
efd288a9 |
# Read CDF file
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
efd288a9 |
# Read cell indices unit by unit
verbose && enter(verbose, "Reading cell indices unit by unit from CDF file");
indices <- readCdfCellIndices(filename, units=units, ..., verbose=FALSE);
verbose && exit(verbose);
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
efd288a9 |
# Return cell indices according to 'units'
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
efd288a9 |
if (!is.null(units)) {
verbose && enter(verbose, "Reordering by units");
# Was only a subset of units specified?
if (length(units) != nbrOfUnits) {
verbose && enter(verbose, "Adding missing unit indices");
allUnits <- 1:nbrOfUnits;
missing <- setdiff(allUnits, units);
units <- c(units, missing);
|
95092fca |
missing <- allUnits <- NULL; # Not needed anymore
|
efd288a9 |
verbose && exit(verbose);
}
# Now, reorder the units (here 'indices') accordingly.
indices <- indices[units];
|
95092fca |
units <- NULL; # Not needed anymore
|
efd288a9 |
verbose && exit(verbose);
}
|
95092fca |
indices <- unlist(indices, use.names=FALSE);
|
efd288a9 |
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
efd288a9 |
# Create index map
|
95092fca |
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
efd288a9 |
verbose && enter(verbose, "Adding missing cell indices");
|
95092fca |
# Add non-probeset cells to the end.
|
efd288a9 |
# (Note that readCdfCellIndices() do not read these guys.)
allIndices <- 1:nbrOfCells;
missing <- setdiff(allIndices, indices);
indices <- c(indices, missing);
|
95092fca |
missing <- NULL; # Not needed anymore
|
efd288a9 |
verbose && exit(verbose);
# Returns the write map
indices;
}
############################################################################
# HISTORY:
|
65301877 |
# 2006-09-07
# o BUG FIX: Tried to access fields 'cols' and 'rows' instead of 'ncols'
# and 'nrows' in the CDF header.
|
efd288a9 |
# 2006-04-01
# o Now using readCdfCellIndices() to get cell indices.
# 2006-03-30
# o Redefined and renamed method to readCdfUnitsWriteMap().
# o Removed argument 'writeMap'. This is possible because the new
# invertMap() is so fast.
# 2006-03-28
# o Unit and cell indices are now one-based. /HB
# 2006-03-14
# o Updated code to make use of package R.utils only if it is available.
# o Added argument 'writeMap'.
# o Added more Rdoc comments and two examples with summaries etc.
# 2006-03-06
# o Added argument 'units' to read a subset of units or the units as, say,
# they are order by chromsomal position.
# 2006-03-04
# o Removed all gc(). They slow down quite a bit.
# o Created.
|
95092fca |
############################################################################
|