R/readCdfUnitsWriteMap.R
efd288a9
 #########################################################################/**
 # @RdocFunction readCdfUnitsWriteMap
 #
 # @title "Generates an Affymetrix cell-index write map from a CDF file"
 #
95092fca
 # @synopsis
 #
efd288a9
 # \description{
 #   @get "title".
 #
 #   The purpose of this method is to provide a re-ordering of cell elements
7ba76859
 #   such that cells in units (probesets) can be stored in contiguous blocks.
efd288a9
 #   When reading cell elements unit by unit, minimal file re-position is
 #   required resulting in a faster reading.
 #
95092fca
 #   Note: At the moment does this package not provide methods to
efd288a9
 #   write/reorder CEL files.  In the meanwhile, you have to write
 #   and re-read using your own file format.  That's not too hard using
 #   \code{writeBin()} and @see "base::readBin".
 # }
95092fca
 #
efd288a9
 # \arguments{
 #   \item{filename}{The pathname of the CDF file.}
 #   \item{units}{An @integer @vector of unit indices specifying which units
95092fca
 #     to listed first.  All other units are added in order at the end.
efd288a9
 #     If @NULL, units are in order.}
 #   \item{...}{Additional arguments passed to @see "readCdfUnits".}
 #   \item{verbose}{Either a @logical, a @numeric, or a @see "R.utils::Verbose"
 #     object specifying how much verbose/debug information is written to
 #     standard output. If a Verbose object, how detailed the information is
 #     is specified by the threshold level of the object. If a numeric, the
95092fca
 #     value is used to set the threshold of a new Verbose object. If @TRUE,
efd288a9
 #     the threshold is set to -1 (minimal). If @FALSE, no output is written
 #     (and neither is the \pkg{R.utils} package required).}
 # }
95092fca
 #
efd288a9
 # \value{
 #   A @integer @vector which is a \emph{write} map.
 # }
 #
76cf4b26
 # @author "HB"
95092fca
 #
efd288a9
 # \examples{
 #   @include "../incl/readCdfUnitsWriteMap.Rex"
 #
 #   @include "../incl/readCdfUnitsWriteMap.2.Rex"
 # }
95092fca
 #
efd288a9
 # \seealso{
 #   To invert maps, see @see "invertMap".
 #   @see "readCel" and @see "readCelUnits".
 # }
95092fca
 #
efd288a9
 # @keyword "file"
 # @keyword "IO"
f1d6fcf0
 # @keyword "internal"
efd288a9
 #*/#########################################################################
 readCdfUnitsWriteMap <- function(filename, units=NULL, ..., verbose=FALSE) {
0b615a31
   # To please R CMD check
   Arguments <- enter <- exit <- NULL;
   rm(list=c("Arguments", "enter", "exit"));
 
95092fca
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
efd288a9
   # Validate arguments
95092fca
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
efd288a9
   # Argument 'filename':
   # Replace '~':s
   filename <- file.path(dirname(filename), basename(filename));
 
   # Argument 'units':
   if (!is.null(units)) {
     units <- as.numeric(units);
     if (any(is.na(units))) {
       stop("Argument 'units' contains NAs");
     }
     nok <- (units < 1);
     if (any(nok)) {
       nok <- paste(units[nok], collapse=", ");
       stop("Argument 'units' contains non-positive indices: ", nok);
     }
     nok <- duplicated(units);
     if (any(nok)) {
       nok <- paste(units[nok], collapse=", ");
       stop("Argument 'units' contains duplicated indices: ", nok);
     }
   }
 
   # Argument 'verbose':
   if (!identical(verbose, FALSE)) {
0e455230
     requireNamespace("R.utils") || stop("Package not loaded: R.utils");
     Arguments <- R.utils::Arguments
     enter <- R.utils::enter
     exit <- R.utils::exit
efd288a9
     verbose <- Arguments$getVerbose(verbose);
   }
 
 
95092fca
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
efd288a9
   # Read CDF header and process 'units' further
95092fca
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
efd288a9
   header <- readCdfHeader(filename);
65301877
   nbrOfCells <- header$ncols * header$nrows;
efd288a9
   nbrOfUnits <- header$probesets;
 
   nok <- (units > nbrOfUnits);
   if (any(nok)) {
     nok <- paste(units[nok], collapse=", ");
     stop("Argument 'units' contains indices out of range [1,", nbrOfUnits,
                                                                "]: ", nok);
   }
 
95092fca
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
efd288a9
   # Read CDF file
95092fca
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
efd288a9
   # Read cell indices unit by unit
   verbose && enter(verbose, "Reading cell indices unit by unit from CDF file");
   indices <- readCdfCellIndices(filename, units=units, ..., verbose=FALSE);
   verbose && exit(verbose);
 
95092fca
 
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
efd288a9
   # Return cell indices according to 'units'
95092fca
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
efd288a9
   if (!is.null(units)) {
     verbose && enter(verbose, "Reordering by units");
     # Was only a subset of units specified?
     if (length(units) != nbrOfUnits) {
       verbose && enter(verbose, "Adding missing unit indices");
       allUnits <- 1:nbrOfUnits;
       missing <- setdiff(allUnits, units);
       units <- c(units, missing);
95092fca
       missing <- allUnits <- NULL; # Not needed anymore
efd288a9
       verbose && exit(verbose);
     }
 
     # Now, reorder the units (here 'indices') accordingly.
     indices <- indices[units];
 
95092fca
     units <- NULL; # Not needed anymore
efd288a9
     verbose && exit(verbose);
   }
 
95092fca
   indices <- unlist(indices, use.names=FALSE);
efd288a9
 
 
95092fca
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
efd288a9
   # Create index map
95092fca
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
efd288a9
   verbose && enter(verbose, "Adding missing cell indices");
95092fca
   # Add non-probeset cells to the end.
efd288a9
   # (Note that readCdfCellIndices() do not read these guys.)
   allIndices <- 1:nbrOfCells;
   missing <- setdiff(allIndices, indices);
   indices <- c(indices, missing);
95092fca
   missing <- NULL; # Not needed anymore
efd288a9
   verbose && exit(verbose);
 
   # Returns the write map
   indices;
 }
 
 
 ############################################################################
 # HISTORY:
65301877
 # 2006-09-07
 # o BUG FIX: Tried to access fields 'cols' and 'rows' instead of 'ncols'
 #   and 'nrows' in the CDF header.
efd288a9
 # 2006-04-01
 # o Now using readCdfCellIndices() to get cell indices.
 # 2006-03-30
 # o Redefined and renamed method to readCdfUnitsWriteMap().
 # o Removed argument 'writeMap'.  This is possible because the new
 #   invertMap() is so fast.
 # 2006-03-28
 # o Unit and cell indices are now one-based. /HB
 # 2006-03-14
 # o Updated code to make use of package R.utils only if it is available.
 # o Added argument 'writeMap'.
 # o Added more Rdoc comments and two examples with summaries etc.
 # 2006-03-06
 # o Added argument 'units' to read a subset of units or the units as, say,
 #   they are order by chromsomal position.
 # 2006-03-04
 # o Removed all gc(). They slow down quite a bit.
 # o Created.
95092fca
 ############################################################################