Bioconductor Code: affxparser

Browse code

Version: 1.35.1 [2014-02-27] o Same updates as in release v1.34.1.

Version: 1.34.1 [2014-02-27]
o BUG FIX: readCelUnits() could throw 'Error in vector("double", nbrOfCells *
nbrOfArrays) : vector size cannot be NA. In addition: Warning message:
In nbrOfCells * nbrOfArrays : NAs produced by integer overflow' when reading
from a large number of arrays and/or a large number of units. Previously
the limit of nbrOfCells*nbrOfArrays was .Machine$integer.max (=2147483647),
whereas now it is .Machine$double.xmax (=1.797693e+308). Thanks to
Damian Plichta at the Technical University of Denmark for reporting on this.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@86898 bc3139a8-67e5-0310-9ffc-ced21a209358

H Bengtsson authored on 27/02/2014 21:37:45
Showing 3 changed files

Makefile index 7c4f8e0..f91433c 100644
inst/NEWS index a8478de..26e5a30 100644
R/readCelUnits.R index 74e17a5..90ac203 100644

Makefile

History View file @ 0257f39

@@ -11,6 +11,7 @@ MAKE=make
                      MV=mv
                      RM=rm -f
                      MKDIR=mkdir -p
                     +RMDIR=$(RM) -r
                      # PACKAGE MACROS
                      PKG_VERSION := $(shell grep -i ^version DESCRIPTION | cut -d : -d \  -f 2)
@@ -36,10 +37,11 @@ FILES_MAKEFILE := $(wildcard ../../Makefile)
                      DIR_VIGNS := $(wildcard vignettes inst/doc)
                      # R MACROS
                     -R_HOME := $(shell echo "$(R_HOME)" | tr "\\\\" "/")
                     -R = R --no-init-file
                     -R_CMD = $(R) CMD
                     +R = R
                      R_SCRIPT = Rscript
                     +R_HOME := $(shell echo "$(R_HOME)" | tr "\\\\" "/")
                     +## R_USE_CRAN := $(shell $(R_SCRIPT) -e "cat(Sys.getenv('R_USE_CRAN', 'FALSE'))")
                     +R_NO_INIT := --no-init-file
                      R_VERSION_STATUS := $(shell $(R_SCRIPT) -e "status <- tolower(R.version[['status']]); if (regexpr('unstable', status) != -1L) status <- 'devel'; cat(status)")
                      R_VERSION := $(shell $(R_SCRIPT) -e "cat(as.character(getRversion()))")
                      R_VERSION_FULL := $(R_VERSION)$(R_VERSION_STATUS)
@@ -48,10 +50,11 @@ R_OUTDIR := _R-$(R_VERSION_FULL)
                      ## R_BUILD_OPTS :=
                      ## R_BUILD_OPTS := $(R_BUILD_OPTS) --no-build-vignettes
                      R_CHECK_OUTDIR := $(R_OUTDIR)/$(PKG_NAME).Rcheck
                     -R_CHECK_CRAN_INCOMING = $(shell $(R_SCRIPT) -e "cat(Sys.getenv('R_CHECK_CRAN_INCOMING', 'TRUE'))")
                     -_R_CHECK_XREFS_REPOSITORIES_ = $(shell if $(R_CHECK_CRAN_INCOMING) == "TRUE"; then echo ""; else echo "invalidURL"; fi)
                     -R_CHECK_FULL = $(shell $(R_SCRIPT) -e "cat(Sys.getenv('R_CHECK_FULL', ''))")
                     +_R_CHECK_CRAN_INCOMING_ = $(shell $(R_SCRIPT) -e "cat(Sys.getenv('_R_CHECK_CRAN_INCOMING_', 'FALSE'))")
                     +_R_CHECK_XREFS_REPOSITORIES_ = $(shell if $(_R_CHECK_CRAN_INCOMING_) = "TRUE"; then echo ""; else echo "invalidURL"; fi)
                     +_R_CHECK_FULL_ = $(shell $(R_SCRIPT) -e "cat(Sys.getenv('_R_CHECK_FULL_', ''))")
                      R_CHECK_OPTS = --as-cran --timings
                     +R_RD4PDF = $(shell $(R_SCRIPT) -e "if (getRversion() < 3) cat('times,hyper')")
                      R_CRAN_OUTDIR := $(R_OUTDIR)/$(PKG_NAME)_$(PKG_VERSION).CRAN
                      HAS_ASPELL := $(shell $(R_SCRIPT) -e "cat(Sys.getenv('HAS_ASPELL', !is.na(utils:::aspell_find_program('aspell'))))")
@@ -72,7 +75,8 @@ debug:
                      	@echo HAS_ASPELL=\'$(HAS_ASPELL)\'
                      	@echo
                      	@echo R=\'$(R)\'
                     -	@echo R_CMD=\'$(R_CMD)\'
                     +##	@echo R_USE_CRAN=\'$(R_USE_CRAN)\'
                     +	@echo R_NO_INIT=\'$(R_NO_INIT)\'
                      	@echo R_SCRIPT=\'$(R_SCRIPT)\'
                      	@echo R_VERSION=\'$(R_VERSION)\'
                      	@echo R_VERSION_STATUS=\'$(R_VERSION_STATUS)\'
@@ -85,10 +89,11 @@ debug:
                      	@echo R_BUILD_OPTS=\'$(R_BUILD_OPTS)\'
                      	@echo
                      	@echo R_CHECK_OUTDIR=\'$(R_CHECK_OUTDIR)\'
                     -	@echo R_CHECK_CRAN_INCOMING=\'$(R_CHECK_CRAN_INCOMING)\'
                     +	@echo _R_CHECK_CRAN_INCOMING_=\'$(_R_CHECK_CRAN_INCOMING_)\'
                      	@echo _R_CHECK_XREFS_REPOSITORIES_=\'$(_R_CHECK_XREFS_REPOSITORIES_)\'
                     -	@echo R_CHECK_FULL=\'$(R_CHECK_FULL)\'
                     +	@echo _R_CHECK_FULL_=\'$(_R_CHECK_FULL_)\'
                      	@echo R_CHECK_OPTS=\'$(R_CHECK_OPTS)\'
                     +	@echo R_RD4PDF=\'$(R_RD4PDF)\'
                      	@echo
                      	@echo R_CRAN_OUTDIR=\'$(R_CRAN_OUTDIR)\'
@@ -123,11 +128,14 @@ setup:	update deps
                      	$(R_SCRIPT) -e "source('http://aroma-project.org/hbLite.R'); hbLite('R.oo')"
                     +ns:
                     +	$(R_SCRIPT) -e "library('$(PKG_NAME)'); source('X:/devtools/NAMESPACE.R'); writeNamespaceSection('$(PKG_NAME)'); writeNamespaceImports('$(PKG_NAME)');"
+                    +
                      # Build source tarball
                      ../$(R_OUTDIR)/$(PKG_TARBALL): $(PKG_FILES)
                      	$(MKDIR) ../$(R_OUTDIR)
                      	$(CD) ../$(R_OUTDIR);\
                     -	$(R_CMD) build $(R_BUILD_OPTS) ../$(PKG_DIR)
                     +	$(R) $(R_NO_INIT) CMD build $(R_BUILD_OPTS) ../$(PKG_DIR)
                      build: ../$(R_OUTDIR)/$(PKG_TARBALL)
@@ -139,7 +147,7 @@ build_force:
                      # Install on current system
                      $(R_LIBS_USER_X)/$(PKG_NAME)/DESCRIPTION: ../$(R_OUTDIR)/$(PKG_TARBALL)
                      	$(CD) ../$(R_OUTDIR);\
                     -	$(R_CMD) INSTALL $(PKG_TARBALL)
                     +	$(R) --no-init-file CMD INSTALL $(PKG_TARBALL)
                      install: $(R_LIBS_USER_X)/$(PKG_NAME)/DESCRIPTION
@@ -152,14 +160,15 @@ install_force:
                      ../$(R_CHECK_OUTDIR)/.check.complete: ../$(R_OUTDIR)/$(PKG_TARBALL)
                      	$(CD) ../$(R_OUTDIR);\
                      	$(RM) -r $(PKG_NAME).Rcheck;\
                     -	export _R_CHECK_CRAN_INCOMING_=$(R_CHECK_CRAN_INCOMING);\
                     +	export _R_CHECK_CRAN_INCOMING_=$(_R_CHECK_CRAN_INCOMING_);\
                      	export _R_CHECK_CRAN_INCOMING_USE_ASPELL_=$(HAS_ASPELL);\
                      	export _R_CHECK_XREFS_REPOSITORIES_=$(_R_CHECK_XREFS_REPOSITORIES_);\
                      	export _R_CHECK_DOT_INTERNAL_=1;\
                      	export _R_CHECK_USE_CODETOOLS_=1;\
                      	export _R_CHECK_FORCE_SUGGESTS_=0;\
                     -	export _R_CHECK_FULL_=$(R_CHECK_FULL);\
                     -	$(R_CMD) check $(R_CHECK_OPTS) $(PKG_TARBALL);\
                     +	export R_RD4PDF=$(R_RD4PDF);\
                     +	export _R_CHECK_FULL_=$(_R_CHECK_FULL_);\
                     +	$(R) --no-init-file CMD check $(R_CHECK_OPTS) $(PKG_TARBALL);\
                      	echo done > $(PKG_NAME).Rcheck/.check.complete
                      check: ../$(R_CHECK_OUTDIR)/.check.complete
@@ -173,7 +182,7 @@ check_force:
                      # Install and build binaries
                      binary: ../$(R_OUTDIR)/$(PKG_TARBALL)
                      	$(CD) ../$(R_OUTDIR);\
                     -	$(R_CMD) INSTALL --build --merge-multiarch $(PKG_TARBALL)
                     +	$(R) --no-init-file CMD INSTALL --build --merge-multiarch $(PKG_TARBALL)
                      # Check the line width of incl/*.(R|Rex) files [max 100 chars in R devel]
@@ -215,6 +224,7 @@ vignettes: ../$(R_OUTDIR)/vigns
                      # Run package tests
                      ../$(R_OUTDIR)/tests/%.R: $(FILES_TESTS)
                     +	$(RMDIR) ../$(R_OUTDIR)/tests
                      	$(MKDIR) ../$(R_OUTDIR)/tests
                      	$(CP) $? ../$(R_OUTDIR)/tests
@@ -224,6 +234,11 @@ test: ../$(R_OUTDIR)/tests/%.R
                      	$(CD) ../$(R_OUTDIR)/tests;\
                      	$(R_SCRIPT) -e "for (f in list.files(pattern='[.]R$$')) { source(f, echo=TRUE) }"
                     +test_full: ../$(R_OUTDIR)/tests/%.R
                     +	$(CD) ../$(R_OUTDIR)/tests;\
                     +	export _R_CHECK_FULL_=TRUE;\
                     +	$(R_SCRIPT) -e "for (f in list.files(pattern='[.]R$$')) { source(f, echo=TRUE) }"
+                    +
                      # Run extensive CRAN submission checks

inst/NEWS

History View file @ 0257f39

                     similarity index 97%
                     rename from inst/NEWS
                     rename to NEWS
@@ -1,6 +1,20 @@
                      Package: affxparser
                      ===================
                     +Version: 1.35.1 [2014-02-27]
                     +o Same updates as in release v1.34.1.
+                    +
+                    +
                     +Version: 1.34.1 [2014-02-27]
                     +o BUG FIX: readCelUnits() could throw 'Error in vector("double", nbrOfCells *
                     +  nbrOfArrays) : vector size cannot be NA.  In addition: Warning message:
                     +  In nbrOfCells * nbrOfArrays : NAs produced by integer overflow' when reading
                     +  from a large number of arrays and/or a large number of units.  Previously
                     +  the limit of nbrOfCells*nbrOfArrays was .Machine$integer.max (=2147483647),
                     +  whereas now it is .Machine$double.xmax (=1.797693e+308).  Thanks to
                     +  Damian Plichta at the Technical University of Denmark for reporting on this.
+                    +
+                    +
                      Version: 1.35.0 [2013-10-14]
                      o The version number was bumped for the Bioconductor devel version.

R/readCelUnits.R

History View file @ 0257f39

@@ -109,7 +109,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                        } else if (is.numeric(units)) {
                          units <- as.integer(units);
                          # Unit indices are one-based in R
                     -    if (any(units < 1))
                     +    if (any(units < 1L))
                            stop("Argument 'units' contains non-positive indices.");
                        } else {
                          stop("Argument 'units' must be numeric or NULL: ", class(units)[1]);
@@ -125,7 +125,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                            stop("File not found: ", cdfFile);
                          cdf <- NULL;
                        } else if (is.list(cdf)) {
                     -    aUnit <- cdf[[1]];
                     +    aUnit <- cdf[[1L]];
                          if (!is.list(aUnit))
                            stop("Argument 'cdf' is of unknown format: First unit is not a list.");
@@ -133,7 +133,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                          if (!is.list(groups))
                            stop("Argument 'cdf' is of unknown format: Units Does not contain the list 'groups'.");
                     -    extractGroups <- (length(names(aUnit)) > 1);
                     +    extractGroups <- (length(names(aUnit)) > 1L);
                          # Check for group fields 'indices' or 'x' & 'y' in one of the groups.
                          aGroup <- groups[[1]];
@@ -142,7 +142,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                          fields <- names(aGroup);
                          if ("indices" %in% fields) {
                            cdfType <- "indices";
                     -      extractFields <- (length(fields) > 1);
                     +      extractFields <- (length(fields) > 1L);
                          } else if (all(c("x", "y") %in% fields)) {
                            # The CDF is needed in order to know the (x,y) dimensions of the
                            # chip so that one can calculate (x,y) -> cell index.
@@ -206,22 +206,22 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                          verbose && enter(verbose, "Searching for CDF file");
                          verbose && enter(verbose, "Reading chip type from first CEL file");
                     -    celHeader <- readCelHeader(filenames[1]);
                     +    celHeader <- readCelHeader(filenames[1L]);
                          chipType <- celHeader$chiptype;
                          verbose && exit(verbose);
                          verbose && enter(verbose, "Searching for chip type '", chipType, "'");
                          cdfFile <- findCdf(chipType=chipType);
                     -    if (length(cdfFile) == 0) {
                     +    if (length(cdfFile) == 0L) {
                            # If not found, try also where the first CEL file is
                            opwd <- getwd();
                            on.exit(setwd(opwd));
                     -      setwd(dirname(filenames[1]));
                     +      setwd(dirname(filenames[1L]));
                            cdfFile <- findCdf(chipType=chipType);
                            setwd(opwd);
+                         }
                          verbose && exit(verbose);
                     -    if (length(cdfFile) == 0)
                     +    if (length(cdfFile) == 0L)
                            stop("No CDF file for chip type found: ", chipType);
                          verbose && exit(verbose);
@@ -310,6 +310,14 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                        nbrOfCells <- length(indices);
                        nbrOfUnits <- length(cdf);
                     +  # Because integer 'nbrOfCells*nbrOfArrays' may overflow to NA, we coerce to double
                     +  # here.  See aroma.affymetrix thread 'Speeding up RmaBackgroundCorrection' on
                     +  # 2014-02-27 for background/details.
                     +  # FIXME: Ideally, this function should be rewritten to read signals and group them
                     +  # into CEL units in chunks. /HB 2014-02-27
                     +  nbrOfEntries <- as.double(nbrOfCells) * as.double(nbrOfArrays);
                     +  stopifnot(is.finite(nbrOfEntries));
+                    +
                        verbose && enter(verbose, "Reading ", nbrOfUnits, "*", nbrOfCells/nbrOfUnits, "=", nbrOfCells, " cells from ", nbrOfArrays, " CEL files");
                        # Cell-value elements
@@ -317,6 +325,9 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                        integerFields <- "pixels";
                        doubleFields <- setdiff(cellValueFields, integerFields);
                     +  # Local environment where to store the temporary variables
                     +  env <- environment();
+                    +
                        for (kk in seq(length=nbrOfArrays)) {
                          filename <- filenames[kk];
@@ -324,7 +335,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                          celTmp <- readCel(filename, indices=indices, readHeader=FALSE, readOutliers=FALSE, readMasked=FALSE, ..., readMap=NULL, verbose=cVerbose, .checkArgs=FALSE);
                          verbose && exit(verbose);
                     -    if (kk == 1) {
                     +    if (kk == 1L) {
                            verbose && enter(verbose, "Allocating return structure");
                            # Allocate the return list structure
                      #      celTmp$header <- NULL;
@@ -337,15 +348,17 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                            # Allocate all field variables
                            dim <- c(nbrOfCells, nbrOfArrays);
                     -      value <- vector("double", nbrOfCells*nbrOfArrays);
                     +      value <- vector("double", length=nbrOfEntries);
                            dim(value) <- dim;
                            for (name in doubleFields)
                     -        assign(name, value);
                     +        assign(name, value, envir=env, inherits=FALSE);
                     +      value <- NULL; # Not needed anymore
                     -      value <- vector("integer", nbrOfCells*nbrOfArrays);
                     +      value <- vector("integer", length=nbrOfEntries);
                            dim(value) <- dim;
                            for (name in integerFields)
                     -        assign(name, value);
                     +        assign(name, value, envir=env, inherits=FALSE);
                     +      value <- NULL; # Not needed anymore
                            verbose && exit(verbose);
+                         }
@@ -370,7 +383,8 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
+                           }
                            eval(substitute(name[,kk] <- value, list(name=as.name(name))));
                     -    }
                     +      value <- NULL; # Not needed anymore
                     +    } # for (name ...)
                          celTmp <- NULL; # Not needed anymore
+                       }
@@ -384,7 +398,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                        verbose && enter(verbose, "Structuring data by units and groups");
                     -  fields <- vector("list", length(cellValueFields));
                     +  fields <- vector("list", length=length(cellValueFields));
                        names(fields) <- cellValueFields;
                        # Keep a copy for groups with empty fields
@@ -392,19 +406,19 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                        # Add a dimension for the arrays, unless only one array is read
                        # and the array dimension is not wanted.
                     -  addArrayDim <- (nbrOfArrays >= 2 || !dropArrayDim);
                     +  addArrayDim <- (nbrOfArrays >= 2L || !dropArrayDim);
                        seqOfArrays <- list(1:nbrOfArrays);
                     -  offset <- 0;
                     +  offset <- 0L;
                        res <- lapply(cdf, FUN=function(u) {
                          lapply(.subset2(u, "groups"), FUN=function(g) {
                            # Same dimensions of all fields
                     -      field <- .subset2(g, 1);  # Faster than g[[1]]
                     +      field <- .subset2(g, 1L);  # Faster than g[[1L]]
                            ncells <- length(field);
                            # Empty unit group?
                     -      if (ncells == 0)
                     +      if (ncells == 0L)
                              return(emptyFields);
                            idxs <- offset + 1:ncells;
@@ -427,10 +441,10 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
+                             }
                              # Update all fields with dimensions
                     -        setDim <- (length(dim) > 1);
                     +        setDim <- (length(dim) > 1L);
                              for (name in cellValueFields) {
                                # Faster to drop dimensions.
                     -          values <- get(name)[idxs,,drop=TRUE];
                     +          values <- get(name, envir=env, inherits=FALSE)[idxs,,drop=TRUE];
                                if (setDim) {
                                  dim(values) <- dim;
                                  dimnames(values) <- dimnames;
@@ -438,6 +452,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                                  names(values) <- dimnames;
+                               }
                                fields[[name]] <- values;
                     +          values <- NULL; # Not needed anymore
+                             }
                            } else {
                             # Add an extra dimension for arrays?
@@ -445,19 +460,20 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                                dim <- c(dim, nbrOfArrays);
                              # Update all fields with dimensions
                     -        setDim <- (length(dim) > 1);
                     +        setDim <- (length(dim) > 1L);
                              for (name in cellValueFields) {
                                # Faster to drop dimensions.
                     -          values <- get(name)[idxs,,drop=TRUE];
                     +          values <- get(name, envir=env, inherits=FALSE)[idxs,,drop=TRUE];
                                if (setDim)
                                  dim(values) <- dim;
                                fields[[name]] <- values;
                     +          values <- NULL; # Not needed anymore
+                             }
                            } # if (addDimnames)
                            fields;
                     -    });
                     -  })
                     +    }) # lapply(.subset2(u, "groups"), ...);
                     +  }) # lapply(cdf, ...)
                        verbose && exit(verbose);
@@ -467,6 +483,9 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
                      ############################################################################
                      # HISTORY:
                     +# 2014-02-27 [HB]
                     +# o ROBUSTNESS: Using integer constants (e.g. 1L) where applicable.
                     +# o ROBUSTNESS: Using explicitly named arguments in more places.
                      # 2012-05-22 [HB]
                      # o CRAN POLICY: readCel() and readCelUnits() are no longer calling
                      #   .Internal(qsort(...)).