Browse code

Version: 1.35.1 [2014-02-27] o Same updates as in release v1.34.1.

Version: 1.34.1 [2014-02-27]
o BUG FIX: readCelUnits() could throw 'Error in vector("double", nbrOfCells *
nbrOfArrays) : vector size cannot be NA. In addition: Warning message:
In nbrOfCells * nbrOfArrays : NAs produced by integer overflow' when reading
from a large number of arrays and/or a large number of units. Previously
the limit of nbrOfCells*nbrOfArrays was .Machine$integer.max (=2147483647),
whereas now it is .Machine$double.xmax (=1.797693e+308). Thanks to
Damian Plichta at the Technical University of Denmark for reporting on this.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/affxparser@86898 bc3139a8-67e5-0310-9ffc-ced21a209358

H Bengtsson authored on 27/02/2014 21:37:45
Showing 3 changed files

... ...
@@ -11,6 +11,7 @@ MAKE=make
11 11
 MV=mv
12 12
 RM=rm -f
13 13
 MKDIR=mkdir -p
14
+RMDIR=$(RM) -r
14 15
 
15 16
 # PACKAGE MACROS
16 17
 PKG_VERSION := $(shell grep -i ^version DESCRIPTION | cut -d : -d \  -f 2)
... ...
@@ -36,10 +37,11 @@ FILES_MAKEFILE := $(wildcard ../../Makefile)
36 37
 DIR_VIGNS := $(wildcard vignettes inst/doc)
37 38
 
38 39
 # R MACROS
39
-R_HOME := $(shell echo "$(R_HOME)" | tr "\\\\" "/")
40
-R = R --no-init-file
41
-R_CMD = $(R) CMD
40
+R = R
42 41
 R_SCRIPT = Rscript
42
+R_HOME := $(shell echo "$(R_HOME)" | tr "\\\\" "/")
43
+## R_USE_CRAN := $(shell $(R_SCRIPT) -e "cat(Sys.getenv('R_USE_CRAN', 'FALSE'))")
44
+R_NO_INIT := --no-init-file
43 45
 R_VERSION_STATUS := $(shell $(R_SCRIPT) -e "status <- tolower(R.version[['status']]); if (regexpr('unstable', status) != -1L) status <- 'devel'; cat(status)")
44 46
 R_VERSION := $(shell $(R_SCRIPT) -e "cat(as.character(getRversion()))")
45 47
 R_VERSION_FULL := $(R_VERSION)$(R_VERSION_STATUS)
... ...
@@ -48,10 +50,11 @@ R_OUTDIR := _R-$(R_VERSION_FULL)
48 50
 ## R_BUILD_OPTS := 
49 51
 ## R_BUILD_OPTS := $(R_BUILD_OPTS) --no-build-vignettes
50 52
 R_CHECK_OUTDIR := $(R_OUTDIR)/$(PKG_NAME).Rcheck
51
-R_CHECK_CRAN_INCOMING = $(shell $(R_SCRIPT) -e "cat(Sys.getenv('R_CHECK_CRAN_INCOMING', 'TRUE'))")
52
-_R_CHECK_XREFS_REPOSITORIES_ = $(shell if $(R_CHECK_CRAN_INCOMING) == "TRUE"; then echo ""; else echo "invalidURL"; fi)
53
-R_CHECK_FULL = $(shell $(R_SCRIPT) -e "cat(Sys.getenv('R_CHECK_FULL', ''))")
53
+_R_CHECK_CRAN_INCOMING_ = $(shell $(R_SCRIPT) -e "cat(Sys.getenv('_R_CHECK_CRAN_INCOMING_', 'FALSE'))")
54
+_R_CHECK_XREFS_REPOSITORIES_ = $(shell if $(_R_CHECK_CRAN_INCOMING_) = "TRUE"; then echo ""; else echo "invalidURL"; fi)
55
+_R_CHECK_FULL_ = $(shell $(R_SCRIPT) -e "cat(Sys.getenv('_R_CHECK_FULL_', ''))")
54 56
 R_CHECK_OPTS = --as-cran --timings
57
+R_RD4PDF = $(shell $(R_SCRIPT) -e "if (getRversion() < 3) cat('times,hyper')")
55 58
 R_CRAN_OUTDIR := $(R_OUTDIR)/$(PKG_NAME)_$(PKG_VERSION).CRAN
56 59
 
57 60
 HAS_ASPELL := $(shell $(R_SCRIPT) -e "cat(Sys.getenv('HAS_ASPELL', !is.na(utils:::aspell_find_program('aspell'))))")
... ...
@@ -72,7 +75,8 @@ debug:
72 75
 	@echo HAS_ASPELL=\'$(HAS_ASPELL)\'
73 76
 	@echo
74 77
 	@echo R=\'$(R)\'
75
-	@echo R_CMD=\'$(R_CMD)\'
78
+##	@echo R_USE_CRAN=\'$(R_USE_CRAN)\'
79
+	@echo R_NO_INIT=\'$(R_NO_INIT)\'
76 80
 	@echo R_SCRIPT=\'$(R_SCRIPT)\'
77 81
 	@echo R_VERSION=\'$(R_VERSION)\'
78 82
 	@echo R_VERSION_STATUS=\'$(R_VERSION_STATUS)\'
... ...
@@ -85,10 +89,11 @@ debug:
85 89
 	@echo R_BUILD_OPTS=\'$(R_BUILD_OPTS)\'
86 90
 	@echo
87 91
 	@echo R_CHECK_OUTDIR=\'$(R_CHECK_OUTDIR)\'
88
-	@echo R_CHECK_CRAN_INCOMING=\'$(R_CHECK_CRAN_INCOMING)\'
92
+	@echo _R_CHECK_CRAN_INCOMING_=\'$(_R_CHECK_CRAN_INCOMING_)\'
89 93
 	@echo _R_CHECK_XREFS_REPOSITORIES_=\'$(_R_CHECK_XREFS_REPOSITORIES_)\'
90
-	@echo R_CHECK_FULL=\'$(R_CHECK_FULL)\'
94
+	@echo _R_CHECK_FULL_=\'$(_R_CHECK_FULL_)\'
91 95
 	@echo R_CHECK_OPTS=\'$(R_CHECK_OPTS)\'
96
+	@echo R_RD4PDF=\'$(R_RD4PDF)\'
92 97
 	@echo
93 98
 	@echo R_CRAN_OUTDIR=\'$(R_CRAN_OUTDIR)\'
94 99
 
... ...
@@ -123,11 +128,14 @@ setup:	update deps
123 128
 	$(R_SCRIPT) -e "source('http://aroma-project.org/hbLite.R'); hbLite('R.oo')"
124 129
 
125 130
 
131
+ns:
132
+	$(R_SCRIPT) -e "library('$(PKG_NAME)'); source('X:/devtools/NAMESPACE.R'); writeNamespaceSection('$(PKG_NAME)'); writeNamespaceImports('$(PKG_NAME)');"
133
+
126 134
 # Build source tarball
127 135
 ../$(R_OUTDIR)/$(PKG_TARBALL): $(PKG_FILES)
128 136
 	$(MKDIR) ../$(R_OUTDIR)
129 137
 	$(CD) ../$(R_OUTDIR);\
130
-	$(R_CMD) build $(R_BUILD_OPTS) ../$(PKG_DIR)
138
+	$(R) $(R_NO_INIT) CMD build $(R_BUILD_OPTS) ../$(PKG_DIR)
131 139
 
132 140
 build: ../$(R_OUTDIR)/$(PKG_TARBALL)
133 141
 
... ...
@@ -139,7 +147,7 @@ build_force:
139 147
 # Install on current system
140 148
 $(R_LIBS_USER_X)/$(PKG_NAME)/DESCRIPTION: ../$(R_OUTDIR)/$(PKG_TARBALL)
141 149
 	$(CD) ../$(R_OUTDIR);\
142
-	$(R_CMD) INSTALL $(PKG_TARBALL)
150
+	$(R) --no-init-file CMD INSTALL $(PKG_TARBALL)
143 151
 
144 152
 install: $(R_LIBS_USER_X)/$(PKG_NAME)/DESCRIPTION
145 153
 
... ...
@@ -152,14 +160,15 @@ install_force:
152 160
 ../$(R_CHECK_OUTDIR)/.check.complete: ../$(R_OUTDIR)/$(PKG_TARBALL)
153 161
 	$(CD) ../$(R_OUTDIR);\
154 162
 	$(RM) -r $(PKG_NAME).Rcheck;\
155
-	export _R_CHECK_CRAN_INCOMING_=$(R_CHECK_CRAN_INCOMING);\
163
+	export _R_CHECK_CRAN_INCOMING_=$(_R_CHECK_CRAN_INCOMING_);\
156 164
 	export _R_CHECK_CRAN_INCOMING_USE_ASPELL_=$(HAS_ASPELL);\
157 165
 	export _R_CHECK_XREFS_REPOSITORIES_=$(_R_CHECK_XREFS_REPOSITORIES_);\
158 166
 	export _R_CHECK_DOT_INTERNAL_=1;\
159 167
 	export _R_CHECK_USE_CODETOOLS_=1;\
160 168
 	export _R_CHECK_FORCE_SUGGESTS_=0;\
161
-	export _R_CHECK_FULL_=$(R_CHECK_FULL);\
162
-	$(R_CMD) check $(R_CHECK_OPTS) $(PKG_TARBALL);\
169
+	export R_RD4PDF=$(R_RD4PDF);\
170
+	export _R_CHECK_FULL_=$(_R_CHECK_FULL_);\
171
+	$(R) --no-init-file CMD check $(R_CHECK_OPTS) $(PKG_TARBALL);\
163 172
 	echo done > $(PKG_NAME).Rcheck/.check.complete
164 173
 
165 174
 check: ../$(R_CHECK_OUTDIR)/.check.complete
... ...
@@ -173,7 +182,7 @@ check_force:
173 182
 # Install and build binaries
174 183
 binary: ../$(R_OUTDIR)/$(PKG_TARBALL)
175 184
 	$(CD) ../$(R_OUTDIR);\
176
-	$(R_CMD) INSTALL --build --merge-multiarch $(PKG_TARBALL)
185
+	$(R) --no-init-file CMD INSTALL --build --merge-multiarch $(PKG_TARBALL)
177 186
 
178 187
 
179 188
 # Check the line width of incl/*.(R|Rex) files [max 100 chars in R devel]
... ...
@@ -215,6 +224,7 @@ vignettes: ../$(R_OUTDIR)/vigns
215 224
 
216 225
 # Run package tests
217 226
 ../$(R_OUTDIR)/tests/%.R: $(FILES_TESTS)
227
+	$(RMDIR) ../$(R_OUTDIR)/tests
218 228
 	$(MKDIR) ../$(R_OUTDIR)/tests
219 229
 	$(CP) $? ../$(R_OUTDIR)/tests
220 230
 
... ...
@@ -224,6 +234,11 @@ test: ../$(R_OUTDIR)/tests/%.R
224 234
 	$(CD) ../$(R_OUTDIR)/tests;\
225 235
 	$(R_SCRIPT) -e "for (f in list.files(pattern='[.]R$$')) { source(f, echo=TRUE) }"
226 236
 
237
+test_full: ../$(R_OUTDIR)/tests/%.R
238
+	$(CD) ../$(R_OUTDIR)/tests;\
239
+	export _R_CHECK_FULL_=TRUE;\
240
+	$(R_SCRIPT) -e "for (f in list.files(pattern='[.]R$$')) { source(f, echo=TRUE) }"
241
+
227 242
 
228 243
 
229 244
 # Run extensive CRAN submission checks
230 245
similarity index 97%
231 246
rename from inst/NEWS
232 247
rename to NEWS
... ...
@@ -1,6 +1,20 @@
1 1
 Package: affxparser
2 2
 ===================
3 3
 
4
+Version: 1.35.1 [2014-02-27]
5
+o Same updates as in release v1.34.1.
6
+
7
+
8
+Version: 1.34.1 [2014-02-27]
9
+o BUG FIX: readCelUnits() could throw 'Error in vector("double", nbrOfCells *
10
+  nbrOfArrays) : vector size cannot be NA.  In addition: Warning message:
11
+  In nbrOfCells * nbrOfArrays : NAs produced by integer overflow' when reading
12
+  from a large number of arrays and/or a large number of units.  Previously
13
+  the limit of nbrOfCells*nbrOfArrays was .Machine$integer.max (=2147483647),
14
+  whereas now it is .Machine$double.xmax (=1.797693e+308).  Thanks to
15
+  Damian Plichta at the Technical University of Denmark for reporting on this.
16
+
17
+
4 18
 Version: 1.35.0 [2013-10-14]
5 19
 o The version number was bumped for the Bioconductor devel version.
6 20
 
... ...
@@ -109,7 +109,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
109 109
   } else if (is.numeric(units)) {
110 110
     units <- as.integer(units);
111 111
     # Unit indices are one-based in R
112
-    if (any(units < 1))
112
+    if (any(units < 1L))
113 113
       stop("Argument 'units' contains non-positive indices.");
114 114
   } else {
115 115
     stop("Argument 'units' must be numeric or NULL: ", class(units)[1]);
... ...
@@ -125,7 +125,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
125 125
       stop("File not found: ", cdfFile);
126 126
     cdf <- NULL;
127 127
   } else if (is.list(cdf)) {
128
-    aUnit <- cdf[[1]];
128
+    aUnit <- cdf[[1L]];
129 129
     if (!is.list(aUnit))
130 130
       stop("Argument 'cdf' is of unknown format: First unit is not a list.");
131 131
 
... ...
@@ -133,7 +133,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
133 133
     if (!is.list(groups))
134 134
       stop("Argument 'cdf' is of unknown format: Units Does not contain the list 'groups'.");
135 135
 
136
-    extractGroups <- (length(names(aUnit)) > 1);
136
+    extractGroups <- (length(names(aUnit)) > 1L);
137 137
 
138 138
     # Check for group fields 'indices' or 'x' & 'y' in one of the groups.
139 139
     aGroup <- groups[[1]];
... ...
@@ -142,7 +142,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
142 142
     fields <- names(aGroup);
143 143
     if ("indices" %in% fields) {
144 144
       cdfType <- "indices";
145
-      extractFields <- (length(fields) > 1);
145
+      extractFields <- (length(fields) > 1L);
146 146
     } else if (all(c("x", "y") %in% fields)) {
147 147
       # The CDF is needed in order to know the (x,y) dimensions of the
148 148
       # chip so that one can calculate (x,y) -> cell index.
... ...
@@ -206,22 +206,22 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
206 206
     verbose && enter(verbose, "Searching for CDF file");
207 207
 
208 208
     verbose && enter(verbose, "Reading chip type from first CEL file");
209
-    celHeader <- readCelHeader(filenames[1]);
209
+    celHeader <- readCelHeader(filenames[1L]);
210 210
     chipType <- celHeader$chiptype;
211 211
     verbose && exit(verbose);
212 212
 
213 213
     verbose && enter(verbose, "Searching for chip type '", chipType, "'");
214 214
     cdfFile <- findCdf(chipType=chipType);
215
-    if (length(cdfFile) == 0) {
215
+    if (length(cdfFile) == 0L) {
216 216
       # If not found, try also where the first CEL file is
217 217
       opwd <- getwd();
218 218
       on.exit(setwd(opwd));
219
-      setwd(dirname(filenames[1]));
219
+      setwd(dirname(filenames[1L]));
220 220
       cdfFile <- findCdf(chipType=chipType);
221 221
       setwd(opwd);
222 222
     }
223 223
     verbose && exit(verbose);
224
-    if (length(cdfFile) == 0)
224
+    if (length(cdfFile) == 0L)
225 225
       stop("No CDF file for chip type found: ", chipType);
226 226
 
227 227
     verbose && exit(verbose);
... ...
@@ -310,6 +310,14 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
310 310
   nbrOfCells <- length(indices);
311 311
   nbrOfUnits <- length(cdf);
312 312
 
313
+  # Because integer 'nbrOfCells*nbrOfArrays' may overflow to NA, we corce to double
314
+  # here.  See aroma.affymetrix thread 'Speeding up RmaBackgroundCorrection' on
315
+  # 2014-02-27 for background/details.
316
+  # FIXME: Ideally, this function should be rewritten to read signals and group them
317
+  # into CEL units in chunks. /HB 2014-02-27
318
+  nbrOfEntries <- as.double(nbrOfCells) * as.double(nbrOfArrays);
319
+  stopifnot(is.finite(nbrOfEntries));
320
+
313 321
   verbose && enter(verbose, "Reading ", nbrOfUnits, "*", nbrOfCells/nbrOfUnits, "=", nbrOfCells, " cells from ", nbrOfArrays, " CEL files");
314 322
 
315 323
   # Cell-value elements
... ...
@@ -317,6 +325,9 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
317 325
   integerFields <- "pixels";
318 326
   doubleFields <- setdiff(cellValueFields, integerFields);
319 327
 
328
+  # Local environment where to store the temporary variables
329
+  env <- environment();
330
+
320 331
   for (kk in seq(length=nbrOfArrays)) {
321 332
     filename <- filenames[kk];
322 333
 
... ...
@@ -324,7 +335,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
324 335
     celTmp <- readCel(filename, indices=indices, readHeader=FALSE, readOutliers=FALSE, readMasked=FALSE, ..., readMap=NULL, verbose=cVerbose, .checkArgs=FALSE);
325 336
     verbose && exit(verbose);
326 337
 
327
-    if (kk == 1) {
338
+    if (kk == 1L) {
328 339
       verbose && enter(verbose, "Allocating return structure");
329 340
       # Allocate the return list structure
330 341
 #      celTmp$header <- NULL;
... ...
@@ -337,15 +348,17 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
337 348
 
338 349
       # Allocate all field variables
339 350
       dim <- c(nbrOfCells, nbrOfArrays);
340
-      value <- vector("double", nbrOfCells*nbrOfArrays);
351
+      value <- vector("double", length=nbrOfEntries);
341 352
       dim(value) <- dim;
342 353
       for (name in doubleFields)
343
-        assign(name, value);
354
+        assign(name, value, envir=env, inherits=FALSE);
355
+      value <- NULL; # Not needed anymore
344 356
 
345
-      value <- vector("integer", nbrOfCells*nbrOfArrays);
357
+      value <- vector("integer", length=nbrOfEntries);
346 358
       dim(value) <- dim;
347 359
       for (name in integerFields)
348
-        assign(name, value);
360
+        assign(name, value, envir=env, inherits=FALSE);
361
+      value <- NULL; # Not needed anymore
349 362
 
350 363
       verbose && exit(verbose);
351 364
     }
... ...
@@ -370,7 +383,8 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
370 383
       }
371 384
 
372 385
       eval(substitute(name[,kk] <- value, list(name=as.name(name))));
373
-    }
386
+      value <- NULL; # Not needed anymore
387
+    } # for (name ...)
374 388
 
375 389
     celTmp <- NULL; # Not needed anymore
376 390
   }
... ...
@@ -384,7 +398,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
384 398
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
385 399
   verbose && enter(verbose, "Structuring data by units and groups");
386 400
 
387
-  fields <- vector("list", length(cellValueFields));
401
+  fields <- vector("list", length=length(cellValueFields));
388 402
   names(fields) <- cellValueFields;
389 403
 
390 404
   # Keep a copy for groups with empty fields
... ...
@@ -392,19 +406,19 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
392 406
 
393 407
   # Add a dimension for the arrays, unless only one array is read
394 408
   # and the array dimension is not wanted.
395
-  addArrayDim <- (nbrOfArrays >= 2 || !dropArrayDim);
409
+  addArrayDim <- (nbrOfArrays >= 2L || !dropArrayDim);
396 410
 
397 411
   seqOfArrays <- list(1:nbrOfArrays);
398
-  offset <- 0;
412
+  offset <- 0L;
399 413
 
400 414
   res <- lapply(cdf, FUN=function(u) {
401 415
     lapply(.subset2(u, "groups"), FUN=function(g) {
402 416
       # Same dimensions of all fields
403
-      field <- .subset2(g, 1);  # Faster than g[[1]]
417
+      field <- .subset2(g, 1L);  # Faster than g[[1L]]
404 418
       ncells <- length(field);
405 419
 
406 420
       # Empty unit group?
407
-      if (ncells == 0)
421
+      if (ncells == 0L)
408 422
         return(emptyFields);
409 423
 
410 424
       idxs <- offset + 1:ncells;
... ...
@@ -427,10 +441,10 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
427 441
         }
428 442
 
429 443
         # Update all fields with dimensions
430
-        setDim <- (length(dim) > 1);
444
+        setDim <- (length(dim) > 1L);
431 445
         for (name in cellValueFields) {
432 446
           # Faster to drop dimensions.
433
-          values <- get(name)[idxs,,drop=TRUE];
447
+          values <- get(name, envir=env, inherits=FALSE)[idxs,,drop=TRUE];
434 448
           if (setDim) {
435 449
             dim(values) <- dim;
436 450
             dimnames(values) <- dimnames;
... ...
@@ -438,6 +452,7 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
438 452
             names(values) <- dimnames;
439 453
           }
440 454
           fields[[name]] <- values;
455
+          values <- NULL; # Not needed anymore
441 456
         }
442 457
       } else {
443 458
        # Add an extra dimension for arrays?
... ...
@@ -445,19 +460,20 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
445 460
           dim <- c(dim, nbrOfArrays);
446 461
 
447 462
         # Update all fields with dimensions
448
-        setDim <- (length(dim) > 1);
463
+        setDim <- (length(dim) > 1L);
449 464
         for (name in cellValueFields) {
450 465
           # Faster to drop dimensions.
451
-          values <- get(name)[idxs,,drop=TRUE];
466
+          values <- get(name, envir=env, inherits=FALSE)[idxs,,drop=TRUE];
452 467
           if (setDim)
453 468
             dim(values) <- dim;
454 469
           fields[[name]] <- values;
470
+          values <- NULL; # Not needed anymore
455 471
         }
456 472
       } # if (addDimnames)
457 473
 
458 474
       fields;
459
-    });
460
-  })
475
+    }) # lapply(.subset2(u, "groups"), ...);
476
+  }) # lapply(cdf, ...)
461 477
 
462 478
   verbose && exit(verbose);
463 479
 
... ...
@@ -467,6 +483,9 @@ readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm",
467 483
 
468 484
 ############################################################################
469 485
 # HISTORY:
486
+# 2014-02-27 [HB]
487
+# o ROBUSTNESS: Using integer constants (e.g. 1L) where applicable.
488
+# o ROBUSTNESS: Using explicitly named arguments in more places.
470 489
 # 2012-05-22 [HB]
471 490
 # o CRAN POLICY: readCel() and readCelUnits() are no longer calling
472 491
 #   .Internal(qsort(...)).