R/findFiles.R
ad4b2701
 ########################################################################/**
 # @RdocFunction findFiles
 #
 # @title "Finds one or several files in multiple directories"
 #
 # \description{
 #  @get "title".
 # }
 #
 # @synopsis
 #
 # \arguments{
 #  \item{pattern}{A regular expression file name pattern to match.}
 #  \item{paths}{A @character @vector of paths to be searched.}
375edf7f
 #  \item{recursive}{If @TRUE, the directory structure is searched
ff50459d
 #    breath-first, in lexicographic order.}
ad4b2701
 #  \item{firstOnly}{If @TRUE, the method returns as soon as a matching
 #    file is found, otherwise not.}
df94bcaa
 #  \item{allFiles}{If @FALSE, files and directories starting with
 #    a period will be skipped, otherwise not.}
ad4b2701
 #  \item{...}{Arguments passed to @see "base::list.files".}
 # }
 #
 # \value{
 #  Returns a @vector of the full pathnames of the files found.
 # }
 #
 # \section{Paths}{
 #   The \code{paths} argument may also contain paths specified as
375edf7f
 #   semi-colon (\code{";"}) separated paths, e.g.
ad4b2701
 #   \code{"/usr/;usr/bin/;.;"}.
 # }
 #
ff50459d
 # \section{Windows Shortcut links}{
375edf7f
 #   If package \pkg{R.utils} is available and loaded , Windows Shortcut links (*.lnk)
7ba76859
 #   are recognized and can be used to imitate links to directories
ff50459d
 #   elsewhere.  For more details, see @see "R.utils::filePath".
 # }
ad4b2701
 #
76cf4b26
 # @author "HB"
ad4b2701
 #
 # @keyword file
 # @keyword IO
f1d6fcf0
 # @keyword internal
ad4b2701
 #**/#######################################################################
df94bcaa
 findFiles <- function(pattern=NULL, paths=NULL, recursive=FALSE, firstOnly=TRUE, allFiles=TRUE, ...) {
0b615a31
   # To please R CMD check
   filePath <- NULL; rm(list="filePath");
 
ad4b2701
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # Local functions
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   splitPaths <- function(paths, ...) {
     if (length(paths) == 0)
       return(NULL);
     # If in format "path1; path2;path3", split it to multiple strings.
     paths <- unlist(strsplit(paths, split=";"));
     paths <- gsub("[ \t]*$", "", gsub("^[ \t]*", "", paths));
     paths <- paths[nchar(paths) > 0];
     if (length(paths) == 0)
       return(NULL);
     paths;
df94bcaa
   } # splitPaths()
 
ad4b2701
 
c65b9393
   # Checks if a package is loaded or not (cut'n'paste from R.utils)
6f53f3e1
   isPackageLoaded <- function(package, version=NULL, ...) {
df94bcaa
     s <- search();
     if (is.null(version)) {
       s <- sub("_[0-9.-]*", "", s);
     } else {
c65b9393
       package <- paste(package, version, sep="_");
df94bcaa
     }
6f53f3e1
     pattern <- sprintf("package:%s", package);
c65b9393
    (pattern %in% s);
   }
ad4b2701
 
0b615a31
 
ad4b2701
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # Validate arguments
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # Argument 'paths':
   paths <- splitPaths(paths);
   if (is.null(paths))
     paths <- ".";
 
ff50459d
   # Argument 'pattern':
   if (!is.null(pattern))
     pattern <- as.character(pattern);
 
   # Argument 'recursive':
   recursive <- as.logical(recursive);
ad4b2701
 
   # Argument 'firstOnly':
   firstOnly <- as.logical(firstOnly);
 
 
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # Prepare list of paths to be scanned
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6f53f3e1
   hasRutilsLoaded <- isPackageLoaded("R.utils");
375edf7f
   ## hasRutils <- suppressWarnings(require(R.utils, quietly=TRUE));
ff50459d
 
   # Don't search the same path twice
   paths <- unique(paths);
ad4b2701
 
   # Don't search non-existing paths
24277d85
   for (kk in seq_along(paths)) {
ad4b2701
     path <- paths[kk];
     # Example any '~':s
     path <- file.path(dirname(path), basename(path));
     path <- gsub("^[.][/\\]", "", path);
 
     # Follow Windows shortcut links?
375edf7f
     if (hasRutilsLoaded)
ad4b2701
       path <- filePath(path, expandLinks="any");
 
     # Does the path exist and is it a directory
afc977b8
     # Note, isdir is TRUE for directories, FALSE for files,
     # *and* NA for non-existing files, e.g. items found by
     # list.files() but are broken Unix links.
     isDirectory <- identical(file.info(path)$isdir, TRUE);
     if (!file.exists(path) || !isDirectory)
ad4b2701
       path <- NA;
 
     paths[kk] <- path;
   }
   if (length(paths) > 0)
     paths <- paths[!is.na(paths)];
 
 
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   # Search for files
   # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   pathnames <- c();
   for (path in paths) {
df94bcaa
     files <- list.files(path, all.files=allFiles, full.names=TRUE);
ff50459d
 
     # Exclude listings that are neither files nor directories
ad4b2701
     files <- gsub("^[.][/\\]", "", files);
ff50459d
     files <- files[nchar(files) > 0];
     if (length(files) > 0) {
       excl <- (basename(files) %in% c(".", "..", "/", "\\"));
       files <- files[!excl];
     }
 
     # Nothing to do?
     if (length(files) == 0)
       next;
 
     # Expand Windows shortcut links?
b5bdf4cb
     files0 <- files;
375edf7f
     if (hasRutilsLoaded) {
b5bdf4cb
       # Remember these
ff50459d
       files <- sapply(files, FUN=filePath, expandLinks="any", USE.NAMES=FALSE);
     }
375edf7f
 
b5bdf4cb
     # Keep only existing files and directories
ff50459d
     ok <- sapply(files, FUN=function(file) {
       (file.exists(path) && !is.na(file.info(file)$isdir));
b5bdf4cb
     }, USE.NAMES=FALSE);
ff50459d
     files <- files[ok];
b5bdf4cb
     files0 <- files0[ok];
ff50459d
 
     # Nothing to do?
     if (length(files) == 0)
       next;
 
b5bdf4cb
     # First search the files, then the directories, so...
afc977b8
     # Note, isdir is TRUE for directories, FALSE for files,
     # *and* NA for non-existing files, e.g. items found by
     # list.files() but are broken Unix links.
ff50459d
     isDir <- sapply(files, FUN=function(file) {
afc977b8
       identical(file.info(file)$isdir, TRUE);
ff50459d
       file.info(file)$isdir;
     }, USE.NAMES=FALSE);
 
     # In case some files are non-accessible, exclude them
     ok <- (!is.na(isDir));
     files <- files[ok];
b5bdf4cb
     files0 <- files0[ok];
ff50459d
     isDir <- isDir[ok];
 
     # Nothing to do?
     if (length(files) == 0)
       next;
 
     # Directories and files in lexicographic order
     dirs <- files[isDir];
     files <- files[!isDir];
b5bdf4cb
     files0 <- files0[!isDir];
ff50459d
 
     # Keep only files that match the filename pattern
b5bdf4cb
     # of the non-expanded filename.
ab6df98e
     if (!is.null(pattern)) {
b5bdf4cb
       keep <- grep(pattern, basename(files0));
ab6df98e
       files <- files[keep];
     }
ff50459d
 
     if (length(files) > 0) {
       files <- sort(files);
       if (firstOnly)
         return(files[1]);
 
       # Store results
       pathnames <- c(pathnames, files);
     }
 
     # Search directories recursively?
     if (recursive) {
       if (length(dirs) == 0)
         next;
 
       for (dir in sort(dirs)) {
         files <- findFiles(pattern=pattern, paths=dir, recursive=recursive,
                                                  firstOnly=firstOnly, ...);
         if (length(files) > 0 && firstOnly)
           return(files[1]);
 
         pathnames <- c(pathnames, files);
ad4b2701
       }
     }
ff50459d
   } # for (path ...)
ad4b2701
 
   pathnames;
ff50459d
 } # findFiles()
ad4b2701
 
 
 ############################################################################
 # HISTORY:
c65b9393
 # 2013-03-18 [HB]
 # o Internal isPackageLoaded() no longer uses defunct manglePackageName().
df94bcaa
 # 2008-02-21 [HB]
6f53f3e1
 # o Added an internal generic isPackageLoaded() function.
df94bcaa
 # 2008-02-20 [KH]
 # o Replaced require("R.utils") with a "isLoaded()" feature.
 # 2008-02-14
 # o Added argument 'allFiles=TRUE' to findFiles().
afc977b8
 # 2007-09-17
 # o ROBUSTNESS: Now findFiles() are robust against broken Unix links.
ab6df98e
 # 2007-08-30
b5bdf4cb
 # o BUG FIX: Pattern matching was done on expanded filenames, whereas they
 #   should really be done on the non-expanded ones.  This, only applies to
375edf7f
 #   Windows shortcuts, but it is not the destination file that is of
b5bdf4cb
 #   interest, but the name of the shortcut file.
ab6df98e
 # o BUG FIX: The recent update was not grep():ing correctly; forgot to
 #   extract the basename().
ff50459d
 # 2007-08-27
 # o Now findFiles(..., recursive=TRUE) does a breath-first search in
 #   lexicographic order.
 # o Now findFiles() don't search replicated directories.
ea64d2b2
 # 2006-11-01
ff50459d
 # o Removed usage of R.utils for now.
ad4b2701
 # 2006-03-14
 # o Created from findCdf.R.
375edf7f
 ############################################################################