Bioconductor Code: ScaledMatrix

Browse code

Initial commit.

LTLA authored on 13/12/2020 01:02:11
Showing 15 changed files

.Rbuildignore index 0000000..c8d83a1
.gitignore index 0000000..2d19fc7
DESCRIPTION index 0000000..fe912db
NAMESPACE index 0000000..b4f6487
R/AllClasses.R index 0000000..1c0cdd4
R/ScaledMatrix.R index 0000000..aa62cff
R/ScaledMatrixSeed.R index 0000000..4fe41be
R/multiplication.R index 0000000..b89bc91
man/ScaledMatrix.Rd index 0000000..eed5858
tests/testthat.R index 0000000..f0b5081
tests/testthat/setup.R index 0000000..f91ba79
tests/testthat/test-class.R index 0000000..e4133cd
tests/testthat/test-mult.R index 0000000..59d1db5
tests/testthat/test-scale.R index 0000000..1e82596
vignettes/ScaledMatrix.Rmd index 0000000..4555e13

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1 @@
                     +^\.gitignore$

.gitignore

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1 @@
                     +*.html

DESCRIPTION

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,30 @@
                     +Package: ScaledMatrix
                     +Version: 0.99.0
                     +Date: 2020-12-12
                     +Title: Creating a DelayedMatrix of Scaled and Centered Values
                     +Authors@R: person("Aaron", "Lun", role=c("aut", "cre", "cph"),
                     +        email="[email protected]")
                     +Imports:
                     +    methods,
                     +    Matrix,
                     +    S4Vectors,
                     +    DelayedArray
                     +Suggests:
                     +    testthat,
                     +    BiocStyle,
                     +    knitr,
                     +    rmarkdown,
                     +    BiocSingular
                     +biocViews:
                     +    Software,
                     +    DataRepresentation
                     +Description:
                     +    Provides delayed computation of a matrix of scaled and centered values.
                     +    The result is equivalent to using the scale() function but avoids explicit
                     +    realization of a dense matrix during block processing. This permits greater
                     +    efficiency in common operations, most notably matrix multiplication.
                     +License: GPL-3
                     +VignetteBuilder: knitr
                     +RoxygenNote: 7.1.1
                     +BugReports: https://github.com/LTLA/ScaledMatrix/issues
                     +URL: https://github.com/LTLA/ScaledMatrix

NAMESPACE

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,39 @@
                     +# Generated by roxygen2: do not edit by hand
+                    +
                     +export(ScaledMatrix)
                     +export(ScaledMatrixSeed)
                     +exportClasses(ScaledMatrix)
                     +exportClasses(ScaledMatrixSeed)
                     +exportMethods("%*%")
                     +exportMethods("[")
                     +exportMethods("dimnames<-")
                     +exportMethods(DelayedArray)
                     +exportMethods(colMeans)
                     +exportMethods(colSums)
                     +exportMethods(crossprod)
                     +exportMethods(dim)
                     +exportMethods(dimnames)
                     +exportMethods(extract_array)
                     +exportMethods(rowMeans)
                     +exportMethods(rowSums)
                     +exportMethods(show)
                     +exportMethods(t)
                     +exportMethods(tcrossprod)
                     +importClassesFrom(DelayedArray,DelayedMatrix)
                     +importFrom(DelayedArray,DelayedArray)
                     +importFrom(DelayedArray,extract_array)
                     +importFrom(DelayedArray,new_DelayedArray)
                     +importFrom(DelayedArray,seed)
                     +importFrom(DelayedArray,sweep)
                     +importFrom(Matrix,colMeans)
                     +importFrom(Matrix,colSums)
                     +importFrom(Matrix,crossprod)
                     +importFrom(Matrix,drop)
                     +importFrom(Matrix,rowMeans)
                     +importFrom(Matrix,rowSums)
                     +importFrom(Matrix,t)
                     +importFrom(Matrix,tcrossprod)
                     +importFrom(S4Vectors,setValidity2)
                     +importFrom(methods,is)
                     +importFrom(methods,new)
                     +importFrom(methods,show)

R/AllClasses.R

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,6 @@
                     +# S4 class holding a matrix-like object together with delayed centering/scaling state.
                     +# Slots:
                     +#   .matrix    - the wrapped object; declared as "ANY", so no class restriction is imposed here.
                     +#   center     - numeric vector of values subtracted from the columns of '.matrix'.
                     +#   scale      - numeric vector of values the columns of '.matrix' are divided by.
                     +#   use_center - logical; whether 'center' is applied.
                     +#   use_scale  - logical; whether 'scale' is applied.
                     +#   transposed - logical; whether the seed is to be viewed in transposed form.
                     +#' @export
                     +setClass("ScaledMatrixSeed", slots=c(.matrix="ANY", center="numeric", scale="numeric", use_center="logical", use_scale="logical", transposed="logical"))
+                    +
                     +# S4 class extending DelayedMatrix whose 'seed' slot is constrained to be a ScaledMatrixSeed.
                     +#' @export
                     +#' @importClassesFrom DelayedArray DelayedMatrix
                     +setClass("ScaledMatrix", contains="DelayedMatrix", slots=c(seed="ScaledMatrixSeed"))

R/ScaledMatrix.R

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,197 @@
                     +#' The ScaledMatrix class
                     +#'
                     +#' Defines the ScaledMatrixSeed and ScaledMatrix classes and their associated methods.
                     +#' These classes support delayed centering and scaling of the columns in the same manner as \code{\link{scale}},
                     +#' but preserving the original data structure for more efficient operations like matrix multiplication.
                     +#'
                     +#' @param x A matrix or any matrix-like object (e.g., from the \pkg{Matrix} package).
                     +#'
                     +#' This can alternatively be a ScaledMatrixSeed, in which case any values of \code{center} and \code{scale} are ignored.
                     +#' @param center A numeric vector of length equal to \code{ncol(x)}, where each element is to be subtracted from the corresponding column of \code{x}.
                     +#' A \code{NULL} value indicates that no subtraction is to be performed.
                     +#' Alternatively \code{TRUE}, in which case it is set to the column means of \code{x}.
                     +#' @param scale A numeric vector of length equal to \code{ncol(x)}, where each element is used to divide the corresponding column of \code{x} (after subtraction).
                     +#' A \code{NULL} value indicates that no division is to be performed.
                     +#' Alternatively \code{TRUE}, in which case it is set to the column-wise root-mean-squared differences from \code{center}
                     +#' (interpretable as standard deviations if \code{center} is set to the column means, see \code{\link{scale}} for commentary).
                     +#'
                     +#' @return
                     +#' The \code{ScaledMatrixSeed} constructor will return a ScaledMatrixSeed object.
                     +#'
                     +#' The \code{ScaledMatrix} constructor will return a ScaledMatrix object equivalent to \code{t((t(x) - center)/scale)}.
                     +#'
                     +#' @section Methods for ScaledMatrixSeed objects:
                     +#' ScaledMatrixSeed objects are implemented as \linkS4class{DelayedMatrix} backends.
                     +#' They support standard operations like \code{dim}, \code{dimnames} and \code{extract_array}.
                     +#'
                     +#' Passing a ScaledMatrixSeed object to the \code{\link{DelayedArray}} constructor will create a ScaledMatrix object.
                     +#'
                     +#' It is possible for \code{x} to contain a ScaledMatrix, thus nesting one ScaledMatrix inside another.
                     +#' This can occasionally be useful in combination with transposition to achieve centering/scaling in both dimensions.
                     +#'
                     +#' @section Methods for ScaledMatrix objects:
                     +#' ScaledMatrix objects are derived from \linkS4class{DelayedMatrix} objects and support all valid operations on the latter.
                     +#' Several functions are specialized for greater efficiency when operating on ScaledMatrix instances, including:
                     +#' \itemize{
                     +#'     \item Subsetting, transposition and replacement of row/column names.
                     +#'         These will return a new ScaledMatrix rather than a DelayedMatrix.
                     +#'     \item Matrix multiplication via \code{\%*\%}, \code{crossprod} and \code{tcrossprod}.
                     +#'         These functions will return a DelayedMatrix.
                     +#'     \item Calculation of row and column sums and means by \code{colSums}, \code{rowSums}, etc.
                     +#' }
                     +#'
                     +#' All other operations applied to a ScaledMatrix will use the underlying \pkg{DelayedArray} machinery.
                     +#' Unary or binary operations will generally create a new DelayedMatrix instance containing a ScaledMatrixSeed.
                     +#'
                     +#' Transposition can effectively be used to allow centering/scaling on the rows if the input \code{x} is transposed.
                     +#'
                     +#' @section Efficiency vs precision:
                     +#' The raison d'etre of the ScaledMatrix is that it can offer faster matrix multiplication by avoiding the \pkg{DelayedArray} block processing.
                     +#' This is done by refactoring the scaling/centering operations to use the (hopefully more efficient) multiplication operator of the original matrix \code{x}.
                     +#' Unfortunately, the speed-up comes at the cost of increasing the risk of catastrophic cancellation.
                     +#' The procedure requires subtraction of one large intermediate number from another to obtain the values of the final matrix product.
                     +#' This could result in a loss of numerical precision that compromises the accuracy of downstream algorithms.
                     +#' In practice, this does not seem to be a major concern though one should be careful if the input \code{x} contains very large positive/negative values.
                     +#'
                     +#' @author
                     +#' Aaron Lun
                     +#'
                     +#' @examples
                     +#' library(Matrix)
                     +#' y <- ScaledMatrix(rsparsematrix(10, 20, 0.1),
                     +#'     center=rnorm(20), scale=1+runif(20))
                     +#' y
                     +#'
                     +#' crossprod(y)
                     +#' tcrossprod(y)
                     +#' y %*% rnorm(20)
                     +#'
                     +#' @aliases
                     +#' ScaledMatrixSeed
                     +#' ScaledMatrixSeed-class
                     +#'
                     +#' dim,ScaledMatrixSeed-method
                     +#' dimnames,ScaledMatrixSeed-method
                     +#' extract_array,ScaledMatrixSeed-method
                     +#' DelayedArray,ScaledMatrixSeed-method
                     +#' show,ScaledMatrixSeed-method
                     +#'
                     +#' ScaledMatrix
                     +#' ScaledMatrix-class
                     +#'
                     +#' dimnames<-,ScaledMatrix,ANY-method
                     +#' t,ScaledMatrix-method
                     +#' [,ScaledMatrix,ANY,ANY,ANY-method
                     +#'
                     +#' colSums,ScaledMatrix-method
                     +#' rowSums,ScaledMatrix-method
                     +#' colMeans,ScaledMatrix-method
                     +#' rowMeans,ScaledMatrix-method
                     +#'
                     +#' %*%,ANY,ScaledMatrix-method
                     +#' %*%,ScaledMatrix,ANY-method
                     +#' %*%,ScaledMatrix,ScaledMatrix-method
                     +#'
                     +#' crossprod,ScaledMatrix,missing-method
                     +#' crossprod,ScaledMatrix,ANY-method
                     +#' crossprod,ANY,ScaledMatrix-method
                     +#' crossprod,ScaledMatrix,ScaledMatrix-method
                     +#'
                     +#' tcrossprod,ScaledMatrix,missing-method
                     +#' tcrossprod,ScaledMatrix,ANY-method
                     +#' tcrossprod,ANY,ScaledMatrix-method
                     +#' tcrossprod,ScaledMatrix,ScaledMatrix-method
                     +#'
                     +#' @docType class
                     +#' @name ScaledMatrix
                     +NULL
+                    +
                     +#' @export
                     +#' @rdname ScaledMatrix
                     +#' @importFrom DelayedArray DelayedArray
                     +#' @importFrom Matrix colMeans rowSums t
                     +ScaledMatrix <- function(x, center=NULL, scale=NULL) {
                     +    # center=TRUE is shorthand for "use the column means of 'x'".
                     +    if (isTRUE(center)) {
                     +        center <- colMeans(x)
                     +    }
                     +    # scale=TRUE is shorthand for the column-wise root-mean-square deviation
                     +    # from 'center' (with an 'nrow(x) - 1' denominator).
                     +    if (isTRUE(scale)) {
                     +        # Work on the transpose so that 'center' recycles along the original columns.
                     +        shifted <- t(DelayedArray(x))
                     +        if (!is.null(center)) {
                     +            shifted <- shifted - center
                     +        }
                     +        sum_sq <- rowSums(shifted^2)
                     +        scale <- sqrt(sum_sq / (nrow(x) - 1))
                     +    }
                     +    scaled_seed <- ScaledMatrixSeed(x, center=center, scale=scale)
                     +    DelayedArray(scaled_seed)
                     +}
+                    +
                     +#' @export
                     +#' @importFrom DelayedArray DelayedArray new_DelayedArray
                     +setMethod("DelayedArray", "ScaledMatrixSeed", function(seed) {
                     +    # Wrap the seed in the dedicated ScaledMatrix class.
                     +    new_DelayedArray(seed, Class="ScaledMatrix")
                     +})
+                    +
                     +###################################
                     +# Overridden utilities from DelayedArray, for efficiency.
+                    +
                     +#' @export
                     +#' @importFrom DelayedArray DelayedArray seed
                     +setReplaceMethod("dimnames", "ScaledMatrix", function(x, value) {
                     +    # Rename on the seed itself and re-wrap the result.
                     +    renamed <- rename_ScaledMatrixSeed(seed(x), value)
                     +    DelayedArray(renamed)
                     +})
+                    +
                     +#' @export
                     +#' @importFrom DelayedArray DelayedArray seed
                     +setMethod("t", "ScaledMatrix", function(x) {
                     +    # Transposition only flips a flag on the seed; the result is re-wrapped.
                     +    flipped <- transpose_ScaledMatrixSeed(seed(x))
                     +    DelayedArray(flipped)
                     +})
+                    +
                     +#' @export
                     +#' @importFrom DelayedArray DelayedArray seed
                     +setMethod("[", "ScaledMatrix", function(x, i, j, ..., drop=TRUE) {
                     +    # A missing index is forwarded as NULL, which the seed-level subsetter
                     +    # treats as "leave this dimension untouched".
                     +    row_idx <- if (missing(i)) NULL else i
                     +    col_idx <- if (missing(j)) NULL else j
                     +    sub <- DelayedArray(subset_ScaledMatrixSeed(seed(x), i=row_idx, j=col_idx))
                     +    # Only collapse when requested and when some dimension has extent 1.
                     +    if (!drop || !any(dim(sub)==1L)) {
                     +        return(sub)
                     +    }
                     +    drop(sub)
                     +})
+                    +
                     +###################################
                     +# Basic matrix stats.
+                    +
                     +#' @export
                     +#' @importFrom Matrix colSums rowSums drop
                     +setMethod("colSums", "ScaledMatrix", function(x, na.rm = FALSE, dims = 1L) {
                     +    # 'na.rm' and 'dims' are part of the signature but are not consulted below.
                     +    if (!is_transposed(seed(x))) {
                     +        # Column sums as a product with a vector of ones on the left.
                     +        ones <- rep(1, nrow(x))
                     +        totals <- drop(ones %*% x)
                     +        names(totals) <- colnames(x)
                     +        return(totals)
                     +    }
                     +    # Transposed seed: column sums are the row sums of the un-transposed matrix.
                     +    rowSums(t(x))
                     +})
+                    +
                     +#' @export
                     +#' @importFrom Matrix colSums rowSums drop
                     +setMethod("rowSums", "ScaledMatrix", function(x, na.rm = FALSE, dims = 1L) {
                     +    # 'na.rm' and 'dims' are part of the signature but are not consulted below.
                     +    if (!is_transposed(seed(x))) {
                     +        # Row sums as a product with a vector of ones on the right.
                     +        ones <- rep(1, ncol(x))
                     +        totals <- drop(x %*% ones)
                     +        names(totals) <- rownames(x)
                     +        return(totals)
                     +    }
                     +    # Transposed seed: row sums are the column sums of the un-transposed matrix.
                     +    colSums(t(x))
                     +})
+                    +
                     +#' @export
                     +#' @importFrom Matrix colMeans colSums
                     +setMethod("colMeans", "ScaledMatrix", function(x, na.rm = FALSE, dims = 1L) {
                     +    # Column means are the column sums divided by the number of rows.
                     +    col_totals <- colSums(x)
                     +    col_totals/nrow(x)
                     +})
+                    +
                     +#' @export
                     +#' @importFrom Matrix rowMeans rowSums
                     +setMethod("rowMeans", "ScaledMatrix", function(x, na.rm = FALSE, dims = 1L) {
                     +    # Row means are the row sums divided by the number of columns.
                     +    row_totals <- rowSums(x)
                     +    row_totals/ncol(x)
                     +})

R/ScaledMatrixSeed.R

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,165 @@
                     +#' @export
                     +#' @importFrom methods new is
                     +ScaledMatrixSeed <- function(x, center=NULL, scale=NULL) {
                     +    if (!missing(x)) {
                     +        # An existing seed is returned untouched; 'center' and 'scale' are ignored.
                     +        if (is(x, "ScaledMatrixSeed")) {
                     +            return(x)
                     +        }
                     +        wrapped <- x
                     +    } else {
                     +        # No input: fall back to an empty 0 x 0 matrix.
                     +        wrapped <- matrix(0, 0, 0)
                     +    }
                     +    # NULL center/scale become zero-length numerics, with the matching flag set to FALSE.
                     +    new("ScaledMatrixSeed",
                     +        .matrix=wrapped,
                     +        center=as.numeric(center),
                     +        scale=as.numeric(scale),
                     +        use_center=!is.null(center),
                     +        use_scale=!is.null(scale),
                     +        transposed=FALSE)
                     +}
+                    +
                     +# Validity method for ScaledMatrixSeed.
                     +# Returns TRUE for a valid object, otherwise a character vector of problems.
                     +#' @importFrom S4Vectors setValidity2
                     +setValidity2("ScaledMatrixSeed", function(object) {
                     +    msg <- character(0)
                     +    # Checking scalars.
                     +    if (length(use_center(object))!=1L) {
                     +        msg <- c(msg, "'use_center' must be a logical scalar")
                     +    }
                     +    if (length(use_scale(object))!=1L) {
                     +        msg <- c(msg, "'use_scale' must be a logical scalar")
                     +    }
                     +    if (length(is_transposed(object))!=1L) {
                     +        msg <- c(msg, "'transposed' must be a logical scalar")
                     +    }
                     +    # The vector checks below branch on the scalar flags; if any flag is
                     +    # malformed, report that now rather than erroring inside 'if'/'&&'.
                     +    if (length(msg)) {
                     +        return(msg)
                     +    }
                     +    # Checking vectors.
                     +    # 'center' and 'scale' always index the columns of the underlying '.matrix',
                     +    # whether or not the seed is flagged as transposed, so compare against the
                     +    # column count of '.matrix' rather than that of the (possibly transposed) view.
                     +    n_inner_cols <- ncol(get_matrix2(object))
                     +    if (use_center(object) && length(get_center(object))!=n_inner_cols) {
                     +        msg <- c(msg, "length of 'center' must equal 'ncol(object)'")
                     +    }
                     +    if (use_scale(object) && length(get_scale(object))!=n_inner_cols) {
                     +        msg <- c(msg, "length of 'scale' must equal 'ncol(object)'")
                     +    }
                     +    if (length(msg)) {
                     +        return(msg)
                     +    }
                     +    return(TRUE)
                     +})
+                    +
                     +#' @export
                     +#' @importFrom methods show
                     +setMethod("show", "ScaledMatrixSeed", function(object) {
                     +    # Assemble the summary lines first, then print them newline-separated.
                     +    header <- sprintf("%i x %i ScaledMatrixSeed object", nrow(object), ncol(object))
                     +    representation <- sprintf("representation: %s", class(get_matrix2(object)))
                     +    centering <- sprintf("centering: %s", if (use_center(object)) "yes" else "no")
                     +    scaling <- sprintf("scaling: %s", if (use_scale(object)) "yes" else "no")
                     +    cat(c(header, representation, centering, scaling), sep="\n")
                     +})
+                    +
                     +###################################
                     +# Internal getters.
+                    +
                     +# Returns the underlying matrix-like object stored in the '.matrix' slot.
                     +get_matrix2 <- function(x) x@.matrix
+                    +
                     +# Returns the 'center' slot (numeric vector of centering values).
                     +get_center <- function(x) x@center
+                    +
                     +# Returns the 'scale' slot (numeric vector of scaling values).
                     +get_scale <- function(x) x@scale
+                    +
                     +# Returns the 'use_center' slot (logical flag).
                     +use_center <- function(x) x@use_center
+                    +
                     +# Returns the 'use_scale' slot (logical flag).
                     +use_scale <- function(x) x@use_scale
+                    +
                     +# Returns the 'transposed' slot (logical flag).
                     +is_transposed <- function(x) x@transposed
+                    +
                     +###################################
                     +# DelayedArray support utilities.
+                    +
                     +#' @export
                     +setMethod("dim", "ScaledMatrixSeed", function(x) {
                     +    # Dimensions of the wrapped matrix, reversed when the seed is transposed.
                     +    inner_dim <- dim(get_matrix2(x))
                     +    if (is_transposed(x)) rev(inner_dim) else inner_dim
                     +})
+                    +
                     +#' @export
                     +setMethod("dimnames", "ScaledMatrixSeed", function(x) {
                     +    # Dimnames of the wrapped matrix, reversed when the seed is transposed.
                     +    inner_names <- dimnames(get_matrix2(x))
                     +    if (is_transposed(x)) rev(inner_names) else inner_names
                     +})
+                    +
                     +#' @export
                     +#' @importFrom DelayedArray extract_array
                     +setMethod("extract_array", "ScaledMatrixSeed", function(x, index) {
                     +    # Subset the seed by the row and column entries of 'index',
                     +    # then realize the (smaller) result as an ordinary matrix.
                     +    realize_ScaledMatrixSeed(
                     +        subset_ScaledMatrixSeed(x, index[[1]], index[[2]])
                     +    )
                     +})
+                    +
                     +###################################
                     +# Other utilities.
+                    +
                     +# Replaces the dimnames of the seed with 'value' and returns the modified seed.
                     +# 'value' is given in the orientation of the seed as viewed by the caller, so it
                     +# is reversed for a transposed seed before being applied to the '.matrix' slot.
                     +rename_ScaledMatrixSeed <- function(x, value) {
                     +    if (is_transposed(x)) value <- rev(value)
                     +    dimnames(x@.matrix) <- value
                     +    x
                     +}
+                    +
                     +# Returns the seed with its 'transposed' flag inverted; '.matrix' is left as-is.
                     +transpose_ScaledMatrixSeed <- function(x) {
                     +    flipped <- !is_transposed(x)
                     +    x@transposed <- flipped
                     +    x
                     +}
+                    +
                     +# Realizes a ScaledMatrixSeed as an ordinary matrix (via as.matrix), applying
                     +# centering, scaling and transposition as indicated by the seed's flags.
                     +# Extra arguments in '...' are accepted but not used in the body.
                     +#' @importFrom Matrix t
                     +#' @importFrom methods is
                     +realize_ScaledMatrixSeed <- function(x, ...) {
                     +    out <- get_matrix2(x)
+                    +
                     +    if (use_scale(x) || use_center(x)) {
                     +        if (is(out, "ScaledMatrix")) {
                     +            # Any '-' and '/' would collapse this to a DelayedArray,
                     +            # which would then call extract_array, which would then
                     +            # call realize_ScaledMatrixSeed, forming an infinite loop.
                     +            # So we might as well realize it now.
                     +            out <- realize_ScaledMatrixSeed(seed(out))
                     +        }
+                    +
                     +        # Transpose so that the rows correspond to the columns of '.matrix';
                     +        # the center/scale vectors then recycle one element per row.
                     +        out <- t(out)
                     +        if (use_center(x)) {
                     +            out <- out - get_center(x)
                     +        }
                     +        if (use_scale(x)) {
                     +            out <- out / get_scale(x)
                     +        }
+                    +
                     +        # 'out' is currently in transposed orientation; undo that unless
                     +        # the seed itself is flagged as transposed.
                     +        if (!is_transposed(x)) out <- t(out)
                     +    } else {
                     +        # No centering/scaling: only the transposition flag needs honoring.
                     +        if (is_transposed(x)) out <- t(out)
                     +    }
+                    +
                     +    as.matrix(out)
                     +}
+                    +
                     +# Subsets a ScaledMatrixSeed by rows 'i' and columns 'j' and returns the new seed.
                     +# A NULL index leaves the corresponding dimension untouched.
                     +# The 'center' and 'scale' vectors are subsetted alongside the columns of '.matrix'.
                     +subset_ScaledMatrixSeed <- function(x, i, j) {
                     +    if (is_transposed(x)) {
                     +        # Undo the transposition, subset with the indices swapped, then
                     +        # restore the transposition flag on the result.
                     +        x2 <- transpose_ScaledMatrixSeed(x)
                     +        x2 <- subset_ScaledMatrixSeed(x2, i=j, j=i)
                     +        return(transpose_ScaledMatrixSeed(x2))
                     +    }
                     +    if (!is.null(i)) {
                     +        x@.matrix <- get_matrix2(x)[i,,drop=FALSE]
                     +    }
                     +    if (!is.null(j)) {
                     +        # Column names are converted to positions so that the same index
                     +        # can also be applied to the 'scale' and 'center' vectors.
                     +        if (is.character(j)) {
                     +            j <- match(j, colnames(x))
                     +        }
                     +        x@.matrix <- get_matrix2(x)[,j,drop=FALSE]
                     +        if (use_scale(x)) {
                     +            x@scale <- get_scale(x)[j]
                     +        }
                     +        if (use_center(x)) {
                     +            x@center <- get_center(x)[j]
                     +        }
                     +    }
                     +    return(x)
                     +}

R/multiplication.R

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,634 @@
                     +# We attempt to use operators defined for '.matrix' in the 'ScaledMatrixSeed'.
                     +# This avoids expensive modifications such as loss of sparsity.
                     +# Centering and scaling are factored out into separate operations.
                     +#
                     +# We assume that the non-'ScaledMatrix' argument is small and can be modified cheaply.
                     +# We also assume that the matrix product is small and can be modified cheaply.
                     +# This allows centering and scaling to be applied *after* multiplication.
                     +#
                     +# Here are some ground rules for how these functions must work:
                     +#
                     +#  - NO arithmetic operations shall be applied to a ScaledMatrix.
                     +#    This includes nested ScaledMatrices that are present in '.matrix'.
                     +#    Such operations collapse the ScaledMatrix to a DelayedMatrix,
                     +#    resulting in slow block processing during multiplication.
                     +#
                     +#  - NO addition/subtraction operations shall be applied to '.matrix'.
                     +#    This is necessary to avoid loss of sparsity for sparse '.matrix',
                     +#    as well as to avoid block processing for ScaledMatrix '.matrix'.
                     +#
                     +#  - NO division/multiplication operations should be applied to '.matrix'.
                     +#    This is largely a consequence of the first point above.
                     +#    Exceptions are only allowed when this is unavoidable, e.g., in '.internal_tcrossprod'.
                     +#
                     +#  - NO calling of %*% or (t)crossprod on a ScaledMatrix of the same nesting depth as an input ScaledMatrix.
                     +#    Internal multiplication should always be applied to '.matrix', to avoid infinite S4 recursion.
                     +#    Each method call should strip away one nesting level, i.e., operate on the seed.
                     +#    Exceptions are allowed for dual ScaledMatrix multiplication,
                     +#    where one argument is allowed to be of the same depth.
+                    +
                     +#' @export
                     +#' @importFrom Matrix t
                     +#' @importFrom DelayedArray seed DelayedArray
                     +setMethod("%*%", c("ScaledMatrix", "ANY"), function(x, y) {
                     +    # Matrix product with a ScaledMatrix on the left; returns a DelayedArray.
                     +    sm_seed <- seed(x)
+                    +
                     +    if (!is_transposed(sm_seed)) {
                     +        return(DelayedArray(.rightmult_ScaledMatrix(sm_seed, y)))
                     +    }
+                    +
                     +    # For a transposed seed, left-multiply by t(y) and transpose the result back.
                     +    flipped <- .leftmult_ScaledMatrix(t(y), sm_seed)
                     +    DelayedArray(t(flipped))
                     +})
+                    +
                     +#' @importFrom DelayedArray sweep
                     +.rightmult_ScaledMatrix <- function(x_seed, y) {
                     +    # Multiplies the centered/scaled matrix described by 'x_seed' with 'y'.
                     +    # Scaling is moved onto 'y' and centering is removed after the product,
                     +    # so the underlying matrix itself is never modified.
                     +    rhs <- if (use_scale(x_seed)) y / get_scale(x_seed) else y
+                    +
                     +    res <- as.matrix(get_matrix2(x_seed) %*% rhs)
                     +    if (!use_center(x_seed)) {
                     +        return(res)
                     +    }
+                    +
                     +    # Contribution of the centers, one value per column of the product.
                     +    shift <- as.numeric(get_center(x_seed) %*% rhs)
                     +    sweep(res, 2, shift, "-", check.margin=FALSE)
                     +}
+                    +
                     +#' @export
                     +#' @importFrom Matrix t
                     +#' @importFrom DelayedArray seed DelayedArray
                     +setMethod("%*%", c("ANY", "ScaledMatrix"), function(x, y) {
                     +    # Matrix product with a ScaledMatrix on the right; returns a DelayedArray.
                     +    sm_seed <- seed(y)
+                    +
                     +    if (!is_transposed(sm_seed)) {
                     +        return(DelayedArray(.leftmult_ScaledMatrix(x, sm_seed)))
                     +    }
+                    +
                     +    # Vectors don't quite behave as 1-column matrices here,
                     +    # so only operands that have dimensions are transposed.
                     +    lhs <- if (is.null(dim(x))) x else t(x)
                     +    DelayedArray(t(.rightmult_ScaledMatrix(sm_seed, lhs)))
                     +})
+                    +
                     +#' @importFrom Matrix rowSums
                     +#' @importFrom DelayedArray sweep
                     +.leftmult_ScaledMatrix <- function(x, y_seed) {
                     +    # Multiplies 'x' with the centered/scaled matrix described by 'y_seed'.
                     +    # Centering and scaling are applied to the product rather than to the
                     +    # underlying matrix.
                     +    res <- as.matrix(x %*% get_matrix2(y_seed))
+                    +
                     +    if (use_center(y_seed)) {
                     +        centers <- get_center(y_seed)
                     +        # A dimensionless 'x' yields a single row, so a scaled vector suffices.
                     +        shift <- if (is.null(dim(x))) centers * sum(x) else outer(rowSums(x), centers, "*")
                     +        res <- res - shift
                     +    }
+                    +
                     +    if (use_scale(y_seed)) {
                     +        res <- sweep(res, 2, get_scale(y_seed), "/", check.margin=FALSE)
                     +    }
+                    +
                     +    res
                     +}
+                    +
                     +#' @export
                     +#' @importFrom DelayedArray seed DelayedArray
                     +setMethod("%*%", c("ScaledMatrix", "ScaledMatrix"), function(x, y) {
                     +    # Product of two ScaledMatrix objects; the transposition state of each
                     +    # seed selects the worker inside the dispatcher.
                     +    lhs <- seed(x)
                     +    rhs <- seed(y)
                     +    DelayedArray(.dual_mult_dispatcher(lhs, rhs, is_transposed(lhs), is_transposed(rhs)))
                     +})
+                    +
                     +#' @importFrom Matrix t
                     +.dual_mult_dispatcher <- function(x_seed, y_seed, x_trans, y_trans) {
                     +    # Chooses the multiplication routine from the two transposition flags.
                     +    if (x_trans && y_trans) {
                     +        # Both transposed: multiply in the reverse order, then transpose.
                     +        return(t(.multiply_u2u(y_seed, x_seed)))
                     +    }
                     +    if (x_trans) {
                     +        return(.multiply_t2u(x_seed, y_seed))
                     +    }
                     +    if (y_trans) {
                     +        return(.multiply_u2t(x_seed, y_seed))
                     +    }
                     +    .multiply_u2u(x_seed, y_seed)
                     +}
+                    +
                     +###################################
                     +# ScMat %*% ScMat utilities.
+                    +
                     +# We do not implement ScMat %*% ScMat in terms of left/right %*%.
                     +# This would cause scaling to be applied on one of the ScMats,
                     +# collapsing it into a DelayedMatrix. Subsequent multiplication
                     +# would use block processing, which would be too slow.
+                    +
                     +#' @importFrom Matrix drop rowSums
                     +#' @importFrom DelayedArray sweep
                     +.multiply_u2u <- function(x_seed, y_seed)
                     +# Considering the problem of (X - C_x)S_x (Y - C_y)S_y, where 'C' holds the centers and 'S' divides by the scale values; the four expanded terms are accumulated into an ordinary matrix.
                     +{
                     +    # Computing X S_x Y S_y, which initializes 'result'.
                     +    x0 <- get_matrix2(x_seed)
                     +    if (use_scale(x_seed)) {
                     +        x0 <- ScaledMatrix(x0, scale=get_scale(x_seed)) # wrap instead of dividing, so no arithmetic is applied to the underlying matrix.
                     +    }
+                    +
                     +    result <- as.matrix(x0 %*% get_matrix2(y_seed))
                     +    if (use_scale(y_seed)) {
                     +        result <- sweep(result, 2, get_scale(y_seed), "/", check.margin=FALSE)
                     +    }
+                    +
                     +    # Computing C_x S_x Y S_y, and subtracting it from 'result'.
                     +    if (use_center(x_seed)) {
                     +        x.center <- get_center(x_seed)
                     +        if (use_scale(x_seed)) {
                     +            x.center <- x.center / get_scale(x_seed)
                     +        }
+                    +
                     +        component2 <- drop(x.center %*% get_matrix2(y_seed))
                     +        if (use_scale(y_seed)) {
                     +            component2 <- component2 / get_scale(y_seed)
                     +        }
+                    +
                     +        result <- sweep(result, 2, component2, "-", check.margin=FALSE)
                     +    }
+                    +
                     +    # Computing C_x S_x C_y S_y, and adding it to 'result'.
                     +    if (use_center(x_seed) && use_center(y_seed)) {
                     +        x.center <- get_center(x_seed)
                     +        if (use_scale(x_seed)) {
                     +            x.center <- x.center / get_scale(x_seed)
                     +        }
+                    +
                     +        y.center <- get_center(y_seed)
                     +        if (use_scale(y_seed)) {
                     +            y.center <- y.center / get_scale(y_seed)
                     +        }
+                    +
                     +        component4 <- sum(x.center) * y.center # a single vector, added to every row by the sweep() below.
                     +        result <- sweep(result, 2, component4, "+", check.margin=FALSE)
                     +    }
+                    +
                     +    # Computing X S_x C_y S_y, and subtracting it from 'result'.
                     +    # This is done last to avoid subtracting large values.
                     +    if (use_center(y_seed)) {
                     +        y.center <- get_center(y_seed)
                     +        if (use_scale(y_seed)) {
                     +            y.center <- y.center / get_scale(y_seed)
                     +        }
+                    +
                     +        component3 <- outer(rowSums(x0), y.center) # 'x0' already carries the scaling of 'x_seed', if any.
                     +        result <- result - component3
                     +    }
+                    +
                     +    result
                     +}
+                    +
                     +#' @importFrom Matrix tcrossprod drop
                     +#' @importFrom DelayedArray sweep
                     +.multiply_u2t <- function(x_seed, y_seed)
                     +# Considering the problem of (X - C_x)S_x S_y(Y' - C_y'), used by the dispatcher when only 'y_seed' is flagged as transposed; returns an ordinary matrix.
                     +{
                     +    # Computing X S_x S_y Y', which initializes 'result'.
                     +    x0 <- get_matrix2(x_seed)
                     +    if (use_scale(x_seed) || use_scale(y_seed)) {
                     +        scaling <- 1 # accumulates the product of whichever scale vectors are in use.
                     +        if (use_scale(x_seed)) {
                     +            scaling <- scaling * get_scale(x_seed)
                     +        }
                     +        if (use_scale(y_seed)) {
                     +            scaling <- scaling * get_scale(y_seed)
                     +        }
                     +        x0 <- ScaledMatrix(x0, scale=scaling) # wrap instead of dividing, so no arithmetic is applied to the underlying matrix.
                     +    }
                     +    result <- as.matrix(tcrossprod(x0, get_matrix2(y_seed)))
+                    +
                     +    # Computing C_x S_x S_y Y', and subtracting it from 'result'.
                     +    if (use_center(x_seed)) {
                     +        x.center <- get_center(x_seed)
                     +        if (use_scale(x_seed)) {
                     +            x.center <- x.center / get_scale(x_seed)
                     +        }
                     +        if (use_scale(y_seed)) {
                     +            x.center <- x.center / get_scale(y_seed)
                     +        }
+                    +
                     +        component2 <- drop(tcrossprod(x.center, get_matrix2(y_seed)))
                     +        result <- sweep(result, 2, component2, "-", check.margin=FALSE)
                     +    }
+                    +
                     +    # Computing C_x S_x S_y C_y', and adding it to 'result'.
                     +    if (use_center(x_seed) && use_center(y_seed)) {
                     +        x.center <- get_center(x_seed)
                     +        if (use_scale(x_seed)) {
                     +            x.center <- x.center / get_scale(x_seed)
                     +        }
+                    +
                     +        y.center <- get_center(y_seed)
                     +        if (use_scale(y_seed)) {
                     +            y.center <- y.center / get_scale(y_seed)
                     +        }
+                    +
                     +        component4 <- sum(x.center*y.center) # a scalar, added to every entry of 'result'.
                     +        result <- result + component4
                     +    }
+                    +
                     +    # Computing X S_x S_y C_y', and subtracting it from 'result'.
                     +    # This is done last to avoid subtracting large values.
                     +    if (use_center(y_seed)) {
                     +        component3 <- drop(x0 %*% get_center(y_seed)) # 'x0' already carries the combined scaling, if any.
                     +        result <- result - component3
                     +    }
+                    +
                     +    result
                     +}
+                    +
                     +#' @importFrom Matrix crossprod colSums
                     +#' @importFrom DelayedArray sweep
                     +.multiply_t2u <- function(x_seed, y_seed)
                     +# Considering the problem of S_x(X' - C_x') (Y - C_y)S_y, used by the dispatcher when only 'x_seed' is flagged as transposed; returns an ordinary matrix.
                     +{
                     +    # Computing X' Y, which initializes 'result'.
                     +    x0 <- get_matrix2(x_seed)
                     +    y0 <- get_matrix2(y_seed)
                     +    result <- as.matrix(crossprod(x0, y0))
+                    +
                     +    # Computing C_x' Y, and subtracting it from 'result'.
                     +    if (use_center(x_seed)) {
                     +        x.center <- get_center(x_seed)
                     +        component2 <- outer(x.center, colSums(y0))
                     +        result <- result - component2
                     +    }
+                    +
                     +    # Computing C_x' C_y, and adding it to 'result'.
                     +    if (use_center(x_seed) && use_center(y_seed)) {
                     +        x.center <- get_center(x_seed)
                     +        y.center <- get_center(y_seed)
                     +        component4 <- outer(x.center, y.center) * nrow(y0) # nrow(y0) is the length of the dimension being summed over.
                     +        result <- result + component4
                     +    }
+                    +
                     +    # Computing X' C_y, and subtracting it from 'result'.
                     +    # This is done last to avoid subtracting large values.
                     +    if (use_center(y_seed)) {
                     +        component3 <- outer(colSums(x0), get_center(y_seed))
                     +        result <- result - component3
                     +    }
+                    +
                     +    if (use_scale(x_seed)) {
                     +        result <- result / get_scale(x_seed) # vector recycling divides each row; assumes one scale value per row of 'result'.
                     +    }
                     +    if (use_scale(y_seed)) {
                     +        result <- sweep(result, 2, get_scale(y_seed), "/", check.margin=FALSE)
                     +    }
+                    +
                     +    result
                     +}
+                    +
                     +###################################
                     +# Cross-product.
+                    +
                     +# Technically, we could implement this in terms of '%*%',
                     +# but we use specializations to exploit native crossprod() for '.matrix',
                     +# which is probably more efficient.
+                    +
                     +#' @export
                     +#' @importFrom Matrix crossprod
                     +#' @importFrom DelayedArray seed DelayedArray
                     +setMethod("crossprod", c("ScaledMatrix", "missing"), function(x, y) {
                     +    # Cross-product of a ScaledMatrix with itself; returns a DelayedArray.
                     +    sm_seed <- seed(x)
+                    +
                     +    # A transposed seed uses the transposed cross-product worker instead.
                     +    # No need to t(), the output is symmetric anyway.
                     +    compute <- if (is_transposed(sm_seed)) .tcp_ScaledMatrix else .cross_ScaledMatrix
+                    +
                     +    DelayedArray(compute(sm_seed))
                     +})
+                    +
                     +#' @importFrom Matrix crossprod colSums
                     +#' @importFrom DelayedArray sweep
                     +.cross_ScaledMatrix <- function(x_seed) { # cross-product of the centered/scaled matrix, computed from the underlying matrix without consulting the transposition flag.
                     +    x0 <- get_matrix2(x_seed)
                     +    out <- as.matrix(crossprod(x0))
+                    +
                     +    if (use_center(x_seed)) {
                     +        centering <- get_center(x_seed)
                     +        colsums <- colSums(x0)
+                    +
                     +        # Minus, then add, then minus, to mitigate cancellation.
                     +        out <- out - outer(centering, colsums)
                     +        out <- out + outer(centering, centering) * nrow(x0)
                     +        out <- out - outer(colsums, centering)
                     +    }
+                    +
                     +    if (use_scale(x_seed)) {
                     +        scaling <- get_scale(x_seed)
                     +        out <- sweep(out / scaling, 2, scaling, "/", check.margin=FALSE) # divides the rows, then the columns, by the scale values.
                     +    }
                     +    out
                     +}
+                    +
                     +#' @export
                     +#' @importFrom Matrix crossprod
                     +#' @importFrom DelayedArray seed DelayedArray
                     +setMethod("crossprod", c("ScaledMatrix", "ANY"), function(x, y) {
                     +    # Cross-product with a ScaledMatrix as the first operand; returns a DelayedArray.
                     +    sm_seed <- seed(x)
                     +    # For a transposed seed, this reduces to a plain right-multiplication.
                     +    worker <- if (is_transposed(sm_seed)) .rightmult_ScaledMatrix else .rightcross_ScaledMatrix
                     +    DelayedArray(worker(sm_seed, y))
                     +})
+                    +
                     +#' @importFrom Matrix crossprod colSums
                     +.rightcross_ScaledMatrix <- function(x_seed, y) {
                     +    # Cross-product of the centered/scaled matrix described by 'x_seed' with 'y'.
                     +    # Centering and scaling are applied to the product rather than to the
                     +    # underlying matrix.
                     +    res <- as.matrix(crossprod(get_matrix2(x_seed), y))
+                    +
                     +    if (use_center(x_seed)) {
                     +        centers <- get_center(x_seed)
                     +        # A dimensionless 'y' yields a single column, so a scaled vector suffices.
                     +        shift <- if (is.null(dim(y))) centers * sum(y) else outer(centers, colSums(y))
                     +        res <- res - shift
                     +    }
+                    +
                     +    if (!use_scale(x_seed)) {
                     +        return(res)
                     +    }
                     +    res / get_scale(x_seed)
                     +}
+                    +
                     +#' @export
                     +#' @importFrom Matrix crossprod
                     +#' @importFrom DelayedArray seed DelayedArray
                     +setMethod("crossprod", c("ANY", "ScaledMatrix"), function(x, y) {
                     +    # Cross-product with a ScaledMatrix as the second operand; returns a DelayedArray.
                     +    sm_seed <- seed(y)
+                    +
                     +    if (is_transposed(sm_seed)) {
                     +        # Right-multiply the untransposed seed by 'x', then transpose the result.
                     +        return(DelayedArray(t(.rightmult_ScaledMatrix(sm_seed, x))))
                     +    }
+                    +
                     +    DelayedArray(.leftcross_ScaledMatrix(x, sm_seed))
                     +})
+                    +
                     +#' @importFrom Matrix crossprod colSums
                     +#' @importFrom DelayedArray sweep
                     +.leftcross_ScaledMatrix <- function(x, y_seed) {
                     +    # Cross-product of 'x' with the centered/scaled matrix described by 'y_seed'.
                     +    # Centering and scaling are applied to the product rather than to the
                     +    # underlying matrix.
                     +    res <- as.matrix(crossprod(x, get_matrix2(y_seed)))
+                    +
                     +    if (use_center(y_seed)) {
                     +        centers <- get_center(y_seed)
                     +        res <- if (is.null(dim(x))) {
                     +            sweep(res, 2, sum(x) * centers, "-", check.margin=FALSE)
                     +        } else {
                     +            res - outer(colSums(x), centers)
                     +        }
                     +    }
+                    +
                     +    if (!use_scale(y_seed)) {
                     +        return(res)
                     +    }
                     +    sweep(res, 2, get_scale(y_seed), "/", check.margin=FALSE)
                     +}
+                    +
                     +#' @export
                     +#' @importFrom Matrix crossprod
                     +#' @importFrom DelayedArray DelayedArray seed
                     +setMethod("crossprod", c("ScaledMatrix", "ScaledMatrix"), function(x, y) {
                     +    # crossprod(x, y) is handled as a product in which the transposition
                     +    # flag of 'x' is inverted.
                     +    lhs <- seed(x)
                     +    rhs <- seed(y)
                     +    DelayedArray(.dual_mult_dispatcher(lhs, rhs, !is_transposed(lhs), is_transposed(rhs)))
                     +})
+                    +
                     +###################################
                     +# Transposed cross-product.
+                    +
                     +# Technically, we could implement this in terms of '%*%',
                     +# but we use specializations to exploit native tcrossprod() for '.matrix',
                     +# which is probably more efficient.
+                    +
                     +#' @export
                     +#' @importFrom Matrix tcrossprod
                     +#' @importFrom DelayedArray seed DelayedArray sweep
                     +setMethod("tcrossprod", c("ScaledMatrix", "missing"), function(x, y) {
                     +    # Transposed cross-product of a ScaledMatrix with itself; returns a DelayedArray.
                     +    sm_seed <- seed(x)
                     +    # A transposed seed uses the ordinary cross-product worker instead.
                     +    compute <- if (is_transposed(sm_seed)) .cross_ScaledMatrix else .tcp_ScaledMatrix
                     +    DelayedArray(compute(sm_seed))
                     +})
+                    +
                     +#' @importFrom Matrix tcrossprod
                     +.tcp_ScaledMatrix <- function(x_seed) { # transposed cross-product of the centered/scaled matrix, computed from the underlying matrix without consulting the transposition flag.
                     +    x0 <- get_matrix2(x_seed)
+                    +
                     +    if (use_scale(x_seed)) {
                     +        out <- as.matrix(.internal_tcrossprod(x0, get_scale(x_seed))) # scaling must be folded into the product itself here.
                     +    } else {
                     +        out <- as.matrix(tcrossprod(x0))
                     +    }
+                    +
                     +    if (use_center(x_seed)) {
                     +        centering <- get_center(x_seed)
+                    +
                     +        if (use_scale(x_seed)) {
                     +            centering <- centering / get_scale(x_seed)
                     +            extra <- centering / get_scale(x_seed) # centers divided by the squared scale values.
                     +        } else {
                     +            extra <- centering
                     +        }
+                    +
                     +        # With scaling, the use of 'extra' mimics sweep(x0, 2, get_scale(x_seed), "/"),
                     +        # except that the scaling is applied to 'centering' rather than directly to 'x0'.
                     +        # Without scaling, 'extra' and 'centering' are interchangeable.
                     +        component <- tcrossprod(extra, x0)
+                    +
                     +        # Minus, then add, then minus, to mitigate cancellation.
                     +        out <- sweep(out, 2, as.numeric(component), "-", check.margin=FALSE)
                     +        out <- out + sum(centering^2)
                     +        out <- out - as.numeric(x0 %*% extra)
                     +    }
+                    +
                     +    out
                     +}
+                    +
                     +#' @export
                     +#' @importFrom Matrix tcrossprod t
                     +#' @importFrom DelayedArray seed DelayedArray sweep
                     +setMethod("tcrossprod", c("ScaledMatrix", "ANY"), function(x, y) {
                     +    # Transposed cross-product with a ScaledMatrix as the first operand.
                     +    # Operands without dimensions are rejected for consistency with base::tcrossprod.
                     +    if (is.null(dim(y))) {
                     +        stop("non-conformable arguments")
                     +    }
+                    +
                     +    sm_seed <- seed(x)
                     +    res <- if (is_transposed(sm_seed)) {
                     +        t(.leftmult_ScaledMatrix(y, sm_seed))
                     +    } else {
                     +        .righttcp_ScaledMatrix(sm_seed, y)
                     +    }
                     +    DelayedArray(res)
                     +})
+                    +
                     +#' @importFrom Matrix tcrossprod
                     +.righttcp_ScaledMatrix <- function(x_seed, y) {
                     +    # Transposed cross-product of the centered/scaled matrix described by
                     +    # 'x_seed' with 'y'. The tcrossprod() method for (ScaledMatrix, ANY)
                     +    # rejects operands without dimensions, so 'y' is swept as a matrix.
                     +    rhs <- if (use_scale(x_seed)) sweep(y, 2, get_scale(x_seed), "/", check.margin=FALSE) else y
+                    +
                     +    res <- as.matrix(tcrossprod(get_matrix2(x_seed), rhs))
                     +    if (!use_center(x_seed)) {
                     +        return(res)
                     +    }
+                    +
                     +    # Contribution of the centers, one value per column of the product.
                     +    shift <- as.numeric(tcrossprod(get_center(x_seed), rhs))
                     +    sweep(res, 2, shift, "-", check.margin=FALSE)
                     +}
+                    +
                     +#' @export
                     +#' @importFrom Matrix tcrossprod t
                     +#' @importFrom DelayedArray seed DelayedArray
                     +setMethod("tcrossprod", c("ANY", "ScaledMatrix"), function(x, y) {
                     +    # Transposed cross-product with a ScaledMatrix as the second operand.
                     +    sm_seed <- seed(y)
                     +    # For a transposed seed, this reduces to a plain left-multiplication.
                     +    worker <- if (is_transposed(sm_seed)) .leftmult_ScaledMatrix else .lefttcp_ScaledMatrix
                     +    DelayedArray(worker(x, sm_seed))
                     +})
+                    +
                     +#' @importFrom Matrix tcrossprod
                     +.lefttcp_ScaledMatrix <- function(x, y_seed) {
                     +    # Transposed cross-product of 'x' with the centered/scaled matrix described
                     +    # by 'y_seed'. Scaling is moved onto 'x'; centering is removed afterwards.
                     +    if (use_scale(y_seed)) {
                     +        scaling <- get_scale(y_seed)
                     +        x <- if (is.null(dim(x))) x / scaling else sweep(x, 2, scaling, "/", check.margin=FALSE)
                     +    }
+                    +
                     +    res <- as.matrix(tcrossprod(x, get_matrix2(y_seed)))
                     +    if (!use_center(y_seed)) {
                     +        return(res)
                     +    }
+                    +
                     +    res - as.numeric(x %*% get_center(y_seed))
                     +}
+                    +
                     +#' @export
                     +#' @importFrom Matrix tcrossprod
                     +#' @importFrom DelayedArray DelayedArray seed
                     +setMethod("tcrossprod", c("ScaledMatrix", "ScaledMatrix"), function(x, y) {
                     +    # tcrossprod(x, y) is handled as a product in which the transposition
                     +    # flag of 'y' is inverted.
                     +    lhs <- seed(x)
                     +    rhs <- seed(y)
                     +    DelayedArray(.dual_mult_dispatcher(lhs, rhs, is_transposed(lhs), !is_transposed(rhs)))
                     +})
+                    +
                     +###################################
                     +# Extra code for corner-case calculations of the transposed cross-product.
+                    +
                     +#' @importFrom DelayedArray seed DelayedArray
                     +.update_scale <- function(x, s) {
                     +    # Returns a copy of 'x' whose seed additionally scales by 's'.
                     +    # Any scale values already present in the seed are multiplied by 's'.
                     +    x_seed <- seed(x)
                     +    combined <- if (use_scale(x_seed)) s * get_scale(x_seed) else s
+                    +
                     +    x_seed@scale <- combined
                     +    x_seed@use_scale <- TRUE
                     +    DelayedArray(x_seed)
                     +}
+                    +
                     +#' @importFrom Matrix tcrossprod
                     +#' @importFrom methods is
                     +#' @importFrom DelayedArray seed
                     +.internal_tcrossprod <- function(x, scale.)
                     +# Computes tcrossprod(sweep(x, 2, scale., "/")) when 'x' is a matrix-like object, without applying arithmetic to a ScaledMatrix 'x'.
                     +# 'scale.' can be assumed to be non-NULL here.
                     +# This will always return a dense ordinary matrix.
                     +{
                     +    if (!is(x, "ScaledMatrix")) { # simple case: scale the columns directly.
                     +        x <- sweep(x, 2, scale., "/", check.margin=FALSE)
                     +        return(as.matrix(tcrossprod(x)))
                     +    }
+                    +
                     +    x_seed <- seed(x)
                     +    if (!is_transposed(x_seed)) { # fold 'scale.' into the seed's own scaling.
                     +        x <- .update_scale(x, scale.)
                     +        return(as.matrix(tcrossprod(x)))
                     +    }
+                    +
                     +    inner <- get_matrix2(x_seed) # transposed ScaledMatrix: work on the underlying matrix and expand the centering/scaling by hand.
                     +    if (is(inner, "ScaledMatrix")) {
                     +        if (is_transposed(seed(inner))) {
                     +            component1 <- as.matrix(crossprod(.update_scale(inner, scale.)))
                     +        } else {
                     +            component1 <- .internal_tcrossprod(t(inner), scale.) # recurses.
                     +        }
                     +    } else {
                     +        component1 <- as.matrix(crossprod(inner/scale.))
                     +    }
+                    +
                     +    if (use_center(x_seed)) {
                     +        centering <- get_center(x_seed)
                     +        component2 <- .internal_mult_special(centering, scale., inner)
                     +        component3 <- t(component2)
                     +        component4 <- outer(centering, centering) * sum(1/scale.^2)
                     +        final <- (component1 - component2) + (component4 - component3) # grouping kept as written; presumably chosen to limit cancellation.
                     +    } else {
                     +        final <- component1
                     +    }
+                    +
                     +    if (use_scale(x_seed)) {
                     +        x.scale <- get_scale(x_seed)
                     +        final <- final / x.scale # divides the rows ...
                     +        final <- sweep(final, 2, x.scale, "/", check.margin=FALSE) # ... and then the columns.
                     +    }
+                    +
                     +    final
                     +}
+                    +
                     +#' @importFrom methods is
                     +#' @importFrom DelayedArray seed
                     +.internal_mult_special <- function(center, scale., Z)
                     +# Computes C^T * S^2 * Z where C is a matrix of 'center' values copied byrow=TRUE;
                     +# S is a diagonal matrix filled with '1/scale.'; and 'Z' is a matrix-like object, possibly a (nested) ScaledMatrix.
                     +# This will always return a dense ordinary matrix.
                     +{
                     +    if (!is(Z, "ScaledMatrix")) { # base case of the recursion.
                     +        return(outer(center, colSums(Z / scale.^2)))
                     +    }
+                    +
                     +    Z_seed <- seed(Z)
                     +    if (is_transposed(Z_seed)) {
                     +        Z <- .update_scale(Z, scale.^2) # fold the squared scaling into the seed instead of dividing 'Z'.
                     +        return(outer(center, colSums(Z)))
                     +    }
+                    +
                     +    output <- .internal_mult_special(center, scale., get_matrix2(Z_seed)) # recurses.
+                    +
                     +    if (use_center(Z_seed)) {
                     +        output <- output - outer(center, get_center(Z_seed)) * sum(1/scale.^2)
                     +    }
+                    +
                     +    if (use_scale(Z_seed)) {
                     +        output <- sweep(output, 2, get_scale(Z_seed), "/")
                     +    }
+                    +
                     +    output
                     +}

man/ScaledMatrix.Rd

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,112 @@
                     +% Generated by roxygen2: do not edit by hand
                     +% Please edit documentation in R/ScaledMatrix.R
                     +\docType{class}
                     +\name{ScaledMatrix}
                     +\alias{ScaledMatrix}
                     +\alias{ScaledMatrixSeed}
                     +\alias{ScaledMatrixSeed-class}
                     +\alias{dim,ScaledMatrixSeed-method}
                     +\alias{dimnames,ScaledMatrixSeed-method}
                     +\alias{extract_array,ScaledMatrixSeed-method}
                     +\alias{DelayedArray,ScaledMatrixSeed-method}
                     +\alias{show,ScaledMatrixSeed-method}
                     +\alias{ScaledMatrix-class}
                     +\alias{dimnames<-,ScaledMatrix,ANY-method}
                     +\alias{t,ScaledMatrix-method}
                     +\alias{[,ScaledMatrix,ANY,ANY,ANY-method}
                     +\alias{colSums,ScaledMatrix-method}
                     +\alias{rowSums,ScaledMatrix-method}
                     +\alias{colMeans,ScaledMatrix-method}
                     +\alias{rowMeans,ScaledMatrix-method}
                     +\alias{\%*\%,ANY,ScaledMatrix-method}
                     +\alias{\%*\%,ScaledMatrix,ANY-method}
                     +\alias{\%*\%,ScaledMatrix,ScaledMatrix-method}
                     +\alias{crossprod,ScaledMatrix,missing-method}
                     +\alias{crossprod,ScaledMatrix,ANY-method}
                     +\alias{crossprod,ANY,ScaledMatrix-method}
                     +\alias{crossprod,ScaledMatrix,ScaledMatrix-method}
                     +\alias{tcrossprod,ScaledMatrix,missing-method}
                     +\alias{tcrossprod,ScaledMatrix,ANY-method}
                     +\alias{tcrossprod,ANY,ScaledMatrix-method}
                     +\alias{tcrossprod,ScaledMatrix,ScaledMatrix-method}
                     +\title{The ScaledMatrix class}
                     +\usage{
                     +ScaledMatrix(x, center = NULL, scale = NULL)
                     +}
                     +\arguments{
                     +\item{x}{A matrix or any matrix-like object (e.g., from the \pkg{Matrix} package).
+                    +
                     +This can alternatively be a ScaledMatrixSeed, in which case any values of \code{center} and \code{scale} are ignored.}
+                    +
                     +\item{center}{A numeric vector of length equal to \code{ncol(x)}, where each element is to be subtracted from the corresponding column of \code{x}.
                     +A \code{NULL} value indicates that no subtraction is to be performed.
                     +Alternatively \code{TRUE}, in which case it is set to the column means of \code{x}.}
+                    +
                     +\item{scale}{A numeric vector of length equal to \code{ncol(x)}, where each element is the value by which the corresponding column of \code{x} is to be divided (after subtraction).
                     +A \code{NULL} value indicates that no division is to be performed.
                     +Alternatively \code{TRUE}, in which case it is set to the column-wise root-mean-squared differences from \code{center}
                     +(interpretable as standard deviations if \code{center} is set to the column means, see \code{\link{scale}} for commentary).}
                     +}
                     +\value{
                     +The \code{ScaledMatrixSeed} constructor will return a ScaledMatrixSeed object.
+                    +
                     +The \code{ScaledMatrix} constructor will return a ScaledMatrix object equivalent to \code{t((t(x) - center)/scale)}.
                     +}
                     +\description{
                     +Defines the ScaledMatrixSeed and ScaledMatrix classes and their associated methods.
                     +These classes support delayed centering and scaling of the columns in the same manner as \code{\link{scale}},
                     +but preserving the original data structure for more efficient operations like matrix multiplication.
                     +}
                     +\section{Methods for ScaledMatrixSeed objects}{
+                    +
                     +ScaledMatrixSeed objects are implemented as \linkS4class{DelayedMatrix} backends.
                     +They support standard operations like \code{dim}, \code{dimnames} and \code{extract_array}.
+                    +
                     +Passing a ScaledMatrixSeed object to the \code{\link{DelayedArray}} constructor will create a ScaledMatrix object.
+                    +
                     +It is possible for \code{x} to contain a ScaledMatrix, thus nesting one ScaledMatrix inside another.
                     +This can occasionally be useful in combination with transposition to achieve centering/scaling in both dimensions.
                     +}
+                    +
                     +\section{Methods for ScaledMatrix objects}{
+                    +
                     +ScaledMatrix objects are derived from \linkS4class{DelayedMatrix} objects and support all valid operations on the latter.
                     +Several functions are specialized for greater efficiency when operating on ScaledMatrix instances, including:
                     +\itemize{
                     +    \item Subsetting, transposition and replacement of row/column names.
                     +        These will return a new ScaledMatrix rather than a DelayedMatrix.
                     +    \item Matrix multiplication via \code{\%*\%}, \code{crossprod} and \code{tcrossprod}.
                     +        These functions will return a DelayedMatrix.
                     +    \item Calculation of row and column sums and means by \code{colSums}, \code{rowSums}, etc.
                     +}
+                    +
                     +All other operations applied to a ScaledMatrix will use the underlying \pkg{DelayedArray} machinery.
                     +Unary or binary operations will generally create a new DelayedMatrix instance containing a ScaledMatrixSeed.
+                    +
                     +Transposition can effectively be used to allow centering/scaling on the rows if the input \code{x} is transposed.
                     +}
+                    +
                     +\section{Efficiency vs precision}{
+                    +
                     +The raison d'etre of the ScaledMatrix is that it can offer faster matrix multiplication by avoiding the \pkg{DelayedArray} block processing.
                     +This is done by refactoring the scaling/centering operations to use the (hopefully more efficient) multiplication operator of the original matrix \code{x}.
                     +Unfortunately, the speed-up comes at the cost of increasing the risk of catastrophic cancellation.
                     +The procedure requires subtraction of one large intermediate number from another to obtain the values of the final matrix product.
                     +This could result in a loss of numerical precision that compromises the accuracy of downstream algorithms.
                     +In practice, this does not seem to be a major concern though one should be careful if the input \code{x} contains very large positive/negative values.
                     +}
+                    +
                     +\examples{
                     +library(Matrix)
                     +y <- ScaledMatrix(rsparsematrix(10, 20, 0.1),
                     +    center=rnorm(20), scale=1+runif(20))
                     +y
+                    +
                     +crossprod(y)
                     +tcrossprod(y)
                     +y \%*\% rnorm(20)
+                    +
                     +}
                     +\author{
                     +Aaron Lun
                     +}

tests/testthat.R

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,3 @@
                     +# Entry point for the package test suite: loads testthat and the package,
                     +# then runs test_check() for "ScaledMatrix".
                     +library(testthat)
                     +library(ScaledMatrix)
                     +test_check("ScaledMatrix")

tests/testthat/setup.R

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,76 @@
                     +scale_and_center <- function(y, ref, code) {
                     +    # Builds a ScaledMatrix from 'y' together with the equivalent ordinary
                     +    # matrix derived from 'ref'. 'code' selects the adjustment:
                     +    #   1 = column means as centers plus random scales,
                     +    #   2 = random centers only,
                     +    #   3 = random scales only,
                     +    #   anything else = no centering or scaling.
                     +    col.centers <- NULL
                     +    col.scales <- NULL
                     +    draw_scales <- function() runif(ncol(ref))
+                    +
                     +    if (code==1L) {
                     +        col.centers <- colMeans(ref)
                     +        col.scales <- draw_scales()
                     +        ref <- scale(ref, center=col.centers, scale=col.scales)
                     +    } else if (code==2L) {
                     +        col.centers <- rnorm(ncol(ref))
                     +        ref <- scale(ref, center=col.centers, scale=FALSE)
                     +    } else if (code==3L) {
                     +        col.scales <- draw_scales()
                     +        ref <- scale(ref, center=FALSE, scale=col.scales)
                     +    }
+                    +
                     +    # scale() tags its result with these attributes; drop them so that the
                     +    # reference is a plain matrix.
                     +    for (leftover in c("scaled:center", "scaled:scale")) {
                     +        attr(ref, leftover) <- NULL
                     +    }
+                    +
                     +    list(
                     +        def=ScaledMatrix(y, center=col.centers, scale=col.scales),
                     +        ref=ref
                     +    )
                     +}
+                    +
                     +spawn_scenarios_basic <- function(NR, NC, CREATOR, REALIZER) {
                     +    # Returns a list of 8 'def'/'ref' pairs: adjustment codes 1 to 4 without
                     +    # transposition, followed by codes 1 to 4 with transposition.
                     +    # 'CREATOR(r, c)' makes the input matrix and 'REALIZER' converts it into
                     +    # the ordinary matrix used as the reference.
                     +    lapply(seq_len(8L), function(idx) {
                     +        flip <- idx > 4L
                     +        it <- (idx - 1L) %% 4L + 1L
+                    +
                     +        # When transposing, create the matrix with swapped dimensions so that
                     +        # the final result still has NR rows and NC columns after t().
                     +        y <- if (flip) CREATOR(NC, NR) else CREATOR(NR, NC)
                     +        ref <- REALIZER(y)
+                    +
                     +        pair <- scale_and_center(y, ref, it)
                     +        if (flip) {
                     +            pair$def <- t(pair$def)
                     +            pair$ref <- t(pair$ref)
                     +        }
                     +        pair
                     +    })
                     +}
+                    +
                     +spawn_scenarios <- function(NR=50, NC=20) {
                     +    # Dense scenarios (ordinary matrices of normal deviates) followed by
                     +    # sparse scenarios (Matrix::rsparsematrix with density 0.1).
                     +    make_dense <- function(r, c) {
                     +        matrix(rnorm(r*c), ncol=c)
                     +    }
                     +    make_sparse <- function(r, c) {
                     +        Matrix::rsparsematrix(r, c, 0.1)
                     +    }
+                    +
                     +    dense <- spawn_scenarios_basic(NR, NC, CREATOR=make_dense, REALIZER=identity)
                     +    sparse <- spawn_scenarios_basic(NR, NC, CREATOR=make_sparse, REALIZER=as.matrix)
                     +    c(dense, sparse)
                     +}
+                    +
                     +expect_equal_product <- function(x, y) {
                     +    # Expects 'x' to be a DelayedMatrix whose realized values equal 'y',
                     +    # treating all-empty dimnames as equivalent to NULL dimnames.
                     +    drop_empty_dimnames <- function(mat) {
                     +        if (all(lengths(dimnames(mat))==0L)) {
                     +            dimnames(mat) <- NULL
                     +        }
                     +        mat
                     +    }
+                    +
                     +    expect_s4_class(x, "DelayedMatrix")
                     +    X <- drop_empty_dimnames(as.matrix(x))
                     +    y <- drop_empty_dimnames(y)
                     +    expect_equal(X, y)
                     +}

tests/testthat/test-class.R

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,101 @@
                     +# Tests the ScaledMatrix implementation.
                     +# library(testthat); library(ScaledMatrix); source("setup.R"); source("test-class.R")
+                    +
                     +# Checks construction, extraction, row/column statistics, transposition and
                     +# column renaming of a ScaledMatrix against the ordinary-matrix reference.
                     +set.seed(100001)
                     +test_that("ScaledMatrix utility functions work as expected", {
                     +    possibles <- spawn_scenarios()
                     +    for (test in possibles) {
                     +        expect_s4_class(test$def, "ScaledMatrix")
                     +        # Round trip: rebuilding from the seed gives back the same object.
                     +        expect_identical(test$def, ScaledMatrix(DelayedArray::seed(test$def)))
+                    +
                     +        expect_identical(dim(test$def), dim(test$ref))
                     +        expect_identical(extract_array(test$def, list(1:10, 1:10)), test$ref[1:10, 1:10])
                     +        expect_identical(extract_array(test$def, list(1:10, NULL)), test$ref[1:10,])
                     +        expect_identical(extract_array(test$def, list(NULL, 1:10)), test$ref[,1:10])
                     +        expect_identical(as.matrix(test$def), test$ref)
+                    +
                     +        expect_equal(rowSums(test$def), rowSums(test$ref))
                     +        expect_equal(colSums(test$def), colSums(test$ref))
                     +        expect_equal(rowMeans(test$def), rowMeans(test$ref))
                     +        expect_equal(colMeans(test$def), colMeans(test$ref))
+                    +
                     +        tdef <- t(test$def)
                     +        expect_s4_class(tdef, "ScaledMatrix") # still a ScaledMatrix!
                     +        expect_identical(t(tdef), test$def)
                     +        expect_identical(as.matrix(tdef), t(test$ref))
+                    +
                     +        # Checking column names getting and setting.
                     +        spawn_names <- sprintf("THING_%i", seq_len(ncol(test$def)))
                     +        colnames(test$def) <- spawn_names
                     +        expect_identical(spawn_names, colnames(test$def))
                     +        expect_s4_class(test$def, "ScaledMatrix") # still a ScaledMatrix!
                     +    }
                     +})
+                    +
                     +# Checks the argument-free constructor and the length validation of
                     +# 'center' and 'scale'.
                     +set.seed(10000101)
                     +test_that("ScaledMatrix silly inputs work as expected", {
                     +    # With no arguments, the constructor is expected to give a 0-by-0 matrix.
                     +    default <- ScaledMatrix()
                     +    expect_identical(dim(default), c(0L, 0L))
                     +    val <- as.matrix(default)
                     +    dimnames(val) <- NULL
                     +    expect_identical(val, matrix(0,0,0))
+                    +
                     +    # Checking erroneous inputs.
                     +    y <- matrix(rnorm(400), ncol=20)
                     +    expect_error(ScaledMatrix(y, center=1), "length of 'center' must equal")
                     +    expect_error(ScaledMatrix(y, scale=1), "length of 'scale' must equal")
                     +})
+                    +
                     +# Checks that row/column subsetting of a ScaledMatrix matches subsetting of
                     +# the ordinary-matrix reference, and that the ScaledMatrix class survives.
                     +set.seed(1000011)
                     +test_that("ScaledMatrix subsetting works as expected", {
                     +    expect_identical_and_defmat <- function(x, y) {
                     +        expect_s4_class(x, "ScaledMatrix") # class is correctly preserved by direct seed modification.
                     +        expect_identical(as.matrix(x), y)
                     +    }
+                    +
                     +    possibles <- spawn_scenarios()
                     +    for (test in possibles) {
                     +        i <- sample(nrow(test$def))
                     +        j <- sample(ncol(test$def))
                     +        expect_identical_and_defmat(test$def[i,], test$ref[i,])
                     +        expect_identical_and_defmat(test$def[,j], test$ref[,j])
                     +        expect_identical_and_defmat(test$def[i,j], test$ref[i,j])
+                    +
                     +        # Works with zero dimensions.
                     +        expect_identical_and_defmat(test$def[0,], test$ref[0,])
                     +        expect_identical_and_defmat(test$def[,0], test$ref[,0])
                     +        expect_identical_and_defmat(test$def[0,0], test$ref[0,0])
+                    +
                     +        # Dimension dropping works as expected.
                     +        expect_identical(test$def[i[1],], test$ref[i[1],])
                     +        expect_identical(test$def[,j[1]], test$ref[,j[1]])
                     +        # Row subsetting needs the empty column index ('[i,,drop=FALSE]'), to
                     +        # mirror the call made on the reference matrix.
                     +        expect_identical_and_defmat(test$def[i[1],,drop=FALSE], test$ref[i[1],,drop=FALSE])
                     +        expect_identical_and_defmat(test$def[,j[1],drop=FALSE], test$ref[,j[1],drop=FALSE])
+                    +
                     +        # Transposition with subsetting works as expected.
                     +        alt <- t(test$def)
                     +        expect_identical(t(alt[,i]), test$def[i,])
                     +        expect_identical(t(alt[j,]), test$def[,j])
+                    +
                     +        # Subsetting behaves with column names.
                     +        spawn_names <- sprintf("THING_%i", seq_len(ncol(test$def)))
                     +        colnames(test$def) <- spawn_names
                     +        colnames(test$ref) <- spawn_names
                     +        ch <- sample(spawn_names)
                     +        expect_identical_and_defmat(test$def[,ch], test$ref[,ch])
                     +    }
                     +})
+                    +
                     +# Checks that generic arithmetic and sweep() on a ScaledMatrix yield a
                     +# DelayedMatrix (verified by expect_equal_product) with the same values
                     +# as the reference.
                     +test_that("DelayedMatrix wrapping works", {
                     +    possibles <- spawn_scenarios(80, 50)
                     +    for (test in possibles) {
                     +        # Scalar arithmetic.
                     +        expect_equal_product(test$def+1, test$ref+1)
+                    +
                     +        # Arithmetic with a vector of length nrow().
                     +        v <- rnorm(nrow(test$def))
                     +        expect_equal_product(test$def+v, test$ref+v)
                     +        expect_equal_product(test$def*v, test$ref*v)
+                    +
                     +        # Column-wise sweep with a vector of length ncol().
                     +        w <- rnorm(ncol(test$def))
                     +        expect_equal_product(sweep(test$def, 2, w, "*"), sweep(test$ref, 2, w, "*"))
                     +    }
                     +})

tests/testthat/test-mult.R

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,381 @@
                     +# Tests the ScaledMatrix implementation.
                     +# library(testthat); library(ScaledMatrix); source("setup.R"); source("test-mult.R")
+                    +
                     +##########################
                     +# Defining a class that can't do anything but get multiplied.
                     +# This checks that there isn't any hidden DelayedArray realization
                     +# happening, which would give the same results but slower.
+                    +
                     +# Minimal S4 wrapper holding an ordinary matrix in slot 'x'.
                     +setClass("CrippledMatrix", slots=c(x="matrix"))
+                    +
                     +# Dimensions and row/column sums are delegated to the wrapped matrix.
                     +setMethod("dim", c("CrippledMatrix"), function(x) dim(x@x))
+                    +
                     +setMethod("colSums", c("CrippledMatrix"), function(x) colSums(x@x))
+                    +
                     +setMethod("rowSums", c("CrippledMatrix"), function(x) rowSums(x@x))
+                    +
                     +# sweep() is forwarded to the wrapped matrix with all arguments passed through.
                     +setMethod("sweep", c("CrippledMatrix"), function (x, MARGIN, STATS, FUN = "-", check.margin = TRUE, ...) {
                     +    sweep(x@x, MARGIN, STATS, FUN, check.margin, ...)
                     +})
+                    +
                     +# Multiplication methods: unwrap the CrippledMatrix operand and defer to the
                     +# same operation on the wrapped matrix, for either operand position.
                     +setMethod("%*%", c("CrippledMatrix", "ANY"), function(x, y) x@x %*% y)
+                    +
                     +setMethod("%*%", c("ANY", "CrippledMatrix"), function(x, y) x %*% y@x)
+                    +
                     +setMethod("crossprod", c("CrippledMatrix", "missing"), function(x, y) crossprod(x@x))
+                    +
                     +setMethod("crossprod", c("CrippledMatrix", "ANY"), function(x, y) crossprod(x@x, y))
+                    +
                     +setMethod("crossprod", c("ANY", "CrippledMatrix"), function(x, y) crossprod(x, y@x))
+                    +
                     +setMethod("tcrossprod", c("CrippledMatrix", "missing"), function(x, y) tcrossprod(x@x))
+                    +
                     +setMethod("tcrossprod", c("CrippledMatrix", "ANY"), function(x, y) tcrossprod(x@x, y))
+                    +
                     +setMethod("tcrossprod", c("ANY", "CrippledMatrix"), function(x, y) tcrossprod(x, y@x))
+                    +
                     +spawn_extra_scenarios <- function(NR=50, NC=20) {
                     +    # Returns the standard scenarios from spawn_scenarios() followed by
                     +    # scenarios built on a CrippledMatrix filled with uniform deviates.
                     +    c(
                     +        spawn_scenarios(NR, NC),
                     +        spawn_scenarios_basic(NR, NC,
                     +            CREATOR=function(r, c) {
                     +                # Use the requested dimensions 'r' and 'c' rather than the
                     +                # enclosing NR/NC, so that the transposed scenarios (which
                     +                # are requested with swapped dimensions) end up with NR
                     +                # rows and NC columns after t().
                     +                new("CrippledMatrix", x=matrix(runif(r*c), ncol=c))
                     +            },
                     +            REALIZER=function(x) x@x
                     +        )
                     +    )
                     +}
+                    +
                     +##########################
+                    +
                     +# Checks 'ScaledMatrix %*% z' against the ordinary-matrix reference for a
                     +# vector, a matrix and a zero-column matrix on the right.
                     +test_that("ScaledMatrix right multiplication works as expected", {
                     +    possibles <- spawn_extra_scenarios(100, 50)
                     +    for (test in possibles) {
                     +        ref.y <- test$ref
                     +        bs.y <- test$def
+                    +
                     +        # Multiply by a vector.
                     +        z <- rnorm(ncol(ref.y))
                     +        expect_equal_product(bs.y %*% z, ref.y %*% z)
+                    +
                     +        # Multiply by a matrix.
                     +        z <- matrix(rnorm(ncol(ref.y)*10), ncol=10)
                     +        expect_equal_product(bs.y %*% z, ref.y %*% z)
+                    +
                     +        # Multiply by an empty matrix.
                     +        z <- matrix(0, ncol=0, nrow=ncol(ref.y))
                     +        expect_equal_product(bs.y %*% z, ref.y %*% z)
                     +    }
                     +})
+                    +
                     +# Checks 'z %*% ScaledMatrix' against the ordinary-matrix reference for a
                     +# vector, a matrix and a zero-row matrix on the left.
                     +test_that("ScaledMatrix left multiplication works as expected", {
                     +    possibles <- spawn_extra_scenarios(50, 80)
                     +    for (test in possibles) {
                     +        ref.y <- test$ref
                     +        bs.y <- test$def
+                    +
                     +        # Multiply by a vector.
                     +        z <- rnorm(nrow(ref.y))
                     +        expect_equal_product(z %*% bs.y, z %*% ref.y)
+                    +
                     +        # Multiply by a matrix.
                     +        z <- matrix(rnorm(nrow(ref.y)*10), nrow=10)
                     +        expect_equal_product(z %*% bs.y, z %*% ref.y)
+                    +
                     +        # Multiply by an empty matrix.
                     +        z <- matrix(0, nrow=0, ncol=nrow(ref.y))
                     +        expect_equal_product(z %*% bs.y, z %*% ref.y)
                     +    }
                     +})
+                    +
                     +# Checks '%*%' between two ScaledMatrix objects, over every pairing of the
                     +# two scenario sets, including operands subsetted to zero rows or columns.
                     +test_that("ScaledMatrix dual multiplication works as expected", {
                     +    # Not using the CrippledMatrix here; some scaling of the inner matrix is unavoidable
                     +    # when the inner matrix is _not_ a ScaledMatrix but is being multiplied by one.
                     +    possibles1 <- spawn_scenarios(10, 20)
                     +    for (test1 in possibles1) {
                     +        possibles2 <- spawn_scenarios(20, 15)
                     +        for (test2 in possibles2) {
+                    +
                     +            expect_equal_product(test1$def %*% test2$def, test1$ref %*% test2$ref)
+                    +
                     +            # Checking that zero-dimension behaviour is as expected.
                     +            expect_equal_product(test1$def[0,] %*% test2$def, test1$ref[0,] %*% test2$ref)
                     +            expect_equal_product(test1$def %*% test2$def[,0], test1$ref %*% test2$ref[,0])
                     +            expect_equal_product(test1$def[,0] %*% test2$def[0,], test1$ref[,0] %*% test2$ref[0,])
                     +            expect_equal_product(test1$def[0,] %*% test2$def[,0], test1$ref[0,] %*% test2$ref[,0])
                     +        }
                     +    }
                     +})
+                    +
                     +##########################
+                    +
                     +# Checks the single-argument crossprod() of a ScaledMatrix against the
                     +# ordinary-matrix reference.
                     +test_that("ScaledMatrix lonely crossproduct works as expected", {
                     +    possibles <- spawn_extra_scenarios(90, 30)
                     +    for (test in possibles) {
                     +        ref.y <- test$ref
                     +        bs.y <- test$def
                     +        expect_equal_product(crossprod(bs.y), crossprod(ref.y))
                     +    }
                     +})
+                    +
                     +# Checks crossprod(ScaledMatrix, z) against the ordinary-matrix reference
                     +# for a vector, a matrix and a zero-column matrix as the second argument.
                     +test_that("ScaledMatrix crossproduct from right works as expected", {
                     +    possibles <- spawn_extra_scenarios(60, 50)
                     +    for (test in possibles) {
                     +        ref.y <- test$ref
                     +        bs.y <- test$def
+                    +
                     +        # Multiply by a vector.
                     +        z <- rnorm(nrow(ref.y))
                     +        expect_equal_product(crossprod(bs.y, z), crossprod(ref.y, z))
+                    +
                     +        # Multiply by a matrix.
                     +        z <- matrix(rnorm(nrow(ref.y)*10), ncol=10)
                     +        expect_equal_product(crossprod(bs.y, z), crossprod(ref.y, z))
+                    +
                     +        # Multiply by an empty matrix.
                     +        z <- matrix(0, ncol=0, nrow=nrow(ref.y))
                     +        expect_equal_product(crossprod(bs.y, z), crossprod(ref.y, z))
                     +    }
                     +})
+                    +
                     +# Checks crossprod(z, ScaledMatrix) against the ordinary-matrix reference
                     +# for a vector, a matrix and a zero-column matrix as the first argument.
                     +test_that("ScaledMatrix crossproduct from left works as expected", {
                     +    possibles <- spawn_extra_scenarios(40, 100)
                     +    for (test in possibles) {
                     +        ref.y <- test$ref
                     +        bs.y <- test$def
+                    +
                     +        # Multiply by a vector.
                     +        z <- rnorm(nrow(ref.y))
                     +        expect_equal_product(crossprod(z, bs.y), crossprod(z, ref.y))
+                    +
                     +        # Multiply by a matrix.
                     +        z <- matrix(rnorm(nrow(ref.y)*10), ncol=10)
                     +        expect_equal_product(crossprod(z, bs.y), crossprod(z, ref.y))
+                    +
                     +        # Multiply by an empty matrix.
                     +        z <- matrix(0, ncol=0, nrow=nrow(ref.y))
                     +        expect_equal_product(crossprod(z, bs.y), crossprod(z, ref.y))
                     +    }
                     +})
+                    +
                     +# Checks crossprod() between two ScaledMatrix objects, over every pairing
                     +# of the two scenario sets, including zero-dimension operands.
                     +test_that("ScaledMatrix dual crossprod works as expected", {
                     +    possibles1 <- spawn_scenarios(20, 50)
                     +    for (test1 in possibles1) {
                     +        possibles2 <- spawn_scenarios(20, 15)
                     +        for (test2 in possibles2) {
+                    +
                     +            expect_equal_product(crossprod(test1$def, test2$def), crossprod(test1$ref, test2$ref))
+                    +
                     +            # Checking that zero-dimension behaviour is as expected.
                     +            expect_equal_product(crossprod(test1$def[,0], test2$def), crossprod(test1$ref[,0], test2$ref))
                     +            expect_equal_product(crossprod(test1$def, test2$def[,0]), crossprod(test1$ref, test2$ref[,0]))
                     +            expect_equal_product(crossprod(test1$def[0,], test2$def[0,]), crossprod(test1$ref[0,], test2$ref[0,]))
                     +        }
                     +    }
                     +})
+                    +
                     +##########################
+                    +
                     +# Checks the single-argument tcrossprod() of a ScaledMatrix against the
                     +# ordinary-matrix reference.
                     +test_that("ScaledMatrix lonely tcrossproduct works as expected", {
                     +    possibles <- spawn_extra_scenarios(50, 80)
                     +    for (test in possibles) {
                     +        ref.y <- test$ref
                     +        bs.y <- test$def
                     +        expect_equal_product(tcrossprod(bs.y), tcrossprod(ref.y))
                     +    }
                     +})
+                    +
                     +# Checks tcrossprod(ScaledMatrix, z) against the ordinary-matrix reference
                     +# for a matrix and a zero-row matrix, and that a vector 'z' is rejected.
                     +test_that("ScaledMatrix tcrossproduct from right works as expected", {
                     +    possibles <- spawn_extra_scenarios(60, 70)
                     +    for (test in possibles) {
                     +        ref.y <- test$ref
                     +        bs.y <- test$def
+                    +
                     +        # Multiply by a vector (this doesn't work): both the ScaledMatrix and
                     +        # the reference are expected to fail with a "non-conformable" error.
                     +        z <- rnorm(ncol(ref.y))
                     +        expect_error(tcrossprod(bs.y, z), "non-conformable")
                     +        expect_error(tcrossprod(ref.y, z), "non-conformable")
+                    +
                     +        # Multiply by a matrix.
                     +        z <- matrix(rnorm(ncol(ref.y)*10), nrow=10)
                     +        expect_equal_product(tcrossprod(bs.y, z), tcrossprod(ref.y, z))
+                    +
                     +        # Multiply by an empty matrix.
                     +        z <- matrix(0, nrow=0, ncol=ncol(ref.y))
                     +        expect_equal_product(tcrossprod(bs.y, z), tcrossprod(ref.y, z))
                     +    }
                     +})
+                    +
                     +# Checks tcrossprod(z, ScaledMatrix) against the ordinary-matrix reference
                     +# for a vector, a matrix and a zero-row matrix as the first argument.
                     +test_that("ScaledMatrix tcrossproduct from left works as expected", {
                     +    possibles <- spawn_extra_scenarios(80, 50)
                     +    for (test in possibles) {
                     +        ref.y <- test$ref
                     +        bs.y <- test$def
+                    +
                     +        # Multiply by a vector.
                     +        z <- rnorm(ncol(ref.y))
                     +        expect_equal_product(tcrossprod(z, bs.y), tcrossprod(z, ref.y))
+                    +
                     +        # Multiply by a matrix.
                     +        z <- matrix(rnorm(ncol(ref.y)*10), nrow=10)
                     +        expect_equal_product(tcrossprod(z, bs.y), tcrossprod(z, ref.y))
+                    +
                     +        # Multiply by an empty matrix.
                     +        z <- matrix(0, nrow=0, ncol=ncol(ref.y))
                     +        expect_equal_product(tcrossprod(z, bs.y), tcrossprod(z, ref.y))
                     +    }
                     +})
+                    +
                     +# Checks tcrossprod() between two ScaledMatrix objects, over every pairing
                     +# of the two scenario sets, including zero-dimension operands.
                     +test_that("ScaledMatrix dual tcrossprod works as expected", {
                     +    possibles1 <- spawn_scenarios(20, 50)
                     +    for (test1 in possibles1) {
                     +        possibles2 <- spawn_scenarios(25, 50)
                     +        for (test2 in possibles2) {
+                    +
                     +            expect_equal_product(tcrossprod(test1$def, test2$def), tcrossprod(test1$ref, test2$ref))
+                    +
                     +            # Checking that zero-dimension behaviour is as expected.
                     +            expect_equal_product(tcrossprod(test1$def[0,], test2$def), tcrossprod(test1$ref[0,], test2$ref))
                     +            expect_equal_product(tcrossprod(test1$def, test2$def[0,]), tcrossprod(test1$ref, test2$ref[0,]))
                     +            expect_equal_product(tcrossprod(test1$def[,0], test2$def[,0]), tcrossprod(test1$ref[,0], test2$ref[,0]))
                     +        }
                     +    }
                     +})
+                    +
                     +##########################
+                    +
                     +wrap_in_ScMat <- function(input, reference)
                     +# Wrapping an input matrix in a ScaledMatrix, once for every combination of
                     +# transposition (FALSE/TRUE) and 'it' (1 to 4); returns a list of 8 results from scale_and_center().
                     +{
                     +    output <- vector("list", 8) # 2 transposition states x 4 values of 'it'
                     +    counter <- 1L # next free slot in 'output'
+                    +
                     +    for (trans in c(FALSE, TRUE)) {
                     +        for (it in 1:4) {
                     +            if (trans) {
                     +                y <- t(input) # adjust the transposed object ...
                     +                ref <- t(reference)
                     +            } else {
                     +                ref <- reference
                     +                y <- input
                     +            }
+                    +
                     +            adjusted <- scale_and_center(y, ref, it) # helper defined outside this chunk; its $def and $ref are used below; presumably 'it' picks the centering/scaling mode - TODO confirm
                     +            if (trans) {
                     +                adjusted$def <- t(adjusted$def) # ... then transpose back so the result has the orientation of 'input'
                     +                adjusted$ref <- t(adjusted$ref)
                     +            }
+                    +
                     +            output[[counter]] <- adjusted
                     +            counter <- counter+1L
                     +        }
                     +    }
                     +    output
                     +}
+                    +
                     +test_that("nested ScaledMatrix works as expected", { # wraps the output of wrap_in_ScMat() again and re-checks every operation
                     +    basic <- matrix(rnorm(400), ncol=20) # 20 x 20 starting matrix
+                    +
                     +    available <- list(list(def=basic, ref=basic)) # seed entry: object under test and reference are the same plain matrix
                     +    for (nesting in 1:2) {
                     +        # Creating nested ScMats with and without scaling/centering/transposition.
                     +        next_available <- vector("list", length(available))
                     +        for (i in seq_along(available)) {
                     +            current <- available[[i]]
                     +            next_available[[i]] <- wrap_in_ScMat(current$def, current$ref) # 8 new entries per existing entry
                     +        }
+                    +
                     +        # Testing each one of the newly created ScMats.
                     +        available <- unlist(next_available, recursive=FALSE) # flatten one level into a single list of def/ref entries
                     +        for (i in seq_along(available)) {
                     +            test <- available[[i]]
+                    +
                     +            # Coercion works.
                     +            expect_equal(as.matrix(test$def), test$ref)
+                    +
                     +            # Basic stats work.
                     +            expect_equal(rowSums(test$ref), rowSums(test$def))
                     +            expect_equal(colSums(test$ref), colSums(test$def))
+                    +
                     +            # Multiplication works (y is 20 x 2, used on the right and, transposed, on the left).
                     +            y <- matrix(rnorm(20*2), ncol=2)
                     +            expect_equal_product(test$def %*% y, test$ref %*% y)
                     +            expect_equal_product(t(y) %*% test$def, t(y) %*% test$ref)
+                    +
                     +            # Cross product (single-argument form, then with a fresh 20 x 2 matrix on either side).
                     +            y <- matrix(rnorm(20*2), ncol=2)
                     +            expect_equal_product(crossprod(test$def), crossprod(test$ref))
                     +            expect_equal_product(crossprod(test$def, y), crossprod(test$ref, y))
                     +            expect_equal_product(crossprod(y, test$def), crossprod(y, test$ref))
+                    +
                     +            # Transposed cross product (y is now 2 x 20).
                     +            y <- matrix(rnorm(20*2), nrow=2)
                     +            expect_equal_product(tcrossprod(test$def), tcrossprod(test$ref))
                     +            expect_equal_product(tcrossprod(test$def, y), tcrossprod(test$ref, y))
                     +            expect_equal_product(tcrossprod(y, test$def), tcrossprod(y, test$ref))
                     +        }
                     +    }
                     +})
+                    +
                     +set.seed(1200001) # fixed seed so the random draws below are reproducible
                     +test_that("deep testing of tcrossproduct internals: special mult", {
                     +    NR <- 20
                     +    NC <- 10
                     +    basic <- matrix(rnorm(NC*NR), ncol=NC) # NR x NC
                     +    c <- runif(NC) # local vector named 'c'; one value per column
                     +    s <- runif(NR) # one value per row
+                    +
                     +    ref <- t(matrix(c, NR, NC, byrow=TRUE)) %*% (basic/s^2) # NC x NR matrix whose columns all equal 'c', times 'basic' with row i divided by s[i]^2
                     +    out <- BiocSingular:::.internal_mult_special(c, s, basic) # NOTE(review): calls a BiocSingular internal rather than a ScaledMatrix one - confirm this is intended
                     +    expect_equal(ref, out)
+                    +
                     +    available <- list(list(def=basic, ref=basic))
                     +    for (nesting in 1:2) {
                     +        # Creating nested ScMats with and without scaling/centering/transposition.
                     +        next_available <- vector("list", length(available))
                     +        for (i in seq_along(available)) {
                     +            current <- available[[i]]
                     +            next_available[[i]] <- wrap_in_ScMat(current$def, current$ref)
                     +        }
+                    +
                     +        # Testing each one of the newly created nested ScMats against the same explicit product.
                     +        available <- unlist(next_available, recursive=FALSE)
                     +        for (i in seq_along(available)) {
                     +            test <- available[[i]]
                     +            ref <- t(matrix(c, NR, NC, byrow=TRUE)) %*% (test$ref/s^2)
                     +            out <- BiocSingular:::.internal_mult_special(c, s, test$def)
                     +            expect_equal(ref, out)
                     +        }
                     +    }
                     +})
+                    +
                     +set.seed(1200002) # fixed seed so the random draws below are reproducible
                     +test_that("deep testing of tcrossproduct internals: scaled tcrossprod", {
                     +    NC <- 30
                     +    NR <- 15
                     +    s <- runif(NC) # one value per column
                     +    basic <- matrix(rnorm(NC*NR), ncol=NC) # NR x NC
+                    +
                     +    ref <- crossprod(t(basic)/s) # divides column j of 'basic' by s[j], then forms the NR x NR product of the result with its own transpose
                     +    out <- BiocSingular:::.internal_tcrossprod(basic, s) # NOTE(review): calls a BiocSingular internal rather than a ScaledMatrix one - confirm this is intended
                     +    expect_equal(ref, out)
+                    +
                     +    available <- list(list(def=basic, ref=basic))
                     +    for (nesting in 1:2) {
                     +        # Creating nested ScMats with and without scaling/centering/transposition.
                     +        next_available <- vector("list", length(available))
                     +        for (i in seq_along(available)) {
                     +            current <- available[[i]]
                     +            next_available[[i]] <- wrap_in_ScMat(current$def, current$ref)
                     +        }
+                    +
                     +        # Testing each one of the newly created nested ScMats against the same explicit product.
                     +        available <- unlist(next_available, recursive=FALSE)
                     +        for (i in seq_along(available)) {
                     +            test <- available[[i]]
                     +            ref <- crossprod(t(test$ref)/s)
                     +            out <- BiocSingular:::.internal_tcrossprod(test$def, s)
                     +            expect_equal(ref, out)
                     +        }
                     +    }
                     +})

tests/testthat/test-scale.R

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,25 @@
                     +# This tests the auto-scaling in the constructor.
                     +# library(testthat); library(ScaledMatrix); source("test-scale.R")
+                    +
                     +stripscale <- function(mat, ...) { # runs scale(mat, ...) and drops the attributes it attaches, leaving a plain matrix for comparison
                     +    out <- scale(mat, ...)
                     +    attr(out, "scaled:center") <- NULL # remove the recorded centers, if any
                     +    attr(out, "scaled:scale") <- NULL # remove the recorded scaling values, if any
                     +    out
                     +}
+                    +
                     +test_that("ScaledMatrix mimics scale()", { # the realized ScaledMatrix must be identical to the attribute-free output of scale()
                     +    mat <- matrix(rnorm(10000), ncol=10) # 1000 x 10
+                    +
                     +    out <- ScaledMatrix(mat, center=TRUE) # centering only
                     +    expect_identical(as.matrix(out), stripscale(mat, scale=FALSE))
+                    +
                     +    out <- ScaledMatrix(mat, scale=TRUE) # scaling only
                     +    expect_identical(as.matrix(out), stripscale(mat, center=FALSE))
+                    +
                     +    out <- ScaledMatrix(mat, center=TRUE, scale=TRUE) # both, matching scale()'s defaults
                     +    expect_identical(as.matrix(out), stripscale(mat))
+                    +
                     +    out <- ScaledMatrix(mat) # no arguments: expected to return the input values unchanged
                     +    expect_identical(as.matrix(out), mat)
                     +})

vignettes/ScaledMatrix.Rmd

History View file @ 508ea1b

                     new file mode 100644
@@ -0,0 +1,128 @@
                     +---
                     +title: Using the `ScaledMatrix` class
                     +author:
                     +- name: Aaron Lun
                     +  email: [email protected]
                     +date: "Revised: 12 December 2020"
                     +output:
                     +  BiocStyle::html_document:
                     +    toc_float: true
                     +package: ScaledMatrix
                     +vignette: >
                     +  %\VignetteIndexEntry{Using the ScaledMatrix}
                     +  %\VignetteEngine{knitr::rmarkdown}
                     +  %\VignetteEncoding{UTF-8}
                     +---
+                    +
                     +```{r, echo=FALSE, results="hide", message=FALSE}
                     +knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)
                     +```
+                    +
                     +# Overview
+                    +
                     +The `ScaledMatrix` provides yet another method of running `scale()` on a matrix.
                     +In other words, these three operations are equivalent:
+                    +
                     +```{r}
                     +mat <- matrix(rnorm(10000), ncol=10)
+                    +
                     +smat1 <- scale(mat)
                     +head(smat1)
+                    +
                     +library(DelayedArray)
                     +smat2 <- scale(DelayedArray(mat))
                     +head(smat2)
+                    +
                     +library(ScaledMatrix)
                     +smat3 <- ScaledMatrix(mat, center=TRUE, scale=TRUE)
                     +head(smat3)
                     +```
+                    +
                     +The biggest difference lies in how they behave in downstream matrix operations.
+                    +
                     +- `smat1` is an ordinary matrix, with the scaled and centered values fully realized in memory.
                     +Nothing too unusual here.
                     +- `smat2` is a `DelayedMatrix` and undergoes block processing whereby chunks are realized and operated on, one at a time.
                     +This sacrifices speed for greater memory efficiency by avoiding a copy of the entire matrix.
                     +In particular, it preserves the structure of the original `mat`, e.g., from a sparse or file-backed representation.
                     +- `smat3` is a `ScaledMatrix` that refactors certain operations so that they can be applied to the original `mat` without any scaling or centering.
                     +This takes advantage of the original data structure to speed up matrix multiplication and row/column sums,
                     +albeit at the cost of numerical precision.
+                    +
                     +# Matrix multiplication
+                    +
                     +Given an original matrix $\mathbf{X}$ with $m$ rows and $n$ columns, a vector of column centers $\mathbf{c}$ and a vector of column scaling values $\mathbf{s}$,
                     +our scaled matrix can be written as:
+                    +
                     +$$
                     +\mathbf{Y} = (\mathbf{X} - \mathbf{1}_m \mathbf{c}^T) \mathbf{S}
                     +$$
+                    +
                     +where $\mathbf{1}_m$ is a column vector of $m$ ones and $\mathbf{S} = \text{diag}(s_1^{-1}, ..., s_n^{-1})$.
                     +If we wanted to right-multiply it with another matrix $\mathbf{A}$, we would have:
+                    +
                     +$$
                     +\mathbf{YA} = \mathbf{X}\mathbf{S}\mathbf{A} - \mathbf{1}_m \mathbf{c}^T \mathbf{S}\mathbf{A}
                     +$$
+                    +
                     +The right-most expression is simply the outer product of $\mathbf{1}_m$ with the row vector $\mathbf{c}^T \mathbf{SA}$, i.e., the same row repeated $m$ times.
                     +More important is the fact that we can use the matrix multiplication operator for $\mathbf{X}$ with $\mathbf{SA}$,
                     +as this allows us to use highly efficient algorithms for certain data representations, e.g., sparse matrices.
+                    +
                     +```{r}
                     +library(Matrix)
                     +mat <- rsparsematrix(20000, 10000, density=0.01)
                     +smat <- ScaledMatrix(mat, center=TRUE, scale=TRUE)
+                    +
                     +blob <- matrix(runif(ncol(mat) * 5), ncol=5)
                     +system.time(out <- smat %*% blob)
+                    +
                     +# The slower way with block processing.
                     +da <- scale(DelayedArray(mat))
                     +system.time(out2 <- da %*% blob)
                     +```
+                    +
                     +The same logic applies for left-multiplication and cross-products.
                     +This allows us to easily speed up high-level operations involving matrix multiplication by just switching to a `ScaledMatrix`,
                     +e.g., in approximate PCA algorithms from the `r Biocpkg("BiocSingular")` package.
+                    +
                     +# Other utilities
+                    +
                     +Row and column sums are special cases of matrix multiplication and can be computed quickly:
+                    +
                     +```{r}
                     +system.time(rowSums(smat))
                     +system.time(rowSums(da))
                     +```
+                    +
                     +Subsetting, transposition and renaming of the dimensions are all supported without loss of the `ScaledMatrix` representation:
+                    +
                     +```{r}
                     +smat[,1:5]
                     +t(smat)
                     +rownames(smat) <- paste0("GENE_", 1:20000)
                     +smat
                     +```
+                    +
                     +Other operations will cause the `ScaledMatrix` to collapse to the general `DelayedMatrix` representation, after which point block processing will be used.
+                    +
                     +```{r}
                     +smat + 1
                     +```
+                    +
                     +# Caveats
+                    +
                     +For the most part, the implementation of the multiplication assumes that the $\mathbf{A}$ matrix and the matrix product are small compared to $\mathbf{X}$.
                     +It is also possible to multiply two `ScaledMatrix`es together if the underlying matrices have efficient operators for their product.
                     +However, if this is not the case, the `ScaledMatrix` offers little benefit in exchange for its increased overhead.
+                    +
                     +It is also worth noting that this speed-up is not entirely free.
                     +The expression above involves subtracting two matrices with potentially large values, which runs the risk of catastrophic cancellation.
                     +In most practical applications, though, this does not seem to be a major concern,
                     +especially as most values (e.g., log-normalized expression matrices) lie close to zero anyway.
+                    +
                     +# Session information {-}
+                    +
                     +```{r}
                     +sessionInfo()
                     +```