Bioconductor Code: SpatialFeatureExperiment

Raw Blame Patch Log History
#' Get all unique sample IDs
#'
#' The title is self-explanatory.
#'
#' @param sfe A \code{SpatialFeatureExperiment} object.
#' @return A character vector of all unique entries of the \code{sample_id}
#' column in \code{colData(x)}.
#' @export
#' @importFrom SummarizedExperiment colData colData<- rowData
#' @concept Utilities
#' @examples
#' library(SFEData)
#' sfe <- McKellarMuscleData(dataset = "small")
#' sampleIDs(sfe)
sampleIDs <- function(sfe) unique(colData(sfe)$sample_id)

.translate_value <- function(x, translate, value, sample_id = NULL) {
    if (translate && !is.null(int_metadata(x)$orig_bbox)) {
        if (anyNA(value$sample_id) || is.null(value$sample_id)) {
            if (nrow(value) == ncol(x))
                value$sample_id <- colData(x)$sample_id
            else if (nrow(value) == nrow(x))
                value$sample_id <- .check_sample_id(x, sample_id)
        }
        orig_bbox <- int_metadata(x)$orig_bbox
        # Don't translate if already translated
        curr_bbox <- st_bbox(value)
        samples <- unique(value$sample_id) %||% sample_id
        if (length(samples) > 1L) {
            value$ID_ <- seq_len(nrow(value)) # Unlikely name
            df <- value[,c("ID_", "sample_id", "geometry")]
            df_split <- split(df, value$sample_id)
            df_split <- lapply(samples, function(s) {
                out <- df_split[[s]]
                og <- out$geometry - orig_bbox[c("xmin", "ymin"), s]
                bb <- st_bbox(og)
                if ((bb["xmin"] < 0 || bb["ymin"] < 0) || anyNA(bb)) return(out)
                else
                    out$geometry <- og
                out
            })
            df <- do.call(rbind, df_split)
            df <- df[match(value$ID_, df$ID_),]
            value$geometry <- df$geometry
            value$ID_ <- NULL
        } else {
            if (curr_bbox["xmin"] - orig_bbox["xmin", samples] < 0 ||
                curr_bbox["ymin"] - orig_bbox["ymin", samples] < 0) {
                return(value)
            }
            value$geometry <- value$geometry - orig_bbox[c("xmin", "ymin"), samples]
        }
    }
    value
}

.path_valid2 <- function(x) {
    all(c(length(x) == 1, is.character(x), file.exists(x)))
}

#' Find center of bounding box
#'
#' Get x-y coordinates of the center of any bounding box
#'
#' @param bbox A numeric vector of length 4 with names xmin, xmax, ymin, ymax,
#' in any order.
#' @return A numeric vector of length 2.
#' @concept Utilities
#' @examples
#' bbox <- c(xmin = 0, xmax = 100, ymin = 0, ymax = 80)
#' bbox_center(bbox)
#' @export
bbox_center <- function(bbox) {
    c(mean(bbox[c("xmin", "xmax")]),
      mean(bbox[c("ymin", "ymax")]))
}

#' Get physical size of pixels
#'
#' This function gets physical size of pixels in each resolution of a OME-TIFF
#' pyramid in \code{\link{BioFormatsImage}}.
#'
#' @param file Path to an OME-TIFF file.
#' @param resolution Which resolution to query; 1 means the highest resolution.
#' The pixels will be larger for the lower resolutions.
#' @return Numeric vector of length 2 of pixel size in x and y. Usually they're
#' the same.
#' @export
#' @concept Utilities
#' @examples
#' library(SFEData)
#' fp <- tempfile()
#' dir_use <- XeniumOutput("v1", file_path = fp)
#' # RBioFormats null pointer error
#' try(getPixelSize(file.path(dir_use, "morphology_focus.ome.tif")))
#' getPixelSize(file.path(dir_use, "morphology_focus.ome.tif"))
#' unlink(dir_use, recursive = TRUE)
getPixelSize <- function(file, resolution = 1L) {
    check_installed(c("xml2", "RBioFormats"))
    xml_meta <- RBioFormats::read.omexml(file) |>
        xml2::read_xml() |> xml2::as_list()
    psx <- attr(xml_meta$OME$Image$Pixels, "PhysicalSizeX") |> as.numeric()
    psy <- attr(xml_meta$OME$Image$Pixels, "PhysicalSizeY") |> as.numeric()
    if (resolution == 1L) return(c(psx, psy))
    else {
        m <- RBioFormats::read.metadata(file)
        coreMetadata <- RBioFormats::coreMetadata
        meta <- coreMetadata(m, series = resolution)
        meta1 <- coreMetadata(m, series = 1L)
        sizeX_full <- meta1$sizeX
        sizeY_full <- meta1$sizeY
        fct_x <- sizeX_full/meta$sizeX
        fct_y <- sizeY_full/meta$sizeY
        fct_round <- round(fct_x) # Should be the same for x and y
        fctx2 <- fct_x/fct_round
        fcty2 <- fct_y/fct_round

        sfx2 <- meta$sizeX*fctx2/sizeX_full # Multiply to get from full res pixel to low res pixel
        sfy2 <- meta$sizeY*fcty2/sizeY_full
        psx_out <- psx/sfx2
        psy_out <- psy/sfy2
        return(c(psx_out, psy_out))
    }
}

#' Aggregate bounding boxes
#'
#' To find the bounding box of multiple bounding boxes.
#'
#' @param bboxes Either a matrix with 4 rows whose columns are the different
#' bounding boxes, with row names "xmin", "xmax", "ymin", and "ymax" in any order,
#' or a list of bounding boxes which are named numeric vectors.
#' @return A named numeric vector for the total bounding box.
#' @export
#' @concept Utilities
#' @examples
#' bboxes <- list(c(xmin = 5, xmax = 10, ymin = 2, ymax = 20),
#' c(xmin = 8, xmax = 18, ymin = 0, ymax = 15))
#' bbox_all <- aggBboxes(bboxes)
aggBboxes <- function(bboxes) {
    if (is.list(bboxes)) {
        bboxes <- lapply(bboxes, function(x) x[c("xmin", "ymin", "xmax", "ymax")])
        bboxes <- do.call(rbind, bboxes)
    }
    c(
        xmin = min(bboxes[, "xmin"], na.rm = TRUE),
        ymin = min(bboxes[, "ymin"], na.rm = TRUE),
        xmax = max(bboxes[, "xmax"], na.rm = TRUE),
        ymax = max(bboxes[, "ymax"], na.rm = TRUE)
    )
}

#' Show all image_ids in the SFE object
#'
#' The title is self-explanatory. Some functions require \code{image_id} to get
#' or set images.
#'
#' @inheritParams sampleIDs
#' @return A character vector of \code{image_ids}.
#' @export
#' @concept Utilities
#' @examples
#' fp <- system.file(file.path("extdata", "sample01"),
#' package = "SpatialFeatureExperiment")
#' sfe <- read10xVisiumSFE(fp, type = "sparse")
#' imageIDs(sfe)
imageIDs <- function(sfe) imgData(sfe)$image_id

#' Check if Parquet GDAL driver is available
#'
#' The GeoParquet files for geometries are typically written and read with the
#' \code{sfarrow} package, but to add only a select few genes to the SFE object
#' say for visualization purposes, the Parquet GDAL driver is required in order
#' to use GDAL's SQL to query the GeoParquet file to only load the few genes
#' requested. The transcript spots from a large dataset can take up a lot of
#' memory if all loaded.
#'
#' The Parquet driver has been supported since GDAL 3.5.0. The \code{arrow} C++
#' library must be installed in order to make the Parquet driver available. When
#' arrow is installed, newer versions of GDAL installed from Homebrew (Mac)
#' should have the Parquet driver. For Linux, the binary from \code{apt-get}'s
#' default repo is 3.4.1 (as of April 2024). To use the Parquet driver, GDAL may
#' need to be installed from source. See script from the \href{https://github.com/rocker-org/rocker-versioned2/blob/master/scripts/experimental/install_dev_osgeo.sh}{geospatial rocker}.
#' A Voyager docker container with the Parquet driver will soon be provided.
#'
#' @return Logical, indicating whether the Parquet driver is present.
#' @export
#' @concept Utilities
#' @examples
#' gdalParquetAvailable()
#'
gdalParquetAvailable <- function() {
    "Parquet" %in% rownames(sf::st_drivers())
}

.get_XOA_version <- function(data_dir) {
    experiment <- fromJSON(file = file.path(data_dir, "experiment.xenium"),
                           simplify = TRUE)
    xoa_version <- experiment$analysis_sw_version
    major_version <- substr(xoa_version, 8, 8) |> as.integer()
    minor_version <- substr(xoa_version, 10, 10) |> as.integer()
    instrument_version <- experiment$instrument_sw_version
    c(xoa = xoa_version, major = major_version, minor = minor_version,
      instrument = instrument_version)
}

.no_raw_bytes <- function(data_dir) {
    c(xoa_version, major_version, minor_version, instrument_version) %<-%
        .get_XOA_version(data_dir)
    (major_version == 1L && minor_version > 4L) || major_version == 2L
}

#' Get relevant fields and file paths for transcript spots
#'
#' Get column names for x, y, and z coordinates, gene IDs, and cell IDs from the
#' transcript file and get file paths for transcript spot coordinates given
#' technology.
#'
#' @param tech Name of the commercial technology, must be one of Vizgen, Xenium,
#' and CosMX.
#' @param data_dir Top level directory of the output.
#' @return A named list with elements:
#' \describe{
#' \item{\code{spatialCoordsNames}}{A character vector for column names for the
#' xyz coordinates of the transcript spots.}
#' \item{\code{gene_col}}{Column name for gene IDs.}
#' \item{\code{cell_col}}{Column name for cell IDs.}
#' \item{\code{fn}}{File path of the transcript spot file.}
#' }
#' @export
getTechTxFields <- function(tech, data_dir = NULL) {
    spatialCoordsNames <- switch(
        tech,
        Vizgen = c("global_x", "global_y", "global_z"),
        Xenium = c("x_location", "y_location", "z_location"),
        CosMX = c("x_global_px", "y_global_px", "z")
    )
    gene_col <- switch(
        tech,
        CosMX = "target",
        Xenium = "feature_name",
        Vizgen = "gene"
    )
    cell_col <- switch(
        tech,
        Vizgen = "barcode_id",
        Xenium = "cell_id",
        CosMX = "cell_ID"
    )
    if (is.null(data_dir)) fn <- NULL
    else {
        fn <- switch(
            tech,
            Vizgen = .check_vizgen_fns(data_dir, "detected_transcripts.csv"),
            CosMX = grep("tx_file.csv",
                         list.files(data_dir, pattern = "\\.csv.*$", full.names = TRUE),
                         value = TRUE)[1],
            Xenium = .check_xenium_fns(data_dir, "transcripts", .no_raw_bytes(data_dir))
        )
    }
    list(spatialCoordsNames = spatialCoordsNames,
         gene_col = gene_col,
         cell_col = cell_col,
         fn = fn)
}

.id2symbol <- function(x, ids, swap_rownames) {
    if (!is.null(swap_rownames) && swap_rownames %in% names(rowData(x))) {
        inds <- match(ids, rownames(x))
        features <- rowData(x)[[swap_rownames]][inds]
    } else features <- ids
    features
}

.check_tx_file <- function(file, spatialCoordsNames, gene_col, phred_col,
                           min_phred, flip, BPPARAM = SerialParam()) {
    if (is.character(file)) {
        ext <- file_ext(file)
        if (!ext %in% c("csv", "gz", "tsv", "txt", "parquet")) {
            stop("The file must be one of csv, gz, tsv, txt, or parquet")
        }
        if (ext == "parquet") {
            check_installed("arrow")
            mols <- arrow::read_parquet(file)
            # convert cols with raw bytes to character
            # NOTE: can take a while.
            mols <- .rawToChar_df(mols, BPPARAM = BPPARAM)
            # sanity, convert to data.table
            if (!inherits(mols, "data.table")) {
                mols <- data.table::as.data.table(mols)
            }
        } else {
            mols <- fread(file)
        }
    } else mols <- file

    names(mols)[names(mols) == gene_col] <- "gene"
    gene_col <- "gene"
    ind <- !spatialCoordsNames[1:2] %in% names(mols)
    if (any(ind)) {
        col_offending <- setdiff(spatialCoordsNames[1:2], names(mols))
        ax <- c("x", "y")
        stop(paste(ax[ind], collapse = ", "), " coordinate column(s) ",
             paste(col_offending, collapse = ", "), " not found.")
    }
    spatialCoordsNames <- intersect(spatialCoordsNames, names(mols))
    if (flip) {
        y_name <- spatialCoordsNames[2]
        if (!is.data.table(mols)) ..y_name <- y_name
        mols[,y_name] <- -mols[,..y_name]
    }
    if (phred_col %in% names(mols) && !is.null(min_phred)) {
        mols <- mols[mols[[phred_col]] >= min_phred,]
    }
    mols
}

.size_str2num <- function(x) {
    x <- toupper(x)
    unit <- gsub("^[0-9.]+\\s?", "", x)
    if (!unit %in% c("MB", "GB"))
        stop("x must be in either MB or GB.")
    x <- as.numeric(gsub("\\s?[A-Z.]+$", "", x))
    switch (unit,
            MB = x * 1024^2,
            GB = x * 1024^3
    )
}

.get_bbox_prop <- function(bbox, img) {
    tot_area <- ext(img) |> st_bbox() |> st_as_sfc() |> st_area()
    bb_area <- bbox |> st_bbox() |> st_as_sfc() |> st_area()
    bb_area/tot_area
}