.is0 <- function(x) { if (length(x)) { if (is.logical(x)) !any(x) else FALSE } else TRUE } .subset_listw <- function(x, subset, zero.policy=attr(x, "zero.policy"), ...) { nb <- x$neighbours vlist <- x$weights style <- x$style m_wts <- as_dgRMatrix_listw(x) m_wts <- m_wts[subset, subset] mat2listw(m_wts, style = style, zero.policy = zero.policy) } #' Subsetting SpatialFeatureExperiment objects #' #' The SFE method has special treatment for the spatial graphs. In \code{listw}, #' the neighbors are indicated by indices, which will change after subsetting. #' The \code{SFE_graph_subset} option determines whether the graphs are #' subsetted or reconstructed. In the default (\code{options(SFE_graph_subset = #' TRUE)}), the graphs are subsetted, in which case singletons may be produced. #' For \code{options(SFE_graph_subset = FALSE)}, which is the behavior of #' versions earlier than Bioc 3.20, the graphs are reconstructed with the #' parameters recorded in an attribute of the graphs. This option can result #' into different graphs. For example, suppose we start with a k nearest #' neighbor graph. After subsetting, cells at the boundary of the region used to #' subset the SFE object may lose some of their neighbors. In contrast, when the #' graph is reconstructed, these same edge cells will gain other cells that #' remain after subsetting as neighbors in the new KNN graph. #' #' The option \code{SFE_graph_subset} was introduced because subsetting is #' usually faster than reconstructing and in some cases such as distance-based #' neighbors and Visium spot adjacency give the same results. It was introduced #' also because of the development of \code{alabster.sfe} for a #' language-agnostic on-disk serialization of SFE objects and some parameters #' used to construct graphs have special classes whose \code{alabaster} methods #' have not been implemented, such as \code{BPPARAM} and \code{BNPARAM}, so when #' reconstructing, the defaults for those arguments will be used. #' #' The edge weights will be recomputed from the binary neighborhood indicator #' with the same normalization style as the original graph, such as "W" for row #' normalization. When distance-based edge weights are used instead of the #' binary indicator, the edge weights will be re-normalized, which is mostly #' some rescaling. This should give the same results as recomputing the distance #' based edge weights for styles "raw", "W", and "B" since the distances #' themselves don't change, but the effects of other more complicated styles of #' re-normalization on spatial statistics should be further investigated. #' #' By default, upon subsetting, the images are cropped to the bounding box of #' the remaining cells. However, when the image is large and the bounding box #' contains most of the original image, cropping is slow. Cropping can be #' disabled by \code{options(SFE_subset_crop = FALSE)}. Also, when the remaining #' part of the image is larger than a threshold, the image will not be cropped; #' the threshold can be set with the \code{SFE_subset_crop_max} option, such as #' \code{options(SFE_subset_crop_max = "100MB")}. #' #' @param x A \code{SpatialFeatureExperiment} object. #' @param i Row indices for subsetting. #' @param j column indices for subsetting. #' @param drop Only used if graphs are reconstructed #' (\code{options(SFE_graph_subset = FALSE)}). If \code{TRUE} then #' \code{colGraphs} are dropped but \code{annotGraphs} are kept. #' @param ... Passed to the \code{SingleCellExperiment} method of \code{[}. #' @importFrom methods callNextMethod #' @importFrom utils getFromNamespace #' @importFrom spdep mat2listw #' @importFrom spatialreg as_dgRMatrix_listw #' @return A subsetted \code{SpatialFeatureExperiment} object. #' @name SpatialFeatureExperiment-subset #' @aliases [,SpatialFeatureExperiment,ANY,ANY,ANY-method #' @concept Non-spatial operations #' @export #' @examples #' # Just like subsetting matrices and SingleCellExperiment #' library(SFEData) #' sfe <- McKellarMuscleData(dataset = "small") #' sfe_subset <- sfe[seq_len(10), seq_len(10), drop = TRUE] #' # Gives warning as graph reconstruction fails #' \donttest{ #' sfe_subset <- sfe[seq_len(10), seq_len(10)] #' } setMethod( "[", c("SpatialFeatureExperiment", "ANY", "ANY"), function(x, i, j, ..., drop = FALSE) { # Because the extra graphs and sample_ids result into invalid object old <- S4Vectors:::disableValidity() if (!isTRUE(old)) { S4Vectors:::disableValidity(TRUE) on.exit(S4Vectors:::disableValidity(old)) } cn <- colnames(x) sample_ids0 <- sampleIDs(x) old_sample_colnames <- lapply(sample_ids0, function(s) { cn[colData(x)$sample_id %in% s] }) # Subset the matrix and col and rowData # Suppress because `drop` will be used suppressWarnings(x <- callNextMethod()) sample_ids <- sampleIDs(x) # Subset annotGeometries based on sample_id if (length(annotGeometries(x))) { ag_sub <- annotGeometries(x) for (g in seq_along(ag_sub)) { ag_ind <- ag_sub[[g]] ag_ind <- ag_ind[ag_ind$sample_id %in% sample_ids, ] ag_sub[[g]] <- ag_ind } annotGeometries(x) <- ag_sub } # Remove rowGeometries when an entire sample got removed if (!is.null(rowGeometryNames(x))) { samples_rm <- setdiff(sample_ids0, sample_ids) if (length(samples_rm)) { rowGeometries(x, sample_id = samples_rm) <- NULL } } # Crop images with new bbox if (!missing(j)) { x <- .crop_imgs(x, bbox(x, sample_id = "all")) } # Subset *Graphs based on sample_id and reconstruct row and colGraphs if (!is.null(spatialGraphs(x)) && (!missing(j) && !.is0(j))) { do_subset <- getOption("SFE_graph_subset") graphs_sub <- int_metadata(x)$spatialGraphs graphs_sub <- graphs_sub[, names(graphs_sub) %in% sampleIDs(x), drop = FALSE ] if (drop && !do_subset) { message( "Node indices in the graphs are no longer valid after subsetting. ", "Dropping all row and col graphs." ) spatialGraphs(x) <- graphs_sub spatialGraphs(x, MARGIN = 1) <- NULL spatialGraphs(x, MARGIN = 2) <- NULL validObject(x) return(x) } # Check which graphs need to be subsetted # Wouldn't need reconstruction if the barcodes within one sample # are still in the same order cn2 <- colnames(x) new_sample_colnames <- lapply(sample_ids, function(s) { cn2[colData(x)$sample_id %in% s] }) old_sample_compare <- old_sample_colnames[sample_ids0 %in% sample_ids] samples_subset <- mapply( function(old, new) !isTRUE(all.equal(old, new)), old = old_sample_compare, new = new_sample_colnames, SIMPLIFY = TRUE ) for (s in which(samples_subset)) { j_sample <- old_sample_compare[[s]] %in% new_sample_colnames[[s]] if (sum(j_sample) == 1) { message("Only 1 column left in sample ", sample_ids[[s]], ", graphs are meaningless, dropping graphs.") graphs_sub[[s]] <- list(row = NULL, col = NULL, annot = NULL) next } for (m in seq_len(2)) { # Not subsetting annotGraphs # Not sure what to do differently with rowGraphs yet for (g in seq_along(graphs_sub[[s]][[m]])) { method_info <- attr(graphs_sub[[s]][[m]][[g]], "method") if (do_subset) { gr <- .subset_listw(graphs_sub[[s]][[m]][[g]], j_sample) attr(gr, "method") <- method_info graphs_sub[[s]][[m]][[g]] <- gr } else { if (is.null(method_info)) { warning( "Graph reconstruction info is missing for sample ", names(graphs_sub)[s], " ", .margin_name(m), "Graph ", names(graphs_sub[[s]][[m]])[g], ". ", "Dropping graph.\n" ) graphs_sub[[s]][[m]][[g]] <- NULL } else { if (requireNamespace(method_info$package[[1]], quietly = TRUE)) { fun <- getFromNamespace(method_info$FUN, method_info$package[[1]]) if ("row.names" %in% names(method_info$args)) { method_info$args[["row.names"]] <- method_info$args[["row.names"]][j] } tryCatch(graphs_sub[[s]][[m]][[g]] <- do.call(fun, c(list(x = x), method_info$args)), error = function(e) { warning( "Graph reconstruction failed for sample ", names(graphs_sub)[s], " ", .margin_name(m), "Graph ", names(graphs_sub[[s]][[m]])[g], ": ", e, "Dropping graph.\n" ) graphs_sub[[s]][[m]][[g]] <- NULL } ) } else { warning( "Package ", method_info$package[[1]], " used to construct graph for sample ", names(graphs_sub)[s], " ", .margin_name(m), "Graph ", names(graphs_sub[[s]][[m]])[g], " is not installed. ", "Dropping graph.\n" ) graphs_sub[[s]][[m]][[g]] <- NULL } } } } } } spatialGraphs(x) <- graphs_sub } if (!missing(j) && .is0(j)) spatialGraphs(x) <- NULL validObject(x) return(x) } ) .find_if_crop <- function(x, sample_id, bbox) { imgs <- imgData(x)[imgData(x)$sample_id == sample_id,] not_crop <- vapply(seq_len(nrow(imgs)), function(i) { img <- imgs$data[[i]] if (!inherits(img, "SpatRasterImage")) return(FALSE) fn <- imgSource(img) if (is.na(fn)) return(FALSE) size <- file.info(fn)[["size"]] bbox_prop <- .get_bbox_prop(bbox, img) th <- .size_str2num(getOption("SFE_subset_crop_max")) if (size * bbox_prop > th) return(TRUE) else FALSE }, FUN.VALUE = logical(1)) if (any(not_crop)) message("Some images are larger than ", getOption("SFE_subset_crop_max"), " not cropping. Change threshold with the SFE_subset_crop_max option.") getOption("SFE_subset_crop") & !any(not_crop) } .crop_imgs <- function(x, bboxes) { # Crop all images across samples in an SFE object if (nrow(imgData(x))) { if (!getOption("SFE_subset_crop")) message("SFE_subset_crop option set to FALSE, not cropping images") samples <- sort(sampleIDs(x)) imgData(x) <- imgData(x)[order(imgData(x)$sample_id),] if (length(samples) == 1L) { bboxes <- matrix(bboxes, ncol = 1, dimnames = list(names(bboxes), samples)) } do_crop <- vapply(samples, function(s) { .find_if_crop(x, s, bboxes[,s]) }, FUN.VALUE = logical(1)) |> all() if (do_crop) { new_imgs <- lapply(samples, function(s) { bbox_use <- bboxes[c("xmin", "xmax", "ymin", "ymax"),s] img_data <- imgData(x)$data[imgData(x)$sample_id == s] lapply(img_data, cropImg, bbox = bbox_use) }) new_imgs <- unlist(new_imgs, recursive = FALSE) imgData(x)$data <- I(new_imgs) } } x }