Bioconductor Code: CiteFuse

Raw Blame Patch Log History
#' spectralClustering
#'
#' A function to perform spectral clustering
#'
#' @param affinity An affinity matrix
#' @param K number of clusters
#' @param type type
#' @param fast fast
#' @param maxdim maxdim
#' @param delta delta
#' @param t t
#' @param neigen neigen
#'
#' @return A list indicates the spectral clustering results
#'
#' @examples
#'
#' data("sce_control_subset", package = "CiteFuse")
#' sce_control_subset <- CiteFuse(sce_control_subset)
#' SNF_W_clust <- spectralClustering(S4Vectors::metadata(sce_control_subset)[["SNF_W"]],
#' K = 5)
#'
#' @importFrom igraph arpack
#' @importFrom methods as
#'
#'
#' @export

spectralClustering <- function(affinity, K = 20, type = 4,
                               fast = TRUE,
                               maxdim = 50, delta = 1e-5,
                               t = 0, neigen = NULL)
{

  d <- rowSums(affinity)
  d[d == 0] <- .Machine$double.eps
  D <- diag(d)
  L <- affinity


  neff <- K + 1

  if (type == 1) {
    NL <- L
  }
  else if (type == 2) {
    Di <- diag(1/d)
    NL <- Di %*% L
  }
  else if (type == 3) {
    Di <- diag(1/sqrt(d))
    NL <- Di %*% L %*% Di

  } else if (type == 4) {
    v <- sqrt(d)
    NL <- L/(v %*% t(v))
  }

  if (!fast) {
    eig <- eigen(NL)
  }else {
    f = function(x, A = NULL){ # matrix multiplication for ARPACK
      as.matrix(A %*% x)
    }

    n <- nrow(affinity)

    NL <- ifelse(NL > delta, NL, 0)
    NL <- methods::as(NL, "dgCMatrix")


    eig <- igraph::arpack(f, extra = NL, sym = TRUE,
                          options = list(which = 'LA', nev = neff,
                                         n = n, ncv = max(min(c(n,4*neff)))))

  }

  psi = eig$vectors / (eig$vectors[,1] %*% matrix(1, 1, neff))#right ev
  eigenvals <- eig$values

  cat('Computing Spectral Clustering \n')

  res <- sort(abs(eigenvals), index.return = TRUE, decreasing = TRUE)
  U <- eig$vectors[, res$ix[seq_len(K)]]
  normalize <- function(x) x/sqrt(sum(x^2))

  if (type == 3 | type == 4) {
    U <- t(apply(U, 1, normalize))
  }
  # This part is equal to performing kmeans
  # labels <- kmeans(U, centers = K, nstart = 1000)$cluster
  eigDiscrete <- .discretisation(U)
  eigDiscrete <- eigDiscrete$discrete
  labels <- apply(eigDiscrete, 1, which.max)

  cat('Computing Diffusion Coordinates\n')
  if (t <= 0) {# use multi-scale geometry
    lambda = eigenvals[-1]/(1 - eigenvals[-1])
    lambda = rep(1,n) %*% t(lambda)
    if (is.null(neigen)) {#use no. of dimensions corresponding to 95% dropoff
      lam = lambda[1,]/lambda[1,1]
      # neigen = min(which(lam < .05)) # default number of eigenvalues
      neigen = min(neigen, maxdim, K)
      eigenvals = eigenvals[seq_len((neigen + 1))]
      cat('Used default value:',neigen,'dimensions\n')
    }
    X = psi[,2:(neigen + 1)]*lambda[, seq_len(neigen)] #diffusion coords. X
  }
  else{# use fixed scale t
    lambda = eigenvals[-1]^t
    lambda = rep(1, n) %*% t(lambda)

    if (is.null(neigen)) {#use no. of dimensions corresponding to 95% dropoff
      lam = lambda[1, ]/lambda[1, 1]
      neigen = min(which(lam < .05)) # default number of eigenvalues
      neigen = min(neigen, maxdim)
      eigenvals = eigenvals[seq_len(neigen + 1)]
      cat('Used default value:', neigen, 'dimensions\n')
    }

    X = psi[, 2:(neigen + 1)] * lambda[, seq_len(neigen)] #diffusion coords. X
  }

  return(list(labels = labels,
              eigen_values = eig$values,
              eigen_vectors = eig$vectors,
              X = X))
}




.discretisationEigenVectorData <- function(eigenVector) {

  Y = matrix(0,nrow(eigenVector),ncol(eigenVector))
  maxi <- function(x) {
    i = which(x == max(x))
    return(i[1])
  }
  j = apply(eigenVector,1,maxi)
  Y[cbind(seq_len(nrow(eigenVector)), j)] = 1

  return(Y)

}


.discretisation <- function(eigenVectors) {

  normalize <- function(x) x / sqrt(sum(x^2))
  eigenVectors = t(apply(eigenVectors,1,normalize))

  n = nrow(eigenVectors)
  k = ncol(eigenVectors)

  R = matrix(0,k,k)
  R[,1] = t(eigenVectors[round(n/2),])

  mini <- function(x) {
    i = which(x == min(x))
    return(i[1])
  }

  c = matrix(0, n, 1)
  for (j in seq(2, k)) {
    c = c + abs(eigenVectors %*% matrix(R[,j - 1], k, 1))
    i = mini(c)
    R[,j] = t(eigenVectors[i,])
  }

  lastObjectiveValue = 0
  for (i in seq_len(1000)) {
    eigenDiscrete = .discretisationEigenVectorData(eigenVectors %*% R)

    svde = svd(t(eigenDiscrete) %*% eigenVectors)
    U = svde[['u']]
    V = svde[['v']]
    S = svde[['d']]

    NcutValue = 2 * (n - sum(S))
    if (abs(NcutValue - lastObjectiveValue) < .Machine$double.eps)
      break

    lastObjectiveValue = NcutValue
    R = V %*% t(U)

  }

  return(list(discrete = eigenDiscrete, continuous = eigenVectors))
}



#' reducedDimSNF
#'
#' A function to reduce the dimension of the similarity matrix
#'
#' @param sce A singlecellexperiment object
#' @param metadata indicates the meta data name of
#' affinity matrix to virsualise
#' @param method the method of visualisation, which can be UMAP,
#' tSNE and diffusion map
#' @param dimNames indicates the name of the reduced dimension results.
#'
#' @param ... other parameters for tsne(), umap()
#'
#' @return A SingleCellExperiment object
#'
#' @examples
#' data("sce_control_subset", package = "CiteFuse")
#' sce_control_subset <- CiteFuse(sce_control_subset)
#' sce_control_subset <- reducedDimSNF(sce_control_subset,
#' method = "tSNE",
#' dimNames = "tSNE_joint")
#'
#' @importFrom uwot umap
#' @importFrom Rtsne Rtsne
#' @importFrom SingleCellExperiment reducedDim
#' @importFrom S4Vectors metadata
#' @importFrom stats as.dist
#' @export

reducedDimSNF <- function(sce,
                          metadata = "SNF_W",
                          method = "UMAP",
                          dimNames = NULL,
                          ...) {

  method <- match.arg(method, c("UMAP", "tSNE"), several.ok = FALSE)

  if (!metadata %in% names(S4Vectors::metadata(sce))) {
    stop("sce does not contain metadata")
  }

  W <- S4Vectors::metadata(sce)[[metadata]]



  if (is.null(dimNames)) {
    dimNames <- paste(method, metadata, sep = "_")
  }


  if ("UMAP" %in% method) {

    dimred <- uwot::umap(as.dist(0.5 - W), ...)
    colnames(dimred) <- paste("UMAP", seq_len(ncol(dimred)), sep = " ")

    SingleCellExperiment::reducedDim(sce, dimNames) <- dimred
  }

  if ("tSNE" %in% method) {

    if (nrow(W) - 1 < 3 * 30) {
      stop("Please set a smaller perplexity number for Rtsne()")
    }

    dimred <- Rtsne::Rtsne(as.dist(0.5 - W), is_distance = TRUE, ...)$Y
    colnames(dimred) <- paste("tSNE", seq_len(ncol(dimred)), sep = " ")

    SingleCellExperiment::reducedDim(sce, dimNames) <- dimred
  }



  return(sce)


}






#' visualiseDim
#'
#' A function to visualise the reduced dimension
#'
#' @param sce A singlecellexperiment object
#' @param dimNames indicates the name of the reduced dimension results.
#' @param colour_by A character indicates how the cells coloured by.
#' The information either stored in colData, assay, or altExp.
#' @param shape_by A character indicates how the cells shaped by.
#' The information either stored in colData, assay, or altExp.
#' @param data_from A character indicates where the colour by data stored
#' @param assay_name A character indicates the assay name of the expression
#' @param altExp_name A character indicates the name of alternative expression
#' @param altExp_assay_name A character indicates the assay name of alternative expression
#' @param dim a vector of numeric with length of 2 indicates
#' which component is being plot
#'
#' @return A ggplot of the reduced dimension visualisation
#'
#' @examples
#' data("sce_control_subset", package = "CiteFuse")
#' sce_control_subset <- CiteFuse(sce_control_subset)
#' sce_control_subset <- reducedDimSNF(sce_control_subset,
#' method = "tSNE",
#' dimNames = "tSNE_joint")
#' visualiseDim(sce_control_subset, dimNames = "tSNE_joint",
#' colour_by = "SNF_W_clust")
#'
#' @importFrom SingleCellExperiment reducedDimNames
#' @importFrom SummarizedExperiment colData
#' @importFrom S4Vectors metadata
#' @import ggplot2
#'
#' @export

visualiseDim <- function(sce,
                         dimNames = NULL,
                         colour_by = NULL,
                         shape_by = NULL,
                         data_from = c("colData", "assay", "altExp"),
                         assay_name = NULL,
                         altExp_name = NULL,
                         altExp_assay_name = NULL,
                         dim = seq_len(2)){


  if (!dimNames %in% SingleCellExperiment::reducedDimNames(sce)) {
    stop("sce does not contain dimNames")
  }

  data_from <- match.arg(data_from,
                         c("colData", "assay", "altExp"),
                         several.ok = TRUE)



  cts <- FALSE

  if (is.null(colour_by)) {

    colour_by <- NULL

  }else if ("character" %in% is(colour_by) & length(colour_by) == 1) {

    df_colour_by <- .get_color_by(sce, colour_by, data_from,
                               assay_name, altExp_name,
                               altExp_assay_name)
    colour_by <- df_colour_by$colour_by_info
    cts <- df_colour_by$cts

  } else if (length(colour_by) != ncol(sce)) {

    stop("colour_by needs to be a character or a vector with
         length equal to the number of cells.")

  } else{

    colour_by <- colour_by

  }


  if (is.null(shape_by)) {

    shape_by <- NULL

  }else if ("character" %in% is(shape_by) & length(shape_by) == 1) {

    if (!shape_by %in% names(colData(sce))) {

      stop("There is no colData with name shape_by")

    } else {

      shape_by <- as.factor(SummarizedExperiment::colData(sce)[, shape_by])

    }

  } else if (length(shape_by) != ncol(sce)) {

    stop("shape_by needs to be a character or a vector with
         length equal to the number of cells.")

  } else{

    shape_by <- shape_by

  }


  dimred <- reducedDim(sce, dimNames)

  if (is.null(colnames(dimred))) {
    colnames(dimred) <- paste(dimNames, seq_len(ncol(dimred)), sep = "_")
  }

  dimred <- data.frame(dimred)

  if (cts) {
    g_color <- scale_color_viridis_c()
  } else {
    g_color <- scale_color_manual(values = cite_colorPal(length(unique(colour_by))))
  }

  ggplot2::ggplot(dimred, aes(x = dimred[, dim[1]],
                              y = dimred[, dim[2]],
                              col = colour_by,
                              shape = shape_by)) +
    geom_point() +
    g_color +
    scale_shape_manual(values = cite_shapePal(length(unique(shape_by)))) +
    theme_bw() +
    theme(aspect.ratio = 1) +
    xlab(colnames(dimred)[dim[1]]) +
    ylab(colnames(dimred)[dim[2]])





}


.get_color_by <- function(sce,
                          colour_by,
                          data_from = c("colData", "assay", "altExp"),
                          assay_name = NULL,
                          altExp_name = NULL,
                          altExp_assay_name = NULL) {



  data_from <- match.arg(data_from,
                         c("colData", "assay", "altExp"),
                         several.ok = TRUE)

  colour_by_info <- NULL
  cts <- TRUE

  if ("colData" %in% data_from & is.null(colour_by_info)) {
    if (colour_by %in% names(colData(sce))) {
      colour_by_info <- SummarizedExperiment::colData(sce)[, colour_by]
      cts <- FALSE
    }
  }

  if ("assay" %in% data_from & is.null(colour_by_info)) {
    if (colour_by %in% rownames(sce)) {


      if (is.null(assay_name)) {
        if ("logcounts" %in% SummarizedExperiment::assayNames(sce)) {
          assay_name <- "logcounts"
        } else {
          assay_name <- SummarizedExperiment::assayNames(sce)[1]
        }
      } else {
        if (!assay_name %in% SummarizedExperiment::assayNames(sce)) {
          stop("There is no assay_name in assayNames of the sce")
        }
      }
      colour_by_info <- SummarizedExperiment::assay(sce, assay_name)[colour_by, ]

    }
  }

  if ("altExp" %in% data_from &
      !is.null(SingleCellExperiment::altExpNames(sce))  &
      is.null(colour_by_info)) {
    if (is.null(altExp_name)) {
      if ("ADT" %in% SingleCellExperiment::altExpNames(sce)) {
        altExp_name <- "ADT"
      } else {
        altExp_name <-  SingleCellExperiment::altExpNames(sce)[1]
      }
    }
    alt_se <- SingleCellExperiment::altExp(sce, altExp_name)

    if (colour_by %in% rownames(alt_se)) {

      if (is.null(altExp_assay_name)) {
        if ("logcounts" %in% SummarizedExperiment::assayNames(alt_se)) {
          altExp_assay_name <- "logcounts"
        } else {
          altExp_assay_name <- SummarizedExperiment::assayNames(alt_se)[1]
        }
      } else {
        if (!altExp_assay_name %in% SummarizedExperiment::assayNames(alt_se)) {
          stop("There is no altExp_assay_name in assayNames of the altExp")
        }
      }
      colour_by_info <- SummarizedExperiment::assay(alt_se,
                                                    altExp_assay_name)[colour_by, ]
    }
  }

  if (is.null(colour_by_info)) {
    warning("Can not find the required colour_by info,
            please check input data_from, assay_name, altExp_name, altExp_assay_name")
  }
  return(list(colour_by_info = colour_by_info, cts = cts))
}




.normalize <- function(X) {
  row.sum.mdiag <- rowSums(X) - diag(X)
  row.sum.mdiag[row.sum.mdiag == 0] <- 1
  X <- X/(2 * (row.sum.mdiag))
  diag(X) <- 0.5
  return(X)
}

#' igraphClustering
#'
#' A function to perform igraph clustering
#'
#' @param sce A singlecellexperiment object
#' @param metadata indicates the meta data name of affinity matrix
#' to virsualise
#' @param method A character indicates the method for finding communities f
#' rom igraph. Default is louvain clustering.
#' @param ... Other inputs for the igraph functions
#'
#' @return A vector indicates the membership (clustering) results
#'
#' @examples
#'
#' data("sce_control_subset", package = "CiteFuse")
#' sce_control_subset <- CiteFuse(sce_control_subset)
#' SNF_W_louvain <- igraphClustering(sce_control_subset,
#' method = "louvain")
#'
#' @importFrom S4Vectors metadata
#' @importFrom dbscan sNN
#' @importFrom igraph graph_from_adjacency_matrix cluster_louvain
#' cluster_walktrap cluster_spinglass cluster_optimal
#' cluster_edge_betweenness cluster_fast_greedy cluster_label_prop
#' cluster_leading_eigen
#' @importFrom stats median as.dist
#' @export

igraphClustering <- function(sce,
                             metadata = "SNF_W",
                             method = c("louvain", "walktrap", "spinglass",
                                        "optimal", "leading_eigen",
                                        "label_prop", "fast_greedy",
                                        "edge_betweenness"),
                             ...) {

  method <- match.arg(method, c("louvain", "walktrap", "spinglass", "optimal",
                                "leading_eigen", "label_prop", "fast_greedy",
                                "edge_betweenness"))

  normalized.mat <- S4Vectors::metadata(sce)[[metadata]]
  diag(normalized.mat) <- stats::median(as.vector(normalized.mat))
  normalized.mat <- .normalize(normalized.mat)
  normalized.mat <- normalized.mat + t(normalized.mat)

  binary.mat <- dbscan::sNN(stats::as.dist(0.5 - normalized.mat), k = 20)

  binary.mat <- sapply(seq_len(nrow(normalized.mat)), function(x) {
    tmp <- rep(0, ncol(normalized.mat))
    tmp[binary.mat$id[x,]] <- 1
    tmp
  })

  rownames(binary.mat) <- colnames(binary.mat)
  dim(binary.mat)


  g <- igraph::graph_from_adjacency_matrix(binary.mat, mode = "undirected")

  if (method == "louvain") {
    X <- igraph::cluster_louvain(g, ...)
  }

  if (method == "walktrap") {
    X <- igraph::cluster_walktrap(g, ...)
  }

  if (method == "spinglass") {
    X <- igraph::cluster_spinglass(g, ...)
  }

  if (method == "optimal") {
    X <- igraph::cluster_optimal(g, ...)
  }

  if (method == "leading_eigen") {
    X <- igraph::cluster_leading_eigen(g, ...)
  }

  if (method == "label_prop") {
    X <- igraph::cluster_label_prop(g, ...)
  }

  if (method == "fast_greedy") {
    X <- igraph::cluster_fast_greedy(g, ...)
  }

  if (method == "edge_betweenness") {
    X <- igraph::cluster_edge_betweenness(g, ...)
  }

  clustres <- X$membership

  return(clustres)

}






#' visualiseKNN
#'
#' A function to perform louvain clustering
#'
#' @param sce A singlecellexperiment object
#' @param colour_by the name of coldata that is used to colour the node
#' @param metadata indicates the meta data name of affinity matrix to virsualise
#'
#' @return A igraph plot
#'
#' @examples
#' data("sce_control_subset", package = "CiteFuse")
#' sce_control_subset <- CiteFuse(sce_control_subset)
#' SNF_W_louvain <- igraphClustering(sce_control_subset,
#' method = "louvain")
#' visualiseKNN(sce_control_subset, colour_by = "SNF_W_louvain")
#'
#' @importFrom S4Vectors metadata
#' @importFrom dbscan sNN
#' @importFrom igraph graph_from_adjacency_matrix
#' @importFrom SummarizedExperiment colData
#' @importFrom graphics plot legend
#' @export

visualiseKNN <- function(sce,
                         colour_by = NULL,
                         metadata = "SNF_W") {



  normalized.mat <- S4Vectors::metadata(sce)[[metadata]]
  diag(normalized.mat) <- stats::median(as.vector(normalized.mat))
  normalized.mat <- .normalize(normalized.mat)
  normalized.mat <- normalized.mat + t(normalized.mat)

  binary.mat <- dbscan::sNN(as.dist(0.5 - normalized.mat), k = 20)

  binary.mat <- sapply(seq_len(nrow(normalized.mat)), function(x) {
    tmp <- rep(0, ncol(normalized.mat))
    tmp[binary.mat$id[x,]] <- 1
    tmp
  })

  rownames(binary.mat) <- colnames(binary.mat)
  dim(binary.mat)


  g <- igraph::graph_from_adjacency_matrix(binary.mat, mode = "undirected")

  # cat('no. of clusters')
  # print(nlevels(as.factor(X$membership)))
  #
  if (is.null(colour_by)) {
    colour_by <- as.factor(rep(1, ncol(sce)))
  } else {
    colour_by <- as.factor(SummarizedExperiment::colData(sce)[, colour_by])
  }

  graphics::plot(g, vertex.label = NA,
                 edge.arrow.size = 0.000001,
                 layout = igraph::layout.fruchterman.reingold,
                 vertex.color = cite_colorPal(nlevels(colour_by))[as.numeric(colour_by)],
                 vertex.size = 4)

  graphics::legend('bottomright',
                   legend = levels(colour_by),
                   col = cite_colorPal(nlevels(colour_by)),
                   pch = 16,
                   ncol = ceiling(nlevels(colour_by)/10),
                   cex = 0.5)

}