# Bioconductor Code: singleCellTK
#
# Raw Blame Patch Log History
#' Summarize SingleCellExperiment
#'
#' Creates a table of summary metrics from the "counts" assay of an input
#' SingleCellExperiment.
#'
#' @param indata Input SingleCellExperiment. Its "counts" assay is the only
#' assay that is read.
#' @param minimum_detect_genes Threshold for the "Samples with <N detected
#' genes" metric: samples with fewer than this many genes having a count above
#' zero are tallied. The default is 1700.
#'
#' @return A data.frame object of summary metrics, with one row per metric, a
#' "Metric" column holding the metric label and a "Value" column holding the
#' integer value.
#' @export summarizeTable
summarizeTable <- function(indata, minimum_detect_genes=1700){
  # Extract the assay once; every metric below is derived from it.
  counts <- assay(indata, "counts")
  reads_per_sample <- colSums(counts)
  # A gene is "detected" in a sample when its count is above zero.
  genes_per_sample <- colSums(counts > 0)
  cutoff_label <- paste0("Samples with <",
                         format(minimum_detect_genes, scientific = FALSE,
                                trim = TRUE),
                         " detected genes")
  data.frame("Metric" = c("Number of Samples",
                          "Number of Genes",
                          "Average number of reads per cell",
                          "Average number of genes per cell",
                          cutoff_label,
                          "Genes with no expression across all samples"),
             "Value" = c(ncol(indata),
                         nrow(indata),
                         as.integer(mean(reads_per_sample)),
                         as.integer(mean(genes_per_sample)),
                         # Compare the number of detected genes, not the
                         # number of zero-count genes, against the cutoff.
                         sum(genes_per_sample < minimum_detect_genes),
                         sum(rowSums(counts) == 0)))
}

#' Create a SummarizedExperiment object from count and annotation tables
#'
#' From a table of counts and optional tables of sample and gene annotation,
#' build an experiment object whose "counts" assay holds the count matrix.
#' Inputs are given either as paths to tab-delimited text files or, when
#' \code{inputdataframes} is TRUE, as tables already loaded in memory.
#'
#' @param countfile The path to a text file that contains a header row of sample
#' names, and rows of raw counts per gene for those samples. The first column
#' is used as the row (gene) names. Required.
#' @param annotfile The path to a text file that contains columns of annotation
#' information for each sample in the countfile. This file should have the same
#' number of rows as there are columns in the countfile. This is optional; when
#' omitted, a single "Sample" column holding the count table's column names is
#' used.
#' @param featurefile The path to a text file that contains columns of
#' annotation information for each gene in the count matrix. This file should
#' have the same genes in the same order as countfile. This is optional; when
#' omitted, a single "Gene" column holding the count table's row names is used.
#' @param inputdataframes If TRUE, countfile, annotfile and featurefile are
#' used as in-memory tables instead of file paths. The default is FALSE.
#'
#' @return The object returned by \code{SummarizedExperiment()}, with the counts
#' stored (via \code{as.matrix}) in the "counts" assay, the sample annotation as
#' colData and the gene annotation as rowData.
#' @export createSCE
#' @examples
#' \dontrun{
#' GSE60361_sce <- createSCE(countfile = "/path/to/input_counts.txt",
#'                           annotfile = "/path/to/input_annots.txt")
#'}
createSCE <- function(countfile=NULL, annotfile=NULL, featurefile=NULL,
                      inputdataframes=FALSE){
  if (is.null(countfile)){
    stop("You must supply a count file.")
  }
  # Loads one input: NULL and in-memory tables pass through unchanged, anything
  # else is read as a tab-delimited file with a header row and row names in the
  # first column. TRUE is spelled out because T can be reassigned by users.
  loadtable <- function(input){
    if (is.null(input) || inputdataframes){
      return(input)
    }
    utils::read.table(input, sep = "\t", header = TRUE, row.names = 1)
  }
  countsin <- loadtable(countfile)
  annotin <- loadtable(annotfile)
  featurein <- loadtable(featurefile)
  if (is.null(annotin)){
    # No sample annotation supplied: fall back to the sample names alone.
    annotin <- data.frame(row.names = colnames(countsin))
    annotin$Sample <- rownames(annotin)
    annotin <- DataFrame(annotin)
  }
  if (is.null(featurein)){
    # No gene annotation supplied: fall back to the gene names alone.
    featurein <- data.frame(Gene = rownames(countsin))
    rownames(featurein) <- featurein$Gene
    featurein <- DataFrame(featurein)
  }
  # NOTE(review): earlier documentation described the result as a
  # SingleCellExperiment, but the constructor called here is
  # SummarizedExperiment(); confirm which class is intended.
  SummarizedExperiment(assays = list(counts = as.matrix(countsin)),
                       colData = annotin,
                       rowData = featurein)
}

#' Filter Genes and Samples from a Single Cell Object
#'
#' Removes the requested samples, optionally removes genes with no expression,
#' keeps only the most highly expressed genes, and drops samples in which too
#' few genes are detected. All expression values are read from the "counts"
#' assay. The genes that are kept are returned ordered by decreasing total
#' count.
#'
#' @param insceset Input single cell object, required
#' @param deletesamples List of samples to delete from the object.
#' @param remove_noexpress Remove genes that have no expression across all
#' samples. The default is true
#' @param remove_bottom Fraction of low expression genes to remove from the
#' single cell object. This occurs after remove_noexpress. The default is 0.50.
#' @param minimum_detect_genes Minimum number of genes with at least 1
#' count to include a sample in the single cell object. The default is 1700.
#' Detected genes are counted after deletesamples and remove_noexpress have
#' been applied, and before the low expression genes are removed.
#'
#' @return The filtered single cell object.
#' @export filterSCData
#'
#' @examples
#' data("GSE60361_subset_sce")
#' GSE60361_subset_sce <- filterSCData(GSE60361_subset_sce,
#'                                     deletesamples="X1772063061_G11")
filterSCData <- function(insceset, deletesamples=NULL, remove_noexpress=TRUE,
                         remove_bottom=0.5, minimum_detect_genes=1700){
  # Drop the explicitly excluded samples first so they do not contribute to the
  # per-gene totals computed below.
  insceset <- insceset[, !(colnames(insceset) %in% deletesamples)]
  if (remove_noexpress){
    insceset <- insceset[rowSums(assay(insceset, "counts")) != 0, ]
  }
  counts <- assay(insceset, "counts")
  nkeeprows <- ceiling((1 - remove_bottom) * as.numeric(nrow(insceset)))
  # seq_len() selects nothing when nkeeprows is 0, whereas 1:0 would select
  # the first row.
  tokeeprow <- order(rowSums(counts), decreasing = TRUE)[seq_len(nkeeprows)]
  # Keep samples by their number of detected genes (count above zero), not by
  # their number of zero-count genes.
  tokeepcol <- colSums(counts > 0) >= minimum_detect_genes
  insceset[tokeeprow, tokeepcol]
}