Bioconductor Code: crisprDesign

History View file @ 358938b

                     new file mode 100644
@@ -0,0 +1,489 @@
                     +#' @title Add well row and column info to rowData
                     +#'
                     +#' @description Add well row and column info to rowData
                     +#'
                     +#' @param se A \linkS4class{SummarizedExperiment} object.
                     +#'
                     +#' @return A \linkS4class{SummarizedExperiment} object with updated
                     +#'     annotation.
                     +#'
                     +#' @author Jean-Philippe Fortin
                     +#'
                     +#' @examples
                     +#' data(seExample)
                     +#' seExample <- addPlateRowAndColumn(seExample)
                     +#'
                     +#' @importFrom stringr str_extract
                     +#' @importFrom SummarizedExperiment rowData rowData<-
                     +#' @export
                     +addPlateRowAndColumn <- function(se){
                     +    ann <- rowData(se)
                     +    if (!"Well" %in% colnames(ann)){
                     +        stop("Well must be in colData(se)")
                     +    }
                     +    ann$row    <- str_extract(rowData(se)[["Well"]], "[A-Z]+")
                     +    ann$column <- str_extract(rowData(se)[["Well"]], "[0-9]+")
                     +    ann$column <- as.integer(ann$column)
                     +    rowData(se) <- ann
                     +    return(se)
                     +}
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
                     +#' @title Add a column in the feature annotation for gene class
                     +#'
                     +#' @description Add a column in the feature annotation for gene class.
                     +#'
                     +#' @param se A \linkS4class{SummarizedExperiment} object.
                     +#' @param gene.field String specifying the name of the column
                     +#'     that contains the gene names.
                     +#' @param class.field String specifying the name of the column
                     +#'     that will store gene class. Default is "class".
                     +#' @param default.class What should be the default class for
                     +#'     genes not specified in \code{classes}?
                     +#'     Default is "target".
                     +#' @param classes Named list specifying the gene classes.
                     +#'     Names of the list correspond to the names of the classes,
                     +#'     each each list element contains the gene names corresponding
                     +#'     the the given class.
                     +#'
                     +#' @return A \linkS4class{SummarizedExperiment} object with updated
                     +#'     annotation.
                     +#'
                     +#' @author Jean-Philippe Fortin
                     +#'
                     +#' @importFrom stringr str_extract
                     +#' @importFrom SummarizedExperiment rowData rowData<-
                     +#' @export
                     +addGeneClassColumn <- function(se,
                     +                               gene.field="Gene",
                     +                               class.field="class",
                     +                               default.class="target",
                     +                               classes=NULL
                     +){
                     +    ann <- rowData(se)
                     +    ann[[class.field]] <- default.class
                     +    if (!gene.field %in% colnames(ann)){
                     +        stop("gene.field must be in colnames(rowData(se))")
                     +    }
                     +    if (!is.null(classes)){
                     +        n_classes <- length(classes)
                     +        for (class in names(classes)){
                     +            wh <- which(ann[[gene.field]] %in% classes[[class]])
                     +            ann[[class.field]][wh] <- class
                     +        }
                     +    }
                     +    rowData(se) <- ann
                     +    return(se)
                     +}
+                    +
+                    +
+                    +
+                    +
                     +#' @title log2 transformation of assay data
                     +#'
                     +#' @description log2 transformation of assay data.
                     +#'
                     +#' @param se A \linkS4class{SummarizedExperiment} object.
                     +#' @param assay Integer vector specifying the indices of the assay
                     +#'     to log2 transform. If NULL (default), all assays
                     +#'     are log2 trasnformed.
                     +#' @param offset Integer specifying offset to use
                     +#'     in the log2 transformation.
                     +#'
                     +#' @return A \linkS4class{SummarizedExperiment} object with
                     +#'     one or many assays log2 transformed.
                     +#'
                     +#' @examples
                     +#' data(seExample)
                     +#' seExample <- logTransform(seExample, offset=1)
                     +#'
                     +#' @author Jean-Philippe Fortin
                     +#'
                     +#' @importFrom stringr str_extract
                     +#' @importFrom SummarizedExperiment rowData rowData<-
                     +#' @importFrom SummarizedExperiment assays assays<-
                     +#' @export
                     +logTransform <- function(se,
                     +                         assay=NULL,
                     +                         offset=1
                     +){
                     +    n_assays <- length(assays(se))
                     +    if (is.null(assay)){
                     +        assaysToTransform <- seq_len(n_assays)
                     +    } else {
                     +        assaysToTransform <- assay
                     +    }
+                    +
                     +    for (assay in assaysToTransform){
                     +        assays(se)[[assay]] <- log2(assays(se)[[assay]]+offset)
                     +    }
                     +    return(se)
                     +}
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
                     +#' @title Z score transformation using negative controls.
                     +#'
                     +#' @description Z score transformation using negative controls.
                     +#'
                     +#' @param se A \linkS4class{SummarizedExperiment} object.
                     +#' @param neg.controls Character vector specifying genes that should
                     +#'     be used for z-score transformation. The genes must be present
                     +#'     in the \code{gene.field} column of \code{rowData(se)}.
                     +#'     If NULL (default), all genes will be considered.
                     +#' @param gene.field String specifying the name of the column
                     +#'     that contains the gene names specified by \code{neg.controls}.
                     +#' @param fun.centering String specifying the method
                     +#'     to use for centering. Either "median" (default) or "mean".
                     +#' @param fun.scaling String specifying the method
                     +#'     to use for scaling. Either "mad" (default) or "sd".
                     +#' @param scaling.ignore.controls Should the scaling factors be
                     +#'     calculated using all genes? TRUE by default, which
                     +#'     will ignore negative controls.
                     +#' @param class.field String specifying the name of the column
                     +#'     that stored the gene class in \code{rowData(se)}.
                     +#' @param class.levels Gene classes that should be considered
                     +#'     for z-score transformation. If NULL (default), all classes
                     +#'     are considered. If specified, intersection of the specified
                     +#'     gene classes and the genes specified in \code{neg.controls}
                     +#'     will be considered for z scoring.
                     +#'
                     +#' @return A \linkS4class{SummarizedExperiment} object with
                     +#'     assays z-score transformed.
                     +#'
                     +#' @author Jean-Philippe Fortin
                     +#'
                     +#' @importFrom matrixStats colMedians colMads colSds
                     +#' @export
                     +transformToZScores <- function(se,
                     +                               neg.controls=NULL,
                     +                               gene.field="Gene",
                     +                               fun.centering=c("median", "mean"),
                     +                               fun.scaling=c("mad", "sd"),
                     +                               scaling.ignore.controls=TRUE,
                     +                               class.field="class",
                     +                               class.levels="target"
                     +){
                     +    fun.centering <- match.arg(fun.centering)
                     +    fun.scaling   <- match.arg(fun.scaling)
                     +    n_assays <- length(assays(se))
                     +    ann <- rowData(se)
                     +    n_genes <- nrow(se)
                     +    for (assay in seq_len(n_assays)){
                     +        Y <- assays(se)[[assay]]
                     +        wh_centering <- seq_len(n_genes)
                     +        wh_scaling   <- seq_len(n_genes)
                     +        wh_class     <- seq_len(n_genes)
                     +        if (!is.null(neg.controls)){
                     +            wh_centering <- which(ann[[gene.field]] %in% neg.controls)
                     +            if (!scaling.ignore.controls){
                     +                wh_scaling <- which(ann[[gene.field]] %in% neg.controls)
                     +            }
                     +        }
                     +        if (!is.null(class.levels)){
                     +            wh_class <- which(ann[[class.field]] %in% class.levels)
                     +        }
                     +        wh_centering <- intersect(wh_centering, wh_class)
                     +        wh_scaling   <- intersect(wh_scaling, wh_class)
+                    +
+                    +
                     +        if (length(wh_centering)==0 | length(wh_scaling)==0 ){
                     +            stop("Combination of neg.controls and class.levels returns
                     +                  0 gene for transformation.")
                     +        }
+                    +
                     +        #Centering:
                     +        if (fun.centering=="median"){
                     +            factors <- colMedians(Y[wh_centering,,drop=FALSE], na.rm=TRUE)
                     +        } else {
                     +            factors <- colMeans(Y[wh_centering,,drop=FALSE], na.rm=TRUE)
                     +        }
                     +        Y <- sweep(Y, 2 ,factors, "-")
+                    +
                     +        #Scaling:
                     +        if (fun.scaling=="mad"){
                     +            factors <- colMads(Y[wh_scaling,,drop=FALSE], na.rm=TRUE)
                     +        } else {
                     +            factors <- colSds(Y[wh_scaling,,drop=FALSE], na.rm=TRUE)
                     +        }
                     +        Y <- sweep(Y, 2 ,factors, "/")
                     +        assays(se)[[assay]] <- Y
                     +    }
                     +    return(se)
                     +}
+                    +
+                    +
+                    +
                     +#' @title Calculate limma statistics
                     +#'
                     +#' @description Calculate limma statistics.
                     +#'
                     +#' @param se A \linkS4class{SummarizedExperiment} object.
                     +#' @param design Design matrix to pass to limma.
                     +#' @param contrast.matrix Contrast matrix to pass to limma.
                     +#'
                     +#' @return For each assay in \code{se}, it returns a named
                     +#'     list with the following elements:
                     +#'     - lfc: log-fold changes
                     +#'     - pval: p-lvaues
                     +#'     - fdr: FDR-adjusted p-values.
                     +#'
                     +#'
                     +#' @examples
                     +#' data(seExample)
                     +#' library(SummarizedExperiment)
                     +#'
                     +#' classes <- list(neg=c("OR5M9","OR6N1"),
                     +#'     pos=c("KRAS","NRAS"),
                     +#'     rnaimax=c("RNAiMAX"),
                     +#'     empty="empty")
                     +#' seExample <- addGeneClassColumn(seExample, classes=classes)
                     +#'
                     +#' # Transforming the data:
                     +#' seExample <- logTransform(seExample, offset=1)
                     +#' seExample <- normalizeBetweenPlates(seExample)
                     +#' seExample <- transformToZScores(seExample)
                     +#'
                     +#' # Creating design matrix:
                     +#' pheno  <- as.factor(colData(seExample)$Condition)
                     +#' pheno  <- relevel(pheno, ref="WT")
                     +#' design <- model.matrix(~pheno-1)
                     +#' colnames(design) <- gsub("pheno", "", colnames(design))
                     +#' contrasts <- c(Mutant = "Mutant",
                     +#'     Wildtype="WT",
                     +#'     Mutant_vs_Wildtype="Mutant - WT")
                     +#' contrast.matrix <- cbind("Wildtype"=c(1,0),
                     +#'     "Mutant"=c(0,1),
                     +#'     "Mutant_vs_Wildtype"=c(-1,1))
                     +#'
                     +#' # Calculate statistics:
                     +#' stats <- calculateStatistics(seExample,
                     +#'     design=design,
                     +#'     contrast.matrix=contrast.matrix)
                     +#'
                     +#' @author Jean-Philippe Fortin
                     +#'
                     +#' @importFrom limma lmFit contrasts.fit eBayes
                     +#' @importFrom stats p.adjust
                     +#' @export
                     +calculateStatistics <- function(se,
                     +                                design,
                     +                                contrast.matrix=NULL
                     +){
                     +    n_assays <- length(assays(se))
                     +    results <- list()
                     +    for (assay in seq_len(n_assays)){
                     +        Y <- assays(se)[[assay]]
                     +        fit <- lmFit(Y, design)
                     +        if (!is.null(contrast.matrix)){
                     +            fit <- contrasts.fit(fit, contrast.matrix)
                     +        }
                     +        fit <- eBayes(fit)
                     +        stats <- list()
                     +        stats[["lfc"]]  <- fit$coefficients
                     +        stats[["pval"]] <- fit$p.value
                     +        stats[["fdr"]]  <- apply(stats[["pval"]], 2, p.adjust, "fdr")
                     +        results[[assay]] <- stats
                     +    }
                     +    names(results) <- names(assays(se))
                     +    return(results)
                     +}
+                    +
+                    +
+                    +
                     +#' @title Between-plate normalization
                     +#'
                     +#' @description Between-plate normalization.
                     +#'
                     +#' @param se A \linkS4class{SummarizedExperiment} object.
                     +#' @param neg.controls Character vector specifying genes that should
                     +#'     be used for estimating normalizatin factors.
                     +#'     The genes must be present in the \code{gene.field}
                     +#'     column of \code{rowData(se)}.
                     +#'     If NULL (default), all genes will be considered.
                     +#' @param fun String specifying the method
                     +#'     to use for normalization. Either "median" (default) or "mean".
                     +#' @param plate.field String specifying the name of the column
                     +#'     that contains the plate ID.
                     +#' @param gene.field String specifying the name of the column
                     +#'     that contains the gene names specified by \code{neg.controls}.
                     +#' @param class.field String specifying the name of the column
                     +#'     that stored the gene class in \code{rowData(se)}.
                     +#' @param class.levels Gene classes that should be considered
                     +#'     for estimating normalization factors. If NULL (default), all classes
                     +#'     are considered. If specified, intersection of the specified
                     +#'     gene classes and the genes specified in \code{neg.controls}
                     +#'     will be considered for z scoring.
                     +#'
                     +#' @return A \linkS4class{SummarizedExperiment} object with
                     +#'     value normalized between plates.
                     +#'
                     +#' @examples
                     +#' data(seExample)
                     +#' seExample <- logTransform(seExample, offset=1)
                     +#'
                     +#' classes <- list(neg=c("OR5M9","OR6N1"),
                     +#'     pos=c("KRAS","NRAS"),
                     +#'     rnaimax=c("RNAiMAX"),
                     +#'     empty="empty")
                     +#' seExample <- addGeneClassColumn(seExample, classes=classes)
                     +#'
                     +#' seExample <- normalizeBetweenPlates(seExample)
                     +#'
                     +#' @author Jean-Philippe Fortin
                     +#'
                     +#' @importFrom matrixStats colMedians
                     +#' @importFrom SummarizedExperiment rowData assays assays<-
                     +#' @export
                     +normalizeBetweenPlates <- function(se,
                     +                                   neg.controls=NULL,
                     +                                   fun=c("median", "mean"),
                     +                                   plate.field="Plate",
                     +                                   gene.field="Gene",
                     +                                   class.field="class",
                     +                                   class.levels=c("target")
                     +){
                     +    fun <- match.arg(fun)
                     +    n_assays  <- length(assays(se))
                     +    assaysToNormalize <- seq_len(n_assays)
                     +    ann    <- rowData(se)
                     +    plate  <- ann[[plate.field]]
                     +    plates <- unique(plate)
                     +    n_plates <- length(plates)
                     +    n_genes <- nrow(se)
                     +    if (!gene.field %in% colnames(ann)){
                     +        stop("gene.field not found in colnames(rowData(se))")
                     +    }
                     +    if (!class.field %in% colnames(ann)){
                     +        stop("class.field not found in colnames(rowData(se))")
                     +    }
                     +    for (assay in assaysToNormalize){
                     +        Y <- assays(se)[[assay]]
                     +        for (k in seq_len(n_plates)){
                     +            if (is.null(neg.controls)){
                     +                wh_control <- seq_len(n_genes)
                     +            } else {
                     +                wh_control <- which(ann[[gene.field]] %in% neg.controls)
                     +            }
                     +            wh_plate <- which(plate==plates[[k]])
                     +            wh_norm  <- intersect(wh_control, wh_plate)
+                    +
                     +            # Only normalizing using target genes:
                     +            if (!is.null(class.levels)){
                     +                wh_class <- which(ann[[class.field]] %in% class.levels)
                     +            } else {
                     +                wh_class <- seq_len(n_genes)
                     +            }
                     +            wh_norm <- intersect(wh_norm, wh_class)
                     +            if (fun=="median"){
                     +                factors <- colMedians(Y[wh_norm,,drop=FALSE], na.rm=TRUE)
                     +            } else {
                     +                factors <- colMeans(Y[wh_norm,,drop=FALSE], na.rm=TRUE)
                     +            }
                     +            Y[wh_plate,] <- sweep(Y[wh_plate,,drop=FALSE], 2 ,factors, "-")
                     +        }
                     +        assays(se)[[assay]] <- Y
                     +    }
                     +    return(se)
                     +}
+                    +
+                    +
+                    +
                     +#' @title Within-plate normalization
                     +#'
                     +#' @description Within-plate normalization
                     +#'
                     +#' @param se A \linkS4class{SummarizedExperiment} object.
                     +#' @param fun String specifying the method
                     +#'     to use for normalization. Either "median" (default) or "mean".
                     +#' @param plate.field String specifying the name of the column
                     +#'     that contains the plate ID.
                     +#' @param what String specifying if plate rows or columns
                     +#'     should be normalized. "row" by default.
                     +#' @param class.field String specifying the name of the column
                     +#'     that stored the gene class in \code{rowData(se)}.
                     +#' @param class.levels Gene classes that should be considered
                     +#'     for estimating normalization factors. If NULL (default), all classes
                     +#'     are considered. If specified, intersection of the specified
                     +#'     gene classes and the genes specified in \code{neg.controls}
                     +#'     will be considered for z scoring.
                     +#'
                     +#' @return A \linkS4class{SummarizedExperiment} object with
                     +#'     value normalized within plates.
                     +#'
                     +#' #' @examples
                     +#' data(seExample)
                     +#' seExample <- logTransform(seExample, offset=1)
                     +#' seExample <- normalizeWithinPlate(seExample)
                     +#'
                     +#' @author Jean-Philippe Fortin
                     +#' @importFrom SummarizedExperiment rowData assays assays<-
                     +#' @importFrom matrixStats colMedians
                     +#' @export
                     +normalizeWithinPlate <- function(se,
                     +                                 fun=c("median", "mean"),
                     +                                 what=c("row", "column"),
                     +                                 plate.field="Plate",
                     +                                 class.field="class",
                     +                                 class.levels=c("target")
                     +){
                     +    fun  <- match.arg(fun)
                     +    what <- match.arg(what)
                     +    n_assays  <- length(assays(se))
                     +    ann <- rowData(se)
                     +    if (!what %in% colnames(ann)){
                     +        se  <- addPlateRowAndColumn(se)
                     +        ann <- rowData(se)
                     +    }
                     +    plate  <- ann[[plate.field]]
                     +    slice  <- paste0(plate, "_", ann[[what]])
                     +    slices <- unique(slice)
                     +    n_slices <- length(slices)
                     +    n_genes <- nrow(se)
                     +    for (assay in seq_len(n_assays)){
                     +        Y <- assays(se)[[assay]]
                     +        for (k in seq_len(n_slices)){
                     +            wh_control <- seq_len(n_genes)
                     +            wh_slice   <- which(slice==slices[[k]])
                     +            wh_norm <- intersect(wh_control, wh_slice)
                     +            # Only normalizing using target genes:
                     +            if (!is.null(class.levels)){
                     +                wh_class <- which(ann[[class.field]] %in% class.levels)
                     +            } else {
                     +                wh_class <- seq_len(n_genes)
                     +            }
                     +            wh_norm <- intersect(wh_norm, wh_class)
                     +            if (fun=="median"){
                     +                factors <- colMedians(Y[wh_norm,,drop=FALSE], na.rm=TRUE)
                     +            } else {
                     +                factors <- colMeans(Y[wh_norm,,drop=FALSE], na.rm=TRUE)
                     +            }
                     +            Y[wh_slice,] <- sweep(Y[wh_slice,,drop=FALSE], 2 ,factors, "-")
                     +        }
                     +        assays(se)[[assay]] <- Y
                     +    }
                     +    return(se)
                     +}
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +
+                    +

Util functions for arrays