#' Data smoothing for peptide microarray. #' #' This function applies a sliding mean window to intensities to reduce noise #' generated by experimental variation, as well as take advantage of the overlapping #' nature of array peptides to share signal. #' #' @param peptideSet A \code{peptideSet}. The expression data for the peptides as #' well as annotations and ranges. The range information is required to run this function. #' @param width A \code{numeric}. The width of the sliding window. #' @param verbose A \code{logical}. If set to TRUE, progress information will be displayed. #' @param split.by.clade A \code{logical}. If TRUE, the peptides will be smoothed by #' clade (See details section below for more information). #' #' @details #' Peptide membership in the sliding mean window is determined by its position and #' the width argument. Two peptides are in the same window if the difference in their #' positions is less than or equal to width/2. A peptide's position is taken to be #' position(peptideSet). #' #' A peptide's intensity is replaced by the mean of all peptide intensities within #' the peptide's sliding mean window. #' #' When split.by.clade = TRUE, peptides are smoothed within clades defined by the #' clade column of the GRanges object occupying the featureRange slot of #' peptideSet. If set to FALSE, a peptide at a given position will borrow #' information from the neighboring peptides as well as the ones from other #' clades around this position. #' #' @return A \code{peptideSet} object with smoothed intensities. #' #' @seealso \code{\link{summarizePeptides}}, \code{\link{normalizeArray}} #' #' @author Gregory Imholte #' #' @name slidingMean #' @rdname slidingMean #' #' @importFrom GenomicRanges GRangesList #' @importClassesFrom GenomicRanges GRangesList #' @importMethodsFrom IRanges unlist #' @export #' @example examples/pipeline.R slidingMean <-function(peptideSet, width=9, verbose=FALSE, split.by.clade=TRUE){ .check_peptideSet(peptideSet) if (preproc(peptideSet@experimentData)$transformation!="log" & preproc(peptideSet@experimentData)$transformation!="vsn") { stop("The probe measurements need to be log/vsn transformed!") } if (preproc(peptideSet@experimentData)$normalization=="none"){ warning("You should probably normalize your data before using this function") } if(split.by.clade & ncol(clade(peptideSet)) > 1){ pSet_list <- split(peptideSet, clade(peptideSet)) #peptides need to be ordered the same in exprs and featureRange for(i in 1:length(pSet_list)){ cur_clade <- colnames(clade(peptideSet))[i] ranges(pSet_list[[i]])$clade <- cur_clade exprs(pSet_list[[i]]) <- .applySlidingMean(exprs(pSet_list[[i]]), width, position(pSet_list[[i]])) # update row names with clade-appended peptide strings clade_rownames <- paste(peptide(pSet_list[[i]]), cur_clade, sep="_") rownames(pSet_list[[i]]) <- clade_rownames names(ranges(pSet_list[[i]])) <- clade_rownames } #ranges <- do.call("rbind", lapply(pSet_list, ranges)) clade_ranges <- unlist(GRangesList(lapply(pSet_list, ranges))) clade_exprs <- do.call("rbind", lapply(pSet_list, exprs)) peptideSet_smoothed <- new("peptideSet", exprs = clade_exprs, featureRange = clade_ranges, experimentData = peptideSet@experimentData, phenoData = peptideSet@phenoData, protocolData = peptideSet@protocolData) preproc(peptideSet_smoothed)$split.by.clade <- TRUE } else { # if (length(names(ranges(peptideSet))) > 1) # warning("smoothing multiple spaces together in peptideSet object") # # This could be made more efficient with multicore p <- position(peptideSet) o <- order(p) ro <- order(o) y <- exprs(peptideSet)[o,] p <- position(peptideSet)[o] ny <- .applySlidingMean(y, width, p) exprs(peptideSet) <- ny[ro,] peptideSet_smoothed <- peptideSet } if (verbose) { cat("** Finished processing ", nrow(peptideSet_smoothed), " probes on ", ncol(peptideSet_smoothed)," arrays **\n"); } peptideSet_smoothed } #return A matrix of the intensities ordered like y. .applySlidingMean <- function(y, width, position) { yn <- sapply(position, function(p) { p.window <- abs(position - p) <= width/2 colMeans(y[p.window,,drop = FALSE]) }) yn <- t(yn) rownames(yn) <- rownames(y) return(yn) }