R/makeCalls.R
31328a5c
 #' Make antibody binding positivity calls
 #'
 #' After normalization and data smoothing, this last step makes the call for each
 #' peptide of the peptideSet after baseline correcting the peptide intenstities.
 #'
 #' @param peptideSet A \code{peptideSet} object. The peptides, after normalization
 #' and possibly data smoothing.
 #' @param cutoff A \code{numeric}. If FDR, the FDR threshold. Otherwise, a cutoff
 #' for the background corrected intensities.
 #' @param method A \code{character}. The method used to make positivity calls.
 #' "absolute" and "FDR" are available. See details below.
 #' @param freq A \code{logical}. If set to TRUE, return the percentage of slides
 #' calling a peptide positive. Otherwise, return a \code{logical} indicating binding
 #' events.
 #' @param group A \code{character}. Only used when freq is set to TRUE. A character indicating
 #' a variable by which to group slides. If non-null the percentage is calculated by group.
 #' @param verbose A \code{logical}. If set to TRUE, progress information will be displayed.
 #'
 #' @details
 #' This function requires specific variables ptid and visit in pData(peptideSet).
 #' The variable \code{ptid} should indicate subjects, and the variable \code{visit}
 #' should be a factor with levels pre and post.
 #'
 #' If slides are paired for subjects, intensities corresponding to post-visit are
 #' substracted from pre. If slides are not paired, slides with pre have intensities
 #' averaged by peptides, and averaged peptide intensities are subtracted from slides
 #' that have entry post. Calls are made on these baseline corrected intensities.
 #'
 #' When method = FDR, a left-tail method is used to generate a threshold controlling
 #' the False Discovery Rate at level \code{cutoff}. When method = absolute, Intensities
 #' exceeding the threshold are labelled as positive.
 #'
 #' When freq = TRUE a group variable may be specified. The argument group indicates
 #' the name of a variable in pData(peptideSet) by which positive calls should be grouped.
 #' The call frequency for each peptide is calculated within groups.
 #'
 #' @return If freq = TRUE, a \code{numeric} \code{matrix} with peptides as rows and
 #' groups as columns where the values are the frequency of response in the group. If
 #' freq = FALSE, a \code{logical} \code{matrix} indicating binding events for each
 #' peptide in each subject.
 #'
 #' @rdname makeCalls
 #' @aliases makeCalls
 #' @author Greg Imholte
 #' @export
 #' @example examples/pipeline.R
 makeCalls <- function(peptideSet, cutoff=1.2, method="absolute", freq=TRUE, group=NULL, verbose=FALSE){
 	if (class(peptideSet)!="peptideSet") {
 		stop("peptideSet must be an object of class peptideSet")
 	}
 
 	if (preproc(peptideSet@experimentData)$transformation!="log") {
 		warning("The probe measurements should be log transformed.")
 	}
 
 	if (preproc(peptideSet@experimentData)$normalization=="none") {
 		warning("You should probably normalize your data before using this function.")
 	}
 
 	I <- baselineCorrect.pSet(peptideSet, verbose=verbose)
 
 	if (method == "FDR") {
 		Calls<-.findFDR(I, cutoff, position(peptideSet), verbose=verbose)
 	} else if(method == "absolute") {
 		Calls <- I > cutoff
 	}
 
 	if (!is.null(group) && freq) {
 		#parse the grouping variable
 
 		# Only select the Post and remove empty levels
 		t1 <- grepl("post", tolower(pData(peptideSet)$visit))
 		pd <- pData(peptideSet)[t1, ]
 
     if(!group%in%colnames(pd)){
       stop("The grouping variable is not part of the pData object.")
       } else {
         factor<-factor(pd[,group])
       }
 		if(nlevels(factor) > 1){
 			#split the ptid into groups
 			ptidGroups <- split(pd$ptid,factor)
 			#apply the rowMeans to each group
 			res <- lapply(ptidGroups, function(curPtid,Calls,ptid){rowMeans(Calls[,ptid%in%curPtid, drop=FALSE])}, Calls, pd$ptid)
 			res <- do.call(cbind, res)
 		} else {
 			return(rowMeans(Calls)*100)
 		}
     return(res*100)
 	} else if(freq) {
         return(rowMeans(Calls)*100)
     } else {
         return(Calls)
     }
 }
 
 .findFDR <- function(I, cutoff, position, verbose=FALSE){
     seqY <- seq(min(abs(I)), max(abs(I)),.05)
     # Split the data by unique positions
     tmp <- split(as.data.frame(I), position)
     # Compute the mean over unique positions
     D <- sapply(tmp,apply, 2, mean)
 
     #Calculate F(T)
     FDR <- sapply(seqY, function(x, D){median(
                   #Fs(T)
                   apply(D, 1, function(D,x){min(sum(D < -x)/sum(D > x), 1)}, x), na.rm=TRUE)}, D)
 
     # Did not find anything below the cutoff or everything is NA
     if (all(round(FDR,2)>cutoff, na.rm=TRUE) | all(is.na(FDR))) {
         return(I > max(I)) # Return all FALSE
     } else {
         Dmin <- seqY[which.min(abs(FDR-cutoff))]
         if(verbose){
           cat("The selected threshold T is ", Dmin, "\n")
         }
         return(I > Dmin)
     }
 }
 
 
 #' Substract baseline intensities
 #'
 #' Correct intensities by substracting PRE visit sample intensities.
 #'
 #' @param pSet A \code{peptideSet} with sample PRE and POST visits.
 #' @param verbose A \code{logical}. If TRUE, information regarding the
 #' pairedness of the data will be displayed.
 #'
 #' @return A \code{matrix} of the baseline corrected intensities, with as many
 #' columns as there are samples POST visit
 #'
 #' @details
 #' If samples are not PAIRED (One PRE and POST for each ptid), then the average
 #' expression of all PRE visit samples is substracted from each sample.
 #'
 #' @author Raphael Gottardo, Gregory Imholte
 #'
 baselineCorrect.pSet <- function(pSet, verbose=FALSE){
   y <- exprs(pSet)
   ptid <- pData(pSet)$ptid
   t0 <- grep("pre", tolower(pData(pSet)$visit))
   t1 <- grep("post", tolower(pData(pSet)$visit))
   ### Paired?
   if (length(ptid[t0])==0||length(ptid[t1])==0) {
     I<-as.matrix(y[,t1])
   } else {
     if (isTRUE(all.equal(sort(ptid[t0]), sort(ptid[t1])))) {
       if (verbose) {
         message("You have paired PRE/POST samples\n")
       }
       I <- as.matrix(y[,t1])-as.matrix(y[,t0])
     } else {
       if(verbose) {
         message("You don't have paired PRE/POST samples\n")
       }
       I <- as.matrix(y[,t1])-rowMeans(y[, t0, drop=FALSE], na.rm=TRUE)#the vector to be subtracted from matrix need to be the same length as nrow of the matrix
     }
   }
   colnames(I) <- ptid[t1]
   if(isTRUE(preproc(pSet)$split.by.clade)){
     rownames(I) <- rownames(pSet)
   } else{
     rownames(I) <- peptide(pSet)
   }
   return(I)
 }
 
 #' Substract baseline intensities
 #'
 #' Correct intensities by substracting PRE visit sample intensities.
 #'
 #' @param pSet A \code{peptideSet} with sample PRE and POST visits.
 #' @param verbose A \code{logical}. If TRUE, information regarding the
 #' pairedness of the data will be displayed.
 #'
 #' @return A \code{matrix} of the baseline corrected intensities, with as many
 #' columns as there are samples POST visit
 #'
 #' @details
 #' The function will try to pair as many sample as possible. The remaining subjects
 #' with a POST and no PRE will use the average expression of all baseline samples.
 #' Subjects with baseline only will not be represented in the resulting matrix.
 #'
 #' @author Renan Sauteraud
 baseline_correct <- function(pSet, verbose=FALSE){
   exprs <- exprs(pSet)
   pd <- pData(pSet)
   t0 <- grep("pre", tolower(pd$visit))
   t1 <- grep("post", tolower(pd$visit))
   ## All paired
   if (isTRUE(all.equal(sort(pd$ptid[t0]), sort(pd$ptid[t1])))){
     if (verbose) {
       message("You have paired PRE/POST samples\n")
     }
     I <- as.matrix(exprs[,t1])-as.matrix(exprs[,t0])
   } else {
     ## Deal with paired samples first
     paired <- pd$ptid[t1][pd$ptid[t1] %in% pd$ptid[t0]]
     pre_paired <- rownames(pd[ pd$ptid %in% paired & tolower(pd$visit)=="pre",])
     post_paired <- rownames(pd[ pd$ptid %in% paired & tolower(pd$visit)=="post",])
     I <- exprs[, post_paired] - exprs[, pre_paired]
     colnames(I) <- paired
     ## Cbind the rest
     if (verbose) {
       cat(length(non_paired), "subjects don't have a baseline sample.\n")
     }
     non_paired <- unique(pd$ptid[!( pd$ptid %in% paired) & tolower(pd$visit)=="post"])
     if(length(non_paired) > 0){
       post_only <- rownames(pd[ pd$ptid %in% non_paired, ])
       I_no_pairs <- exprs[, post_only] - rowMeans(exprs[, t0, drop=FALSE], na.rm=TRUE)
       colnames(I_no_pairs) <- non_paired
       I <- cbind(I, I_no_pairs)
     }
   }
   rownames(I) <- peptide(pSet)
   return(I)
 }