R/betweenAlignment.R
5f84f28d
 #' Data Structure for "between" alignment of many GCMS samples
 #'
 #' This function creates a "between" alignment (i.e. comparing merged peaks)
 #'
 #' \code{betweenAlignment} objects gives the data structure which stores the
 #' result of an alignment across several "pseudo" datasets. These pseudo
 #' datasets are constructed by merging the "within" alignments.
 #'
 #' @aliases betweenAlignment betweenAlignment-class betweenAlignment-show show,
 #' betweenAlignment-method
 #' @param pD a \code{peaksDataset} object
 #' @param cAList \code{list} of \code{clusterAlignment} objects, one for each
 #' experimental group
 #' @param pAList \code{list} of \code{progressiveAlignment} objects, one for
 #' each experimental group
 #' @param impList \code{list} of imputation lists
 #' @param filterMin minimum number of peaks within a merged peak to be kept in
 #' the analysis
 #' @param gap gap parameter
 #' @param D retention time penalty parameter
 #' @param usePeaks logical, whether to use peaks (if \code{TRUE}) or the full
 #' 2D profile alignment (if \code{FALSE})
 #' @param df distance from diagonal to calculate similarity
 #' @param verbose logical, whether to print information
 #' @param metric numeric, different algorithm to calculate the similarity
 #' matrix between two mass spectrum. \code{metric=1} call
 #' \code{normDotProduct()}; \code{metric=2} call \code{ndpRT()};
 #' \code{metric=3} call \code{corPrt()}
 #' @param type numeric, two different type of alignment function
 #' @param penality penalization applied to the matching between two mass
 #' spectra if \code{(t1-t2)>D}
 #' @param compress logical whether to compress the similarity matrix into a
 #' sparse format.
 #' @return \code{betweenAlignment} object
 #' @author Mark Robinson
 #' @seealso \code{\link{multipleAlignment}}
 #' @references Mark D Robinson (2008).  Methods for the analysis of gas
 #' chromatography - mass spectrometry data \emph{PhD dissertation} University
 #' of Melbourne.
 #' @keywords classes
 #' @examples
 #'
 #' 	require(gcspikelite)
 #' 	## see 'multipleAlignment'
 #' @importFrom stats median
 #' @export betweenAlignment
ef6be6a2
 betweenAlignment <- function(pD, cAList, pAList, impList, filterMin = 1,
                              gap = 0.7, D = 10, usePeaks = TRUE, df = 30,
5f84f28d
                              verbose = TRUE, metric = 2,  type = 2,
                              penality = 0.2, compress = FALSE){
ef6be6a2
     n <- length(pAList)
     if(length(filterMin) == 1)
     {
         filterMin <- rep(filterMin, n)
     }
     pkd <- vector("list", n)
     pkr <- vector("list", n)
     filtind <- vector("list", n)  # list of filtered indices
     newind <- vector("list", n)
     for(g in 1:n){
         pa <- pAList[[g]]
         nm <- length(pa@merges)
         runs <- pa@merges[[nm]]$runs
         ind <- pa@merges[[nm]]$ind
         keep <- rowSums(!is.na(ind)) >= filterMin[g] ## RR
         if (verbose)
         {
             cat("[betweenAlignment]", names(pAList)[g], ": Removing", nrow(ind) - sum(keep),"peaks.\n")
             ind <- ind[keep,]
         }
         newind[[g]] <- lapply(impList[[g]], FUN = function(u,k) u[k,], k = keep)
         filtind[[g]] <- ind
         rt <- matrix(NA, nrow(ind), ncol(ind))
         mz <- pD@mz
         peaksdata <- matrix(0, nrow = length(pD@mz), ncol = nrow(ind))
         for(j in 1:ncol(ind)){
             if(usePeaks)
             {
                 cur.ds <- pD@peaksdata[[runs[j]]]
                 cur.rt <- pD@peaksrt
             }
             else
             {
                 cur.ds <- pD@rawdata[[runs[j]]]
                 cur.rt <- pD@rawrt
             }
             for(i in 1:nrow(ind)){
                 if(!is.na(ind[i, j]))
                 {
                     peaksdata[,i] <- peaksdata[,i] + cur.ds[,ind[i,j]]
                     rt[i,j] <- cur.rt[[runs[j]]][ind[i,j]]
                 }
             }
         }
       pkd[[g]] <- peaksdata
 	if(!usePeaks)
   {
       pkd[[g]] <- pkd[[g]] / outer(rep(1, nrow(peaksdata)), rowSums(!is.na(ind)))
   } # average over the number of samples
         pkr[[g]] <- apply(rt, 1, median, na.rm = TRUE)
     }
     # wRA <- new("peaksDataset", mz = mz, rawdata = NULL, files = names(cAList), 
     #             rawrt = NULL, peaksdata = pkd, peaksrt = pkr, filtind = filtind)
     wRA <- new("peaksDataset",
                mz = mz, rawdata = list(NULL), files = names(cAList),
                rawrt = list(NULL), peaksdata = pkd, peaksrt = pkr)
     ## return(wRA)
     ## filtind <-
     ## RR
5f84f28d
     cA <- clusterAlignment(wRA, runs = 1:n, gap = gap, D = D, df = df,
                            metric = metric, type = type,
                            compress = compress, penality = penality) ## bug here with filterMin > 1
     pA <- progressiveAlignment(wRA, cA, gap = gap, D = D, df = df,
                                compress = compress, type = type)
ef6be6a2
     ##
     ## cA <- clusterAlignment(wRA, 1:n, gap = gap, D = D, df = df, verbose = verbose)
     ## pA <- progressiveAlignment(wRA, cA, gap = gap, D = D, df = df, verbose = verbose)
009d882f
 
ef6be6a2
     nm <- length(pA@merges)
     ind <- pA@merges[[nm]]$ind
     groups <- pA@merges[[nm]]$runs
     full.runs <- NULL
     full.groups <- NULL
     full.ind <- matrix(NA, nrow(ind), length(pD@rawdata))
     full.newind <- vector("list", 3)
     for(i in 1:length(full.newind)){
         full.newind[[i]] <- matrix(NA, nrow(ind), length(pD@rawdata))
     }
     names(full.newind) <- c("apex", "start", "end")
5f84f28d
 
ef6be6a2
     col <- 1
     for(i in 1:length(groups)){
         g <- groups[i]
         n <- length(pAList[[g]]@merges)
         full.runs <- c(full.runs, pAList[[g]]@merges[[n]]$runs)
         nr <- length(pAList[[g]]@merges[[n]]$runs)
         full.groups <- c(full.groups, rep(wRA@files[g],nr))
         this.ind <- filtind[[g]]
         rw <- which(!is.na(ind[,i]))
         cl <- col:(col + ncol(this.ind) - 1)
         full.ind[rw, cl] <- this.ind
         if(!is.null(newind[[g]]$apex))
         {
             for(j in 1:length(full.newind)){
                 full.newind[[j]] <- newind[[g]][[j]]
             }
         }
         col <- col + ncol(this.ind)
009d882f
     }
5f84f28d
 
ef6be6a2
     new("betweenAlignment",
         mergedPeaksDataset = wRA,
         ind = full.ind,
         imputeind = full.newind,
         runs = full.runs,
         groups = full.groups,
         cA = cA,
         pA = pA,
         filtind = filtind,
         newind = newind
         )
009d882f
 }
 
5f84f28d
 
 #' Show method for "between" alignment of many GCMS samples
 #'
 #' This function show the results of a "between" alignment
 #'
 #' \code{betweenAlignment} objects gives the data structure which stores the
 #' result of an alignment across several "pseudo" datasets. These pseudo
 #' datasets are constructed by merging the "within" alignments.
 #' @title betweenAlignment-show
 #' @param object 
 #' @return \code{betweenAlignment} object
 #' @author Mark Robinson
 #' @export
 #' @noRd
009d882f
 setMethod("show","betweenAlignment",
eea95d16
           function(object) {
5f84f28d
               cat("An object of class \"", class(object), "\"\n", sep = "")
               cat(length(object@mergedPeaksDataset@peaksrt), "groups:",
eea95d16
                   sapply(object@mergedPeaksDataset@peaksrt, length),
                   "merged peaks\n"
5f84f28d
                   )
           }
           )