R/metrics.R
5f84f28d
 #' Normalized Dot Product
01a018db
 #'
5f84f28d
 #' This function calculates the similarity of all pairs of peaks from 2
 #' samples, using the spectra similarity
01a018db
 #'
 #'
5f84f28d
 #' Efficiently computes the normalized dot product between every pair of peak
 #' vectors and returns a similarity matrix.  C code is called.
01a018db
 #'
5f84f28d
 #' @param x1 data matrix for sample 1
 #' @param x2 data matrix for sample 2
 #' @param t1 vector of retention times for sample 1
 #' @param t2 vector of retention times for sample 2
 #' @param df distance from diagonal to calculate similarity
 #' @param D retention time penalty
 #' @param timedf matrix of time differences to normalize to.  if \code{NULL}, 0
 #' is used.
 #' @param verbose logical, whether to print out information
 #' @return
01a018db
 #'
5f84f28d
 #' matrix of similarities
 #' @author Mark Robinson
 #' @seealso \code{\link{dp}}, \code{\link{peaksAlignment}}
 #' @references
01a018db
 #'
5f84f28d
 #' Mark D Robinson (2008).  Methods for the analysis of gas chromatography -
 #' mass spectrometry data \emph{PhD dissertation} University of Melbourne.
 #' @keywords manip
 #' @examples
01a018db
 #'
5f84f28d
 #' require(gcspikelite)
01a018db
 #'
5f84f28d
 #' # paths and files
 #' gcmsPath<-paste(find.package("gcspikelite"),"data",sep="/")
 #' cdfFiles<-dir(gcmsPath,"CDF",full=TRUE)
 #' eluFiles<-dir(gcmsPath,"ELU",full=TRUE)
01a018db
 #'
5f84f28d
 #' # read data, peak detection results
 #' pd<-peaksDataset(cdfFiles[1:2],mz=seq(50,550),rtrange=c(7.5,8.5))
 #' pd<-addAMDISPeaks(pd,eluFiles[1:2])
01a018db
 #'
5f84f28d
 #' r<-normDotProduct(pd@peaksdata[[1]],pd@peaksdata[[2]])
 #'
 #' @useDynLib flagme
 #' @export normDotProduct
01a018db
 normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)),
d951f9cf
                                D=1e+05, timedf=NULL, verbose=FALSE){
01a018db
     if(is.null(t1))
d951f9cf
         t1 <- 1:ncol(x1)
01a018db
     if(is.null(t2))
d951f9cf
         t2 <- 1:ncol(x2)
01a018db
     if(nrow(x1) != nrow(x2) | length(t1) != ncol(x1) | length(t2) !=
d951f9cf
        ncol(x2)){
         stop("One of these is not true: nrow(x1)=nrow(x2), length(t1)=ncol(x1), length(t2)=ncol(x2).")
     }
     score <- matrix(0, nrow=ncol(x1), ncol=ncol(x2))
     if(length(D) == 1 & is.null(timedf)){
01a018db
         out <- .C("cos_ndp_lowmem", score=as.double(score),
                   nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)),
                   nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2),
                   t1=as.double(t1), t2 = as.double(t2), D = as.double(D),
d951f9cf
                   df=as.integer(df), PACKAGE="flagme")
     }else{
01a018db
         if(length(D) == 1)
d951f9cf
             D <- matrix(D, nrow=ncol(x1), ncol=ncol(x2))
01a018db
         if(ncol(x1) != nrow(D) | ncol(x2) != ncol(D))
d951f9cf
             stop("D must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be scalar.")
01a018db
         if(is.null(timedf))
d951f9cf
             timedf <- matrix(0, nrow=ncol(x1), ncol=ncol(x2))
01a018db
         if(ncol(x1) != nrow(timedf) | ncol(x2) != ncol(timedf))
d951f9cf
             stop("'timedf' must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be set to NULL.")
01a018db
         out <- .C("cos_ndp_himem", score=as.double(score),
                   nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)),
                   nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2),
                   D=as.double(D), df=as.integer(df), timedf=as.double(timedf),
d951f9cf
                   PACKAGE="flagme")
     }
     NDP <- matrix(1 - out$score, ncol=ncol(x2))
01a018db
     NDP[is.nan(NDP)] <- 0 ## remove NaN
d951f9cf
     return(NDP)
 }
009d882f
 
d951f9cf
 ## normDotProduct<-function(x1,x2,t1=NULL,t2=NULL,df=max(ncol(x1),ncol(x2)),D=100000,timedf=NULL,verbose=FALSE){
 ##   if(is.null(t1)) t1<-1:ncol(x1)
 ##   if(is.null(t2)) t2<-1:ncol(x2)
01a018db
 
d951f9cf
 ##   if( nrow(x1) != nrow(x2) | length(t1)!=ncol(x1) | length(t2)!=ncol(x2) ) {
 ##     stop("One of these is not true: nrow(x1)=nrow(x2), length(t1)=ncol(x1), length(t2)=ncol(x2).")
 ##   }
009d882f
 
d951f9cf
 ##   score<-matrix(0,nrow=ncol(x1),ncol=ncol(x2))
01a018db
 
d951f9cf
 ##   if( length(D)==1 & is.null(timedf) ) {
 ##     out<-.C("cos_ndp_lowmem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)),
 ##                      nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), t1=as.double(t1),
 ##                      t2=as.double(t2),D=as.double(D),df=as.integer(df), PACKAGE="flagme")
 ##   } else {
 ##     if (length(D)==1)
 ## 	  D<-matrix(D,nrow=ncol(x1),ncol=ncol(x2))
 ##     if( ncol(x1) != nrow(D) | ncol(x2)!=ncol(D) )
 ##       stop("D must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be scalar.")
 ## 	if( is.null(timedf) )
 ## 	  timedf<-matrix(0,nrow=ncol(x1),ncol=ncol(x2))
 ##     if( ncol(x1) != nrow(timedf) | ncol(x2)!=ncol(timedf) )
 ##       stop("'timedf' must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be set to NULL.")
 ##     out<-.C("cos_ndp_himem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)),
 ##                      nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2),
01a018db
 ## 					 D=as.double(D),df=as.integer(df),timedf=as.double(timedf), PACKAGE="flagme")
d951f9cf
 ##   }
 ##   matrix(1-out$score,ncol=ncol(x2))
 ## }
06c0bd25
 
 
5f84f28d
 
06c0bd25
 ## RR ##
4ff72b45
 ## retention time penalized normDotProd
5f84f28d
 
 #' Retention Time Penalized Normalized Dot Product
01a018db
 #'
5f84f28d
 #' This function calculates the similarity of all pairs of peaks from 2
eea95d16
 #' samples, using the spectra similarity and the retention time differencies
01a018db
 #'
5f84f28d
 #' Computes the normalized dot product between every pair of peak vectors in
 #' the retention time window (\code{D})and returns a similarity matrix.
01a018db
 #'
5f84f28d
 #' @param s1 data matrix for sample 1
 #' @param s2 data matrix for sample 2
 #' @param t1 vector of retention times for sample 1
 #' @param t2 vector of retention times for sample 2
 #' @param D retention time window for the matching
 #' @return matrix of similarities
 #' @author Riccardo Romoli
 #' @seealso \code{\link{peaksAlignment}}
 #' @keywords manip
 #' @examples
01a018db
 #'
5f84f28d
 #' ## Not Run
 #' require(gcspikelite)
eea95d16
 #' files <- list.files(path = paste(find.package("gcspikelite"), "data",
 #'                     sep = "/"),"CDF", full = TRUE)
 #' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
 #' ## create settings object
 #' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
01a018db
 #' cwt <- xcms::CentWaveParam()
 #' data <- addXCMSPeaks(files[1:2], data, settings = mfp, multipleMF = FALSE)
eea95d16
 #' data
5f84f28d
 #' ## review peak picking
eea95d16
 #' plotChrom(data, rtrange = c(7.5, 10.5), runs = c(1:2))
01a018db
 #'
eea95d16
 #' r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]],
 #'            data@peaksrt[[1]], data@peaksrt[[2]], D = 50)
5f84f28d
 #' ## End (Not Run)
01a018db
 #'
5f84f28d
 #' @export ndpRT
eea95d16
 ndpRT <- function(s1, s2, t1, t2, D) {
06c0bd25
     Normalize <- function(j){
eea95d16
         n <- apply(j, 2, function(k) {
06c0bd25
             m <- k[which.max(k)]
eea95d16
             norm <- k / m * 100
06c0bd25
         })
         return(n)
     }
eea95d16
     scoring <- function(s1, s2, t1, t2, D) {
         angle <- function(s1, s2) {
             theta <- acos(
                 sum(s1 * s2) / (sqrt(sum(s1 * s1)) * sqrt(sum(s2 * s2)))
                 )
             theta <- 1 - theta
             if(theta < 0) {
06c0bd25
                 theta <- 0
eea95d16
             }
             return(theta)
06c0bd25
         }
eea95d16
         rtPen <- function(t1, t2, D) {
06c0bd25
             ## D espresso in secondi
eea95d16
             t1 <- t1 / 60 # trasformo in secondi
             t2 <- t2 / 60 # trasformo in secondi
             srt <- exp(- (((t1 - t2)^2) / D^2)) # da articolo MR, modificato
06c0bd25
             # era 2*D^2
             return(srt)
         }
         score <- angle(s1, s2) * rtPen(t1, t2, D)
         return(score)
     }
     s1 <- Normalize(s1)
     s2 <- Normalize(s2)
eea95d16
     res <- matrix(0, nrow = ncol(s1), ncol = ncol(s2))
     for (i in 1:ncol(s1)) {
         for (j in 1:ncol(s2)) {
             res[i, j] <- scoring(s1[, i], s2[, j], t1[i], t2[j], D = D)
06c0bd25
         }
eea95d16
     }
06c0bd25
     return(res)
 }
 
 
5f84f28d
 
 
06c0bd25
 ## correlation Alignment
5f84f28d
 
 #' Retention Time Penalized Correlation
01a018db
 #'
5f84f28d
 #' This function calculates the similarity of all pairs of peaks from 2
 #' samples, using the spectra similarity and the rretention time differencies
01a018db
 #'
5f84f28d
 #' Computes the Pearson carrelation between every pair of peak vectors in the
 #' retention time window (\code{D})and returns the similarity matrix.
01a018db
 #'
5f84f28d
 #' @param d1 data matrix for sample 1
 #' @param d2 data matrix for sample 2
 #' @param t1 vector of retention times for sample 1
 #' @param t2 vector of retention times for sample 2
 #' @param D retention time window for the matching
 #' @param penality penalization applied to the matching between two mass
 #' spectra if \code{(t1-t2)>D}
 #' @return matrix of similarities
 #' @author Riccardo Romoli
 #' @seealso \code{\link{peaksAlignment}}
 #' @keywords manip
 #' @examples
01a018db
 #'
5f84f28d
 #' ## Not Run
 #' require(gcspikelite)
eea95d16
 #' files <- list.files(path = paste(find.package("gcspikelite"), "data",
 #'                     sep = "/"),"CDF", full = TRUE)
 #' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
 #' ## create settings object
 #' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
01a018db
 #' cwt <- xcms::CentWaveParam()
 #' data <- addXCMSPeaks(files[1:2], data, settings = mfp, multipleMF = FALSE)
eea95d16
 #' data
5f84f28d
 #' ## review peak picking
eea95d16
 #' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
01a018db
 #'
eea95d16
 #' r <- corPrt(data@peaksdata[[1]], data@peaksdata[[2]],
 #'            data@peaksrt[[1]], data@peaksrt[[2]], D = 50, penality = 0.2)
5f84f28d
 #' ## End (Not Run)
 #'
 #' @importFrom stats complete.cases
 #' @export corPrt
eea95d16
 corPrt <- function(d1, d2, t1, t2, D, penality = 0.2) {
06c0bd25
     D <- as.numeric(D) # time window in second
     pn <- as.numeric(penality)# penality if out of time window
eea95d16
     pearson <- function(x,y) {
06c0bd25
         size <- length(x)
eea95d16
         cfun <- .C("pearson", size = as.integer(size), x = as.double(x),
                    y = as.double(y), result = double(1), PACKAGE = 'flagme')
06c0bd25
         return(cfun[["result"]])
     }
eea95d16
     Normalize <- function(j) {
         n <- apply(j, 2, function(k) {
06c0bd25
             m <- k[which.max(k)]
eea95d16
             norm <- k / m * 100
06c0bd25
         })
     }
     Rank <- function(u) {
eea95d16
         if (length(u) == 0L)
06c0bd25
             u
         else if (is.matrix(u)) {
eea95d16
             if (nrow(u) > 1L)
                 apply(u, 2L, rank, na.last = "keep")
06c0bd25
             else row(u)
         }
eea95d16
         else rank(u, na.last = "keep")
06c0bd25
     }
487143b6
         x <- Normalize(d1)
         y <- Normalize(d2)
     ## method <- c("pearson", "kendall", "spearman")
06c0bd25
     ncx <- ncol(x)
     ncy <- ncol(y)
eea95d16
     r <- matrix(0, nrow = ncx, ncol = ncy)
06c0bd25
     for (i in seq_len(ncx)) {
         for (j in seq_len(ncy)) {
             x2 <- x[, i]
             y2 <- y[, j]
             ok <- complete.cases(x2, y2)
             x2 <- rank(x2[ok])
             y2 <- rank(y2[ok])
             ## insert rt penality in seconds
eea95d16
             rtDiff <- t1[i] * 60 - t2[j] * 60 # retention time in seconds
06c0bd25
             rtDiff <- abs(rtDiff)
             r[i, j] <- if (any(ok))
eea95d16
                            if (rtDiff <= D)
06c0bd25
                                pearson(x2, y2)
                            else 
                                pearson(x2, y2) - pn
a545aada
                        else 0
06c0bd25
         }
     }
eea95d16
     r <- apply(r, MARGIN = c(1, 2), function(x) {
         if (x < 0.2) {
487143b6
             x <- 0
eea95d16
         } else {
487143b6
             x <- x
         }
     })
06c0bd25
     rownames(r) <- colnames(x)
     colnames(r) <- colnames(y)
     return(r)
 }