5f84f28d |
#' Normalized Dot Product
|
01a018db |
#'
|
5f84f28d |
#' This function calculates the similarity of all pairs of peaks from 2
#' samples, using the spectra similarity
|
01a018db |
#'
#'
|
5f84f28d |
#' Efficiently computes the normalized dot product between every pair of peak
#' vectors and returns a similarity matrix. C code is called.
|
01a018db |
#'
|
5f84f28d |
#' @param x1 data matrix for sample 1
#' @param x2 data matrix for sample 2
#' @param t1 vector of retention times for sample 1
#' @param t2 vector of retention times for sample 2
#' @param df distance from diagonal to calculate similarity
#' @param D retention time penalty
#' @param timedf matrix of time differences to normalize to. if \code{NULL}, 0
#' is used.
#' @param verbose logical, whether to print out information
#' @return
|
01a018db |
#'
|
5f84f28d |
#' matrix of similarities
#' @author Mark Robinson
#' @seealso \code{\link{dp}}, \code{\link{peaksAlignment}}
#' @references
|
01a018db |
#'
|
5f84f28d |
#' Mark D Robinson (2008). Methods for the analysis of gas chromatography -
#' mass spectrometry data \emph{PhD dissertation} University of Melbourne.
#' @keywords manip
#' @examples
|
01a018db |
#'
|
5f84f28d |
#' require(gcspikelite)
|
01a018db |
#'
|
5f84f28d |
#' # paths and files
#' gcmsPath<-paste(find.package("gcspikelite"),"data",sep="/")
#' cdfFiles<-dir(gcmsPath,"CDF",full=TRUE)
#' eluFiles<-dir(gcmsPath,"ELU",full=TRUE)
|
01a018db |
#'
|
5f84f28d |
#' # read data, peak detection results
#' pd<-peaksDataset(cdfFiles[1:2],mz=seq(50,550),rtrange=c(7.5,8.5))
#' pd<-addAMDISPeaks(pd,eluFiles[1:2])
|
01a018db |
#'
|
5f84f28d |
#' r<-normDotProduct(pd@peaksdata[[1]],pd@peaksdata[[2]])
#'
#' @useDynLib flagme
#' @export normDotProduct
|
01a018db |
normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)),
|
d951f9cf |
D=1e+05, timedf=NULL, verbose=FALSE){
|
01a018db |
if(is.null(t1))
|
d951f9cf |
t1 <- 1:ncol(x1)
|
01a018db |
if(is.null(t2))
|
d951f9cf |
t2 <- 1:ncol(x2)
|
01a018db |
if(nrow(x1) != nrow(x2) | length(t1) != ncol(x1) | length(t2) !=
|
d951f9cf |
ncol(x2)){
stop("One of these is not true: nrow(x1)=nrow(x2), length(t1)=ncol(x1), length(t2)=ncol(x2).")
}
score <- matrix(0, nrow=ncol(x1), ncol=ncol(x2))
if(length(D) == 1 & is.null(timedf)){
|
01a018db |
out <- .C("cos_ndp_lowmem", score=as.double(score),
nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)),
nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2),
t1=as.double(t1), t2 = as.double(t2), D = as.double(D),
|
d951f9cf |
df=as.integer(df), PACKAGE="flagme")
}else{
|
01a018db |
if(length(D) == 1)
|
d951f9cf |
D <- matrix(D, nrow=ncol(x1), ncol=ncol(x2))
|
01a018db |
if(ncol(x1) != nrow(D) | ncol(x2) != ncol(D))
|
d951f9cf |
stop("D must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be scalar.")
|
01a018db |
if(is.null(timedf))
|
d951f9cf |
timedf <- matrix(0, nrow=ncol(x1), ncol=ncol(x2))
|
01a018db |
if(ncol(x1) != nrow(timedf) | ncol(x2) != ncol(timedf))
|
d951f9cf |
stop("'timedf' must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be set to NULL.")
|
01a018db |
out <- .C("cos_ndp_himem", score=as.double(score),
nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)),
nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2),
D=as.double(D), df=as.integer(df), timedf=as.double(timedf),
|
d951f9cf |
PACKAGE="flagme")
}
NDP <- matrix(1 - out$score, ncol=ncol(x2))
|
01a018db |
NDP[is.nan(NDP)] <- 0 ## remove NaN
|
d951f9cf |
return(NDP)
}
|
009d882f |
|
d951f9cf |
## normDotProduct<-function(x1,x2,t1=NULL,t2=NULL,df=max(ncol(x1),ncol(x2)),D=100000,timedf=NULL,verbose=FALSE){
## if(is.null(t1)) t1<-1:ncol(x1)
## if(is.null(t2)) t2<-1:ncol(x2)
|
01a018db |
|
d951f9cf |
## if( nrow(x1) != nrow(x2) | length(t1)!=ncol(x1) | length(t2)!=ncol(x2) ) {
## stop("One of these is not true: nrow(x1)=nrow(x2), length(t1)=ncol(x1), length(t2)=ncol(x2).")
## }
|
009d882f |
|
d951f9cf |
## score<-matrix(0,nrow=ncol(x1),ncol=ncol(x2))
|
01a018db |
|
d951f9cf |
## if( length(D)==1 & is.null(timedf) ) {
## out<-.C("cos_ndp_lowmem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)),
## nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), t1=as.double(t1),
## t2=as.double(t2),D=as.double(D),df=as.integer(df), PACKAGE="flagme")
## } else {
## if (length(D)==1)
## D<-matrix(D,nrow=ncol(x1),ncol=ncol(x2))
## if( ncol(x1) != nrow(D) | ncol(x2)!=ncol(D) )
## stop("D must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be scalar.")
## if( is.null(timedf) )
## timedf<-matrix(0,nrow=ncol(x1),ncol=ncol(x2))
## if( ncol(x1) != nrow(timedf) | ncol(x2)!=ncol(timedf) )
## stop("'timedf' must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be set to NULL.")
## out<-.C("cos_ndp_himem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)),
## nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2),
|
01a018db |
## D=as.double(D),df=as.integer(df),timedf=as.double(timedf), PACKAGE="flagme")
|
d951f9cf |
## }
## matrix(1-out$score,ncol=ncol(x2))
## }
|
06c0bd25 |
|
5f84f28d |
|
06c0bd25 |
## RR ##
|
4ff72b45 |
## retention time penalized normDotProd
|
5f84f28d |
#' Retention Time Penalized Normalized Dot Product
|
01a018db |
#'
|
5f84f28d |
#' This function calculates the similarity of all pairs of peaks from 2
|
eea95d16 |
#' samples, using the spectra similarity and the retention time differencies
|
01a018db |
#'
|
5f84f28d |
#' Computes the normalized dot product between every pair of peak vectors in
#' the retention time window (\code{D})and returns a similarity matrix.
|
01a018db |
#'
|
5f84f28d |
#' @param s1 data matrix for sample 1
#' @param s2 data matrix for sample 2
#' @param t1 vector of retention times for sample 1
#' @param t2 vector of retention times for sample 2
#' @param D retention time window for the matching
#' @return matrix of similarities
#' @author Riccardo Romoli
#' @seealso \code{\link{peaksAlignment}}
#' @keywords manip
#' @examples
|
01a018db |
#'
|
5f84f28d |
#' ## Not Run
#' require(gcspikelite)
|
eea95d16 |
#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
#' sep = "/"),"CDF", full = TRUE)
#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
#' ## create settings object
#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
|
01a018db |
#' cwt <- xcms::CentWaveParam()
#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, multipleMF = FALSE)
|
eea95d16 |
#' data
|
5f84f28d |
#' ## review peak picking
|
eea95d16 |
#' plotChrom(data, rtrange = c(7.5, 10.5), runs = c(1:2))
|
01a018db |
#'
|
eea95d16 |
#' r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]],
#' data@peaksrt[[1]], data@peaksrt[[2]], D = 50)
|
5f84f28d |
#' ## End (Not Run)
|
01a018db |
#'
|
5f84f28d |
#' @export ndpRT
|
eea95d16 |
ndpRT <- function(s1, s2, t1, t2, D) {
|
06c0bd25 |
Normalize <- function(j){
|
eea95d16 |
n <- apply(j, 2, function(k) {
|
06c0bd25 |
m <- k[which.max(k)]
|
eea95d16 |
norm <- k / m * 100
|
06c0bd25 |
})
return(n)
}
|
eea95d16 |
scoring <- function(s1, s2, t1, t2, D) {
angle <- function(s1, s2) {
theta <- acos(
sum(s1 * s2) / (sqrt(sum(s1 * s1)) * sqrt(sum(s2 * s2)))
)
theta <- 1 - theta
if(theta < 0) {
|
06c0bd25 |
theta <- 0
|
eea95d16 |
}
return(theta)
|
06c0bd25 |
}
|
eea95d16 |
rtPen <- function(t1, t2, D) {
|
06c0bd25 |
## D espresso in secondi
|
eea95d16 |
t1 <- t1 / 60 # trasformo in secondi
t2 <- t2 / 60 # trasformo in secondi
srt <- exp(- (((t1 - t2)^2) / D^2)) # da articolo MR, modificato
|
06c0bd25 |
# era 2*D^2
return(srt)
}
score <- angle(s1, s2) * rtPen(t1, t2, D)
return(score)
}
s1 <- Normalize(s1)
s2 <- Normalize(s2)
|
eea95d16 |
res <- matrix(0, nrow = ncol(s1), ncol = ncol(s2))
for (i in 1:ncol(s1)) {
for (j in 1:ncol(s2)) {
res[i, j] <- scoring(s1[, i], s2[, j], t1[i], t2[j], D = D)
|
06c0bd25 |
}
|
eea95d16 |
}
|
06c0bd25 |
return(res)
}
|
5f84f28d |
|
06c0bd25 |
## correlation Alignment
|
5f84f28d |
#' Retention Time Penalized Correlation
|
01a018db |
#'
|
5f84f28d |
#' This function calculates the similarity of all pairs of peaks from 2
#' samples, using the spectra similarity and the rretention time differencies
|
01a018db |
#'
|
5f84f28d |
#' Computes the Pearson carrelation between every pair of peak vectors in the
#' retention time window (\code{D})and returns the similarity matrix.
|
01a018db |
#'
|
5f84f28d |
#' @param d1 data matrix for sample 1
#' @param d2 data matrix for sample 2
#' @param t1 vector of retention times for sample 1
#' @param t2 vector of retention times for sample 2
#' @param D retention time window for the matching
#' @param penality penalization applied to the matching between two mass
#' spectra if \code{(t1-t2)>D}
#' @return matrix of similarities
#' @author Riccardo Romoli
#' @seealso \code{\link{peaksAlignment}}
#' @keywords manip
#' @examples
|
01a018db |
#'
|
5f84f28d |
#' ## Not Run
#' require(gcspikelite)
|
eea95d16 |
#' files <- list.files(path = paste(find.package("gcspikelite"), "data",
#' sep = "/"),"CDF", full = TRUE)
#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5))
#' ## create settings object
#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5)
|
01a018db |
#' cwt <- xcms::CentWaveParam()
#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, multipleMF = FALSE)
|
eea95d16 |
#' data
|
5f84f28d |
#' ## review peak picking
|
eea95d16 |
#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2))
|
01a018db |
#'
|
eea95d16 |
#' r <- corPrt(data@peaksdata[[1]], data@peaksdata[[2]],
#' data@peaksrt[[1]], data@peaksrt[[2]], D = 50, penality = 0.2)
|
5f84f28d |
#' ## End (Not Run)
#'
#' @importFrom stats complete.cases
#' @export corPrt
|
eea95d16 |
corPrt <- function(d1, d2, t1, t2, D, penality = 0.2) {
|
06c0bd25 |
D <- as.numeric(D) # time window in second
pn <- as.numeric(penality)# penality if out of time window
|
eea95d16 |
pearson <- function(x,y) {
|
06c0bd25 |
size <- length(x)
|
eea95d16 |
cfun <- .C("pearson", size = as.integer(size), x = as.double(x),
y = as.double(y), result = double(1), PACKAGE = 'flagme')
|
06c0bd25 |
return(cfun[["result"]])
}
|
eea95d16 |
Normalize <- function(j) {
n <- apply(j, 2, function(k) {
|
06c0bd25 |
m <- k[which.max(k)]
|
eea95d16 |
norm <- k / m * 100
|
06c0bd25 |
})
}
Rank <- function(u) {
|
eea95d16 |
if (length(u) == 0L)
|
06c0bd25 |
u
else if (is.matrix(u)) {
|
eea95d16 |
if (nrow(u) > 1L)
apply(u, 2L, rank, na.last = "keep")
|
06c0bd25 |
else row(u)
}
|
eea95d16 |
else rank(u, na.last = "keep")
|
06c0bd25 |
}
|
487143b6 |
x <- Normalize(d1)
y <- Normalize(d2)
## method <- c("pearson", "kendall", "spearman")
|
06c0bd25 |
ncx <- ncol(x)
ncy <- ncol(y)
|
eea95d16 |
r <- matrix(0, nrow = ncx, ncol = ncy)
|
06c0bd25 |
for (i in seq_len(ncx)) {
for (j in seq_len(ncy)) {
x2 <- x[, i]
y2 <- y[, j]
ok <- complete.cases(x2, y2)
x2 <- rank(x2[ok])
y2 <- rank(y2[ok])
## insert rt penality in seconds
|
eea95d16 |
rtDiff <- t1[i] * 60 - t2[j] * 60 # retention time in seconds
|
06c0bd25 |
rtDiff <- abs(rtDiff)
r[i, j] <- if (any(ok))
|
eea95d16 |
if (rtDiff <= D)
|
06c0bd25 |
pearson(x2, y2)
else
pearson(x2, y2) - pn
|
a545aada |
else 0
|
06c0bd25 |
}
}
|
eea95d16 |
r <- apply(r, MARGIN = c(1, 2), function(x) {
if (x < 0.2) {
|
487143b6 |
x <- 0
|
eea95d16 |
} else {
|
487143b6 |
x <- x
}
})
|
06c0bd25 |
rownames(r) <- colnames(x)
colnames(r) <- colnames(y)
return(r)
}
|