... | ... |
@@ -1,12 +1,12 @@ |
1 | 1 |
#' Normalized Dot Product |
2 |
-#' |
|
2 |
+#' |
|
3 | 3 |
#' This function calculates the similarity of all pairs of peaks from 2 |
4 | 4 |
#' samples, using the spectra similarity |
5 |
-#' |
|
6 |
-#' |
|
5 |
+#' |
|
6 |
+#' |
|
7 | 7 |
#' Efficiently computes the normalized dot product between every pair of peak |
8 | 8 |
#' vectors and returns a similarity matrix. C code is called. |
9 |
-#' |
|
9 |
+#' |
|
10 | 10 |
#' @param x1 data matrix for sample 1 |
11 | 11 |
#' @param x2 data matrix for sample 2 |
12 | 12 |
#' @param t1 vector of retention times for sample 1 |
... | ... |
@@ -17,79 +17,79 @@ |
17 | 17 |
#' is used. |
18 | 18 |
#' @param verbose logical, whether to print out information |
19 | 19 |
#' @return |
20 |
-#' |
|
20 |
+#' |
|
21 | 21 |
#' matrix of similarities |
22 | 22 |
#' @author Mark Robinson |
23 | 23 |
#' @seealso \code{\link{dp}}, \code{\link{peaksAlignment}} |
24 | 24 |
#' @references |
25 |
-#' |
|
25 |
+#' |
|
26 | 26 |
#' Mark D Robinson (2008). Methods for the analysis of gas chromatography - |
27 | 27 |
#' mass spectrometry data \emph{PhD dissertation} University of Melbourne. |
28 | 28 |
#' @keywords manip |
29 | 29 |
#' @examples |
30 |
-#' |
|
30 |
+#' |
|
31 | 31 |
#' require(gcspikelite) |
32 |
-#' |
|
32 |
+#' |
|
33 | 33 |
#' # paths and files |
34 | 34 |
#' gcmsPath<-paste(find.package("gcspikelite"),"data",sep="/") |
35 | 35 |
#' cdfFiles<-dir(gcmsPath,"CDF",full=TRUE) |
36 | 36 |
#' eluFiles<-dir(gcmsPath,"ELU",full=TRUE) |
37 |
-#' |
|
37 |
+#' |
|
38 | 38 |
#' # read data, peak detection results |
39 | 39 |
#' pd<-peaksDataset(cdfFiles[1:2],mz=seq(50,550),rtrange=c(7.5,8.5)) |
40 | 40 |
#' pd<-addAMDISPeaks(pd,eluFiles[1:2]) |
41 |
-#' |
|
41 |
+#' |
|
42 | 42 |
#' r<-normDotProduct(pd@peaksdata[[1]],pd@peaksdata[[2]]) |
43 | 43 |
#' |
44 | 44 |
#' @useDynLib flagme |
45 | 45 |
#' @export normDotProduct |
46 |
-normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)), |
|
46 |
+normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)), |
|
47 | 47 |
D=1e+05, timedf=NULL, verbose=FALSE){ |
48 |
- if(is.null(t1)) |
|
48 |
+ if(is.null(t1)) |
|
49 | 49 |
t1 <- 1:ncol(x1) |
50 |
- if(is.null(t2)) |
|
50 |
+ if(is.null(t2)) |
|
51 | 51 |
t2 <- 1:ncol(x2) |
52 |
- if(nrow(x1) != nrow(x2) | length(t1) != ncol(x1) | length(t2) != |
|
52 |
+ if(nrow(x1) != nrow(x2) | length(t1) != ncol(x1) | length(t2) != |
|
53 | 53 |
ncol(x2)){ |
54 | 54 |
stop("One of these is not true: nrow(x1)=nrow(x2), length(t1)=ncol(x1), length(t2)=ncol(x2).") |
55 | 55 |
} |
56 | 56 |
score <- matrix(0, nrow=ncol(x1), ncol=ncol(x2)) |
57 | 57 |
if(length(D) == 1 & is.null(timedf)){ |
58 |
- out <- .C("cos_ndp_lowmem", score=as.double(score), |
|
59 |
- nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
60 |
- nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), |
|
61 |
- t1=as.double(t1), t2 = as.double(t2), D = as.double(D), |
|
58 |
+ out <- .C("cos_ndp_lowmem", score=as.double(score), |
|
59 |
+ nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
60 |
+ nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), |
|
61 |
+ t1=as.double(t1), t2 = as.double(t2), D = as.double(D), |
|
62 | 62 |
df=as.integer(df), PACKAGE="flagme") |
63 | 63 |
}else{ |
64 |
- if(length(D) == 1) |
|
64 |
+ if(length(D) == 1) |
|
65 | 65 |
D <- matrix(D, nrow=ncol(x1), ncol=ncol(x2)) |
66 |
- if(ncol(x1) != nrow(D) | ncol(x2) != ncol(D)) |
|
66 |
+ if(ncol(x1) != nrow(D) | ncol(x2) != ncol(D)) |
|
67 | 67 |
stop("D must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be scalar.") |
68 |
- if(is.null(timedf)) |
|
68 |
+ if(is.null(timedf)) |
|
69 | 69 |
timedf <- matrix(0, nrow=ncol(x1), ncol=ncol(x2)) |
70 |
- if(ncol(x1) != nrow(timedf) | ncol(x2) != ncol(timedf)) |
|
70 |
+ if(ncol(x1) != nrow(timedf) | ncol(x2) != ncol(timedf)) |
|
71 | 71 |
stop("'timedf' must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be set to NULL.") |
72 |
- out <- .C("cos_ndp_himem", score=as.double(score), |
|
73 |
- nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
74 |
- nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), |
|
75 |
- D=as.double(D), df=as.integer(df), timedf=as.double(timedf), |
|
72 |
+ out <- .C("cos_ndp_himem", score=as.double(score), |
|
73 |
+ nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
74 |
+ nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), |
|
75 |
+ D=as.double(D), df=as.integer(df), timedf=as.double(timedf), |
|
76 | 76 |
PACKAGE="flagme") |
77 | 77 |
} |
78 | 78 |
NDP <- matrix(1 - out$score, ncol=ncol(x2)) |
79 |
- NDP[is.nan(NDP)] <- 0 ## remove NaN |
|
79 |
+ NDP[is.nan(NDP)] <- 0 ## remove NaN |
|
80 | 80 |
return(NDP) |
81 | 81 |
} |
82 | 82 |
|
83 | 83 |
## normDotProduct<-function(x1,x2,t1=NULL,t2=NULL,df=max(ncol(x1),ncol(x2)),D=100000,timedf=NULL,verbose=FALSE){ |
84 | 84 |
## if(is.null(t1)) t1<-1:ncol(x1) |
85 | 85 |
## if(is.null(t2)) t2<-1:ncol(x2) |
86 |
- |
|
86 |
+ |
|
87 | 87 |
## if( nrow(x1) != nrow(x2) | length(t1)!=ncol(x1) | length(t2)!=ncol(x2) ) { |
88 | 88 |
## stop("One of these is not true: nrow(x1)=nrow(x2), length(t1)=ncol(x1), length(t2)=ncol(x2).") |
89 | 89 |
## } |
90 | 90 |
|
91 | 91 |
## score<-matrix(0,nrow=ncol(x1),ncol=ncol(x2)) |
92 |
- |
|
92 |
+ |
|
93 | 93 |
## if( length(D)==1 & is.null(timedf) ) { |
94 | 94 |
## out<-.C("cos_ndp_lowmem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
95 | 95 |
## nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), t1=as.double(t1), |
... | ... |
@@ -105,7 +105,7 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)) |
105 | 105 |
## stop("'timedf' must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be set to NULL.") |
106 | 106 |
## out<-.C("cos_ndp_himem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
107 | 107 |
## nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), |
108 |
-## D=as.double(D),df=as.integer(df),timedf=as.double(timedf), PACKAGE="flagme") |
|
108 |
+## D=as.double(D),df=as.integer(df),timedf=as.double(timedf), PACKAGE="flagme") |
|
109 | 109 |
## } |
110 | 110 |
## matrix(1-out$score,ncol=ncol(x2)) |
111 | 111 |
## } |
... | ... |
@@ -116,13 +116,13 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)) |
116 | 116 |
## retention time penalized normDotProd |
117 | 117 |
|
118 | 118 |
#' Retention Time Penalized Normalized Dot Product |
119 |
-#' |
|
119 |
+#' |
|
120 | 120 |
#' This function calculates the similarity of all pairs of peaks from 2 |
121 | 121 |
#' samples, using the spectra similarity and the retention time differencies |
122 |
-#' |
|
122 |
+#' |
|
123 | 123 |
#' Computes the normalized dot product between every pair of peak vectors in |
124 | 124 |
#' the retention time window (\code{D})and returns a similarity matrix. |
125 |
-#' |
|
125 |
+#' |
|
126 | 126 |
#' @param s1 data matrix for sample 1 |
127 | 127 |
#' @param s2 data matrix for sample 2 |
128 | 128 |
#' @param t1 vector of retention times for sample 1 |
... | ... |
@@ -133,7 +133,7 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)) |
133 | 133 |
#' @seealso \code{\link{peaksAlignment}} |
134 | 134 |
#' @keywords manip |
135 | 135 |
#' @examples |
136 |
-#' |
|
136 |
+#' |
|
137 | 137 |
#' ## Not Run |
138 | 138 |
#' require(gcspikelite) |
139 | 139 |
#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
... | ... |
@@ -141,20 +141,16 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)) |
141 | 141 |
#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
142 | 142 |
#' ## create settings object |
143 | 143 |
#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
144 |
-#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
145 |
-#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
146 |
-#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
147 |
-#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
148 |
-#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
149 |
-#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
144 |
+#' cwt <- xcms::CentWaveParam() |
|
145 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, multipleMF = FALSE) |
|
150 | 146 |
#' data |
151 | 147 |
#' ## review peak picking |
152 | 148 |
#' plotChrom(data, rtrange = c(7.5, 10.5), runs = c(1:2)) |
153 |
-#' |
|
149 |
+#' |
|
154 | 150 |
#' r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]], |
155 | 151 |
#' data@peaksrt[[1]], data@peaksrt[[2]], D = 50) |
156 | 152 |
#' ## End (Not Run) |
157 |
-#' |
|
153 |
+#' |
|
158 | 154 |
#' @export ndpRT |
159 | 155 |
ndpRT <- function(s1, s2, t1, t2, D) { |
160 | 156 |
Normalize <- function(j){ |
... | ... |
@@ -203,13 +199,13 @@ ndpRT <- function(s1, s2, t1, t2, D) { |
203 | 199 |
## correlation Alignment |
204 | 200 |
|
205 | 201 |
#' Retention Time Penalized Correlation |
206 |
-#' |
|
202 |
+#' |
|
207 | 203 |
#' This function calculates the similarity of all pairs of peaks from 2 |
208 | 204 |
#' samples, using the spectra similarity and the rretention time differencies |
209 |
-#' |
|
205 |
+#' |
|
210 | 206 |
#' Computes the Pearson carrelation between every pair of peak vectors in the |
211 | 207 |
#' retention time window (\code{D})and returns the similarity matrix. |
212 |
-#' |
|
208 |
+#' |
|
213 | 209 |
#' @param d1 data matrix for sample 1 |
214 | 210 |
#' @param d2 data matrix for sample 2 |
215 | 211 |
#' @param t1 vector of retention times for sample 1 |
... | ... |
@@ -222,7 +218,7 @@ ndpRT <- function(s1, s2, t1, t2, D) { |
222 | 218 |
#' @seealso \code{\link{peaksAlignment}} |
223 | 219 |
#' @keywords manip |
224 | 220 |
#' @examples |
225 |
-#' |
|
221 |
+#' |
|
226 | 222 |
#' ## Not Run |
227 | 223 |
#' require(gcspikelite) |
228 | 224 |
#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
... | ... |
@@ -230,16 +226,12 @@ ndpRT <- function(s1, s2, t1, t2, D) { |
230 | 226 |
#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
231 | 227 |
#' ## create settings object |
232 | 228 |
#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
233 |
-#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
234 |
-#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
235 |
-#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
236 |
-#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
237 |
-#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
238 |
-#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
229 |
+#' cwt <- xcms::CentWaveParam() |
|
230 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, multipleMF = FALSE) |
|
239 | 231 |
#' data |
240 | 232 |
#' ## review peak picking |
241 | 233 |
#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2)) |
242 |
-#' |
|
234 |
+#' |
|
243 | 235 |
#' r <- corPrt(data@peaksdata[[1]], data@peaksdata[[2]], |
244 | 236 |
#' data@peaksrt[[1]], data@peaksrt[[2]], D = 50, penality = 0.2) |
245 | 237 |
#' ## End (Not Run) |
... | ... |
@@ -118,7 +118,7 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)) |
118 | 118 |
#' Retention Time Penalized Normalized Dot Product |
119 | 119 |
#' |
120 | 120 |
#' This function calculates the similarity of all pairs of peaks from 2 |
121 |
-#' samples, using the spectra similarity and the rretention time differencies |
|
121 |
+#' samples, using the spectra similarity and the retention time differencies |
|
122 | 122 |
#' |
123 | 123 |
#' Computes the normalized dot product between every pair of peak vectors in |
124 | 124 |
#' the retention time window (\code{D})and returns a similarity matrix. |
... | ... |
@@ -136,65 +136,64 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)) |
136 | 136 |
#' |
137 | 137 |
#' ## Not Run |
138 | 138 |
#' require(gcspikelite) |
139 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
140 |
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
141 |
-#' |
|
142 |
-#' # read data, peak detection results |
|
143 |
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
144 |
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
145 |
-#' snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
146 |
-#' sleep=0) |
|
139 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
140 |
+#' sep = "/"),"CDF", full = TRUE) |
|
141 |
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
142 |
+#' ## create settings object |
|
143 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
144 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
145 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
146 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
147 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
148 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
149 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
150 |
+#' data |
|
147 | 151 |
#' ## review peak picking |
148 |
-#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
152 |
+#' plotChrom(data, rtrange = c(7.5, 10.5), runs = c(1:2)) |
|
149 | 153 |
#' |
150 |
-#' r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
151 |
-#' pd@peaksrt[[1]], pd@peaksrt[[2]], D=50) |
|
154 |
+#' r <- ndpRT(data@peaksdata[[1]], data@peaksdata[[2]], |
|
155 |
+#' data@peaksrt[[1]], data@peaksrt[[2]], D = 50) |
|
152 | 156 |
#' ## End (Not Run) |
153 | 157 |
#' |
154 | 158 |
#' @export ndpRT |
155 |
-ndpRT <- function(s1, s2, t1, t2, D){ |
|
156 |
- |
|
159 |
+ndpRT <- function(s1, s2, t1, t2, D) { |
|
157 | 160 |
Normalize <- function(j){ |
158 |
- n <- apply(j, 2, function(k){ |
|
161 |
+ n <- apply(j, 2, function(k) { |
|
159 | 162 |
m <- k[which.max(k)] |
160 |
- norm <- k/m*100 |
|
163 |
+ norm <- k / m * 100 |
|
161 | 164 |
}) |
162 | 165 |
return(n) |
163 | 166 |
} |
164 |
- |
|
165 |
- scoring <- function(s1, s2, t1, t2, D){ |
|
166 |
- angle <- function(s1, s2){ |
|
167 |
- theta <- acos(sum(s1*s2) / (sqrt(sum(s1 * s1)) * sqrt(sum(s2 * s2)))) |
|
168 |
- theta <- 1-theta |
|
169 |
- if(theta < 0) |
|
170 |
- { |
|
167 |
+ scoring <- function(s1, s2, t1, t2, D) { |
|
168 |
+ angle <- function(s1, s2) { |
|
169 |
+ theta <- acos( |
|
170 |
+ sum(s1 * s2) / (sqrt(sum(s1 * s1)) * sqrt(sum(s2 * s2))) |
|
171 |
+ ) |
|
172 |
+ theta <- 1 - theta |
|
173 |
+ if(theta < 0) { |
|
171 | 174 |
theta <- 0 |
172 |
- } |
|
173 |
- return(theta) |
|
175 |
+ } |
|
176 |
+ return(theta) |
|
174 | 177 |
} |
175 |
- |
|
176 |
- rtPen <- function(t1, t2, D){ |
|
178 |
+ rtPen <- function(t1, t2, D) { |
|
177 | 179 |
## D espresso in secondi |
178 |
- t1 <- t1/60 # trasformo in secondi |
|
179 |
- t2 <- t2/60 # trasformo in secondi |
|
180 |
- srt <- exp(-(((t1-t2)^2) / D^2)) # da articolo MR, modificato |
|
180 |
+ t1 <- t1 / 60 # trasformo in secondi |
|
181 |
+ t2 <- t2 / 60 # trasformo in secondi |
|
182 |
+ srt <- exp(- (((t1 - t2)^2) / D^2)) # da articolo MR, modificato |
|
181 | 183 |
# era 2*D^2 |
182 | 184 |
return(srt) |
183 | 185 |
} |
184 |
- |
|
185 | 186 |
score <- angle(s1, s2) * rtPen(t1, t2, D) |
186 | 187 |
return(score) |
187 | 188 |
} |
188 |
- |
|
189 | 189 |
s1 <- Normalize(s1) |
190 | 190 |
s2 <- Normalize(s2) |
191 |
- |
|
192 |
- res <- matrix(0, nrow=ncol(s1), ncol=ncol(s2)) |
|
193 |
- for(i in 1:ncol(s1)){ |
|
194 |
- for(j in 1:ncol(s2)){ |
|
195 |
- res[i,j] <- scoring(s1[,i], s2[,j], t1[i], t2[j], D=D) |
|
191 |
+ res <- matrix(0, nrow = ncol(s1), ncol = ncol(s2)) |
|
192 |
+ for (i in 1:ncol(s1)) { |
|
193 |
+ for (j in 1:ncol(s2)) { |
|
194 |
+ res[i, j] <- scoring(s1[, i], s2[, j], t1[i], t2[j], D = D) |
|
196 | 195 |
} |
197 |
- } |
|
196 |
+ } |
|
198 | 197 |
return(res) |
199 | 198 |
} |
200 | 199 |
|
... | ... |
@@ -226,55 +225,58 @@ ndpRT <- function(s1, s2, t1, t2, D){ |
226 | 225 |
#' |
227 | 226 |
#' ## Not Run |
228 | 227 |
#' require(gcspikelite) |
229 |
-#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
230 |
-#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
231 |
-#' ## read data, peak detection results |
|
232 |
-#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
233 |
-#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
234 |
-#' snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
235 |
-#' sleep=0) |
|
228 |
+#' files <- list.files(path = paste(find.package("gcspikelite"), "data", |
|
229 |
+#' sep = "/"),"CDF", full = TRUE) |
|
230 |
+#' data <- peaksDataset(files[1:2], mz = seq(50, 550), rtrange = c(7.5, 8.5)) |
|
231 |
+#' ## create settings object |
|
232 |
+#' mfp <- xcms::MatchedFilterParam(fwhm = 10, snthresh = 5) |
|
233 |
+#' cwt <- xcms::CentWaveParam(snthresh = 3, ppm = 3000, peakwidth = c(3, 40), |
|
234 |
+#' prefilter = c(3, 100), fitgauss = FALSE, integrate = 2, noise = 0, |
|
235 |
+#' extendLengthMSW = TRUE, mzCenterFun = "wMean") |
|
236 |
+#' data <- addXCMSPeaks(files[1:2], data, settings = mfp, minintens = 100, |
|
237 |
+#' multipleMatchedFilter = FALSE, multipleMatchedFilterParam = |
|
238 |
+#' list(fwhm = c(5, 10, 20), rt_abs = 3, mz_abs = 0.1)) |
|
239 |
+#' data |
|
236 | 240 |
#' ## review peak picking |
237 |
-#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
241 |
+#' plotChrom(data, rtrange=c(7.5, 10.5), runs=c(1:2)) |
|
238 | 242 |
#' |
239 |
-#' r <- corPrt(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
240 |
-#' pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, penality=0.2) |
|
243 |
+#' r <- corPrt(data@peaksdata[[1]], data@peaksdata[[2]], |
|
244 |
+#' data@peaksrt[[1]], data@peaksrt[[2]], D = 50, penality = 0.2) |
|
241 | 245 |
#' ## End (Not Run) |
242 | 246 |
#' |
243 | 247 |
#' @importFrom stats complete.cases |
244 | 248 |
#' @export corPrt |
245 |
-corPrt <- function(d1, d2, t1, t2, D, penality=0.2){ |
|
249 |
+corPrt <- function(d1, d2, t1, t2, D, penality = 0.2) { |
|
246 | 250 |
D <- as.numeric(D) # time window in second |
247 | 251 |
pn <- as.numeric(penality)# penality if out of time window |
248 |
- pearson <- function(x,y){ |
|
252 |
+ pearson <- function(x,y) { |
|
249 | 253 |
size <- length(x) |
250 |
- cfun <- .C("pearson", size=as.integer(size), x=as.double(x), |
|
251 |
- y=as.double(y), result=double(1), PACKAGE='flagme') |
|
254 |
+ cfun <- .C("pearson", size = as.integer(size), x = as.double(x), |
|
255 |
+ y = as.double(y), result = double(1), PACKAGE = 'flagme') |
|
252 | 256 |
return(cfun[["result"]]) |
253 | 257 |
} |
254 |
- Normalize <- function(j){ |
|
255 |
- n <- apply(j, 2, function(k){ |
|
258 |
+ Normalize <- function(j) { |
|
259 |
+ n <- apply(j, 2, function(k) { |
|
256 | 260 |
m <- k[which.max(k)] |
257 |
- norm <- k/m*100 |
|
261 |
+ norm <- k / m * 100 |
|
258 | 262 |
}) |
259 | 263 |
} |
260 | 264 |
Rank <- function(u) { |
261 |
- if (length(u) == 0L) |
|
265 |
+ if (length(u) == 0L) |
|
262 | 266 |
u |
263 | 267 |
else if (is.matrix(u)) { |
264 |
- if (nrow(u) > 1L) |
|
265 |
- apply(u, 2L, rank, na.last="keep") |
|
268 |
+ if (nrow(u) > 1L) |
|
269 |
+ apply(u, 2L, rank, na.last = "keep") |
|
266 | 270 |
else row(u) |
267 | 271 |
} |
268 |
- else rank(u, na.last="keep") |
|
272 |
+ else rank(u, na.last = "keep") |
|
269 | 273 |
} |
270 |
- # |
|
271 | 274 |
x <- Normalize(d1) |
272 | 275 |
y <- Normalize(d2) |
273 |
- |
|
274 | 276 |
## method <- c("pearson", "kendall", "spearman") |
275 | 277 |
ncx <- ncol(x) |
276 | 278 |
ncy <- ncol(y) |
277 |
- r <- matrix(0, nrow=ncx, ncol=ncy) |
|
279 |
+ r <- matrix(0, nrow = ncx, ncol = ncy) |
|
278 | 280 |
for (i in seq_len(ncx)) { |
279 | 281 |
for (j in seq_len(ncy)) { |
280 | 282 |
x2 <- x[, i] |
... | ... |
@@ -283,24 +285,20 @@ corPrt <- function(d1, d2, t1, t2, D, penality=0.2){ |
283 | 285 |
x2 <- rank(x2[ok]) |
284 | 286 |
y2 <- rank(y2[ok]) |
285 | 287 |
## insert rt penality in seconds |
286 |
- rtDiff <- t1[i]*60 - t2[j]*60 # retention time in seconds |
|
288 |
+ rtDiff <- t1[i] * 60 - t2[j] * 60 # retention time in seconds |
|
287 | 289 |
rtDiff <- abs(rtDiff) |
288 | 290 |
r[i, j] <- if (any(ok)) |
289 |
- if(rtDiff <= D) |
|
291 |
+ if (rtDiff <= D) |
|
290 | 292 |
pearson(x2, y2) |
291 | 293 |
else |
292 | 294 |
pearson(x2, y2) - pn |
293 | 295 |
else 0 |
294 | 296 |
} |
295 | 297 |
} |
296 |
- |
|
297 |
- r <- apply(r, MARGIN=c(1,2), function(x){ |
|
298 |
- if(x < 0.2) |
|
299 |
- { |
|
298 |
+ r <- apply(r, MARGIN = c(1, 2), function(x) { |
|
299 |
+ if (x < 0.2) { |
|
300 | 300 |
x <- 0 |
301 |
- } |
|
302 |
- else |
|
303 |
- { |
|
301 |
+ } else { |
|
304 | 302 |
x <- x |
305 | 303 |
} |
306 | 304 |
}) |
... | ... |
@@ -1,3 +1,48 @@ |
1 |
+#' Normalized Dot Product |
|
2 |
+#' |
|
3 |
+#' This function calculates the similarity of all pairs of peaks from 2 |
|
4 |
+#' samples, using the spectra similarity |
|
5 |
+#' |
|
6 |
+#' |
|
7 |
+#' Efficiently computes the normalized dot product between every pair of peak |
|
8 |
+#' vectors and returns a similarity matrix. C code is called. |
|
9 |
+#' |
|
10 |
+#' @param x1 data matrix for sample 1 |
|
11 |
+#' @param x2 data matrix for sample 2 |
|
12 |
+#' @param t1 vector of retention times for sample 1 |
|
13 |
+#' @param t2 vector of retention times for sample 2 |
|
14 |
+#' @param df distance from diagonal to calculate similarity |
|
15 |
+#' @param D retention time penalty |
|
16 |
+#' @param timedf matrix of time differences to normalize to. if \code{NULL}, 0 |
|
17 |
+#' is used. |
|
18 |
+#' @param verbose logical, whether to print out information |
|
19 |
+#' @return |
|
20 |
+#' |
|
21 |
+#' matrix of similarities |
|
22 |
+#' @author Mark Robinson |
|
23 |
+#' @seealso \code{\link{dp}}, \code{\link{peaksAlignment}} |
|
24 |
+#' @references |
|
25 |
+#' |
|
26 |
+#' Mark D Robinson (2008). Methods for the analysis of gas chromatography - |
|
27 |
+#' mass spectrometry data \emph{PhD dissertation} University of Melbourne. |
|
28 |
+#' @keywords manip |
|
29 |
+#' @examples |
|
30 |
+#' |
|
31 |
+#' require(gcspikelite) |
|
32 |
+#' |
|
33 |
+#' # paths and files |
|
34 |
+#' gcmsPath<-paste(find.package("gcspikelite"),"data",sep="/") |
|
35 |
+#' cdfFiles<-dir(gcmsPath,"CDF",full=TRUE) |
|
36 |
+#' eluFiles<-dir(gcmsPath,"ELU",full=TRUE) |
|
37 |
+#' |
|
38 |
+#' # read data, peak detection results |
|
39 |
+#' pd<-peaksDataset(cdfFiles[1:2],mz=seq(50,550),rtrange=c(7.5,8.5)) |
|
40 |
+#' pd<-addAMDISPeaks(pd,eluFiles[1:2]) |
|
41 |
+#' |
|
42 |
+#' r<-normDotProduct(pd@peaksdata[[1]],pd@peaksdata[[2]]) |
|
43 |
+#' |
|
44 |
+#' @useDynLib flagme |
|
45 |
+#' @export normDotProduct |
|
1 | 46 |
normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)), |
2 | 47 |
D=1e+05, timedf=NULL, verbose=FALSE){ |
3 | 48 |
if(is.null(t1)) |
... | ... |
@@ -66,8 +111,47 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)) |
66 | 111 |
## } |
67 | 112 |
|
68 | 113 |
|
114 |
+ |
|
69 | 115 |
## RR ## |
70 | 116 |
## retention time penalized normDotProd |
117 |
+ |
|
118 |
+#' Retention Time Penalized Normalized Dot Product |
|
119 |
+#' |
|
120 |
+#' This function calculates the similarity of all pairs of peaks from 2 |
|
121 |
+#' samples, using the spectra similarity and the rretention time differencies |
|
122 |
+#' |
|
123 |
+#' Computes the normalized dot product between every pair of peak vectors in |
|
124 |
+#' the retention time window (\code{D})and returns a similarity matrix. |
|
125 |
+#' |
|
126 |
+#' @param s1 data matrix for sample 1 |
|
127 |
+#' @param s2 data matrix for sample 2 |
|
128 |
+#' @param t1 vector of retention times for sample 1 |
|
129 |
+#' @param t2 vector of retention times for sample 2 |
|
130 |
+#' @param D retention time window for the matching |
|
131 |
+#' @return matrix of similarities |
|
132 |
+#' @author Riccardo Romoli |
|
133 |
+#' @seealso \code{\link{peaksAlignment}} |
|
134 |
+#' @keywords manip |
|
135 |
+#' @examples |
|
136 |
+#' |
|
137 |
+#' ## Not Run |
|
138 |
+#' require(gcspikelite) |
|
139 |
+#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
140 |
+#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
141 |
+#' |
|
142 |
+#' # read data, peak detection results |
|
143 |
+#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
144 |
+#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
145 |
+#' snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
146 |
+#' sleep=0) |
|
147 |
+#' ## review peak picking |
|
148 |
+#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
149 |
+#' |
|
150 |
+#' r <- ndpRT(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
151 |
+#' pd@peaksrt[[1]], pd@peaksrt[[2]], D=50) |
|
152 |
+#' ## End (Not Run) |
|
153 |
+#' |
|
154 |
+#' @export ndpRT |
|
71 | 155 |
ndpRT <- function(s1, s2, t1, t2, D){ |
72 | 156 |
|
73 | 157 |
Normalize <- function(j){ |
... | ... |
@@ -115,7 +199,49 @@ ndpRT <- function(s1, s2, t1, t2, D){ |
115 | 199 |
} |
116 | 200 |
|
117 | 201 |
|
202 |
+ |
|
203 |
+ |
|
118 | 204 |
## correlation Alignment |
205 |
+ |
|
206 |
+#' Retention Time Penalized Correlation |
|
207 |
+#' |
|
208 |
+#' This function calculates the similarity of all pairs of peaks from 2 |
|
209 |
+#' samples, using the spectra similarity and the rretention time differencies |
|
210 |
+#' |
|
211 |
+#' Computes the Pearson carrelation between every pair of peak vectors in the |
|
212 |
+#' retention time window (\code{D})and returns the similarity matrix. |
|
213 |
+#' |
|
214 |
+#' @param d1 data matrix for sample 1 |
|
215 |
+#' @param d2 data matrix for sample 2 |
|
216 |
+#' @param t1 vector of retention times for sample 1 |
|
217 |
+#' @param t2 vector of retention times for sample 2 |
|
218 |
+#' @param D retention time window for the matching |
|
219 |
+#' @param penality penalization applied to the matching between two mass |
|
220 |
+#' spectra if \code{(t1-t2)>D} |
|
221 |
+#' @return matrix of similarities |
|
222 |
+#' @author Riccardo Romoli |
|
223 |
+#' @seealso \code{\link{peaksAlignment}} |
|
224 |
+#' @keywords manip |
|
225 |
+#' @examples |
|
226 |
+#' |
|
227 |
+#' ## Not Run |
|
228 |
+#' require(gcspikelite) |
|
229 |
+#' gcmsPath <- paste(find.package("gcspikelite"), "data", sep="/") |
|
230 |
+#' cdfFiles <- dir(gcmsPath,"CDF", full=TRUE) |
|
231 |
+#' ## read data, peak detection results |
|
232 |
+#' pd <- peaksDataset(cdfFiles[1:3], mz=seq(50,550), rtrange=c(7.5,10.5)) |
|
233 |
+#' pd <- addXCMSPeaks(files=cdfFiles[1:3], object=pd, peakPicking=c('mF'), |
|
234 |
+#' snthresh=3, fwhm=10, step=0.1, steps=2, mzdiff=0.5, |
|
235 |
+#' sleep=0) |
|
236 |
+#' ## review peak picking |
|
237 |
+#' plotChrom(pd, rtrange=c(7.5, 10.5), runs=c(1:3)) |
|
238 |
+#' |
|
239 |
+#' r <- corPrt(pd@peaksdata[[1]], pd@peaksdata[[2]], |
|
240 |
+#' pd@peaksrt[[1]], pd@peaksrt[[2]], D=50, penality=0.2) |
|
241 |
+#' ## End (Not Run) |
|
242 |
+#' |
|
243 |
+#' @importFrom stats complete.cases |
|
244 |
+#' @export corPrt |
|
119 | 245 |
corPrt <- function(d1, d2, t1, t2, D, penality=0.2){ |
120 | 246 |
D <- as.numeric(D) # time window in second |
121 | 247 |
pn <- as.numeric(penality)# penality if out of time window |
... | ... |
@@ -91,8 +91,8 @@ ndpRT <- function(s1, s2, t1, t2, D){ |
91 | 91 |
|
92 | 92 |
rtPen <- function(t1, t2, D){ |
93 | 93 |
## D espresso in secondi |
94 |
- t1 <- t1*60 # trasformo in secondi |
|
95 |
- t2 <- t2*60 # trasformo in secondi |
|
94 |
+ t1 <- t1/60 # trasformo in secondi |
|
95 |
+ t2 <- t2/60 # trasformo in secondi |
|
96 | 96 |
srt <- exp(-(((t1-t2)^2) / D^2)) # da articolo MR, modificato |
97 | 97 |
# era 2*D^2 |
98 | 98 |
return(srt) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/flagme@126268 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -116,7 +116,7 @@ ndpRT <- function(s1, s2, t1, t2, D){ |
116 | 116 |
|
117 | 117 |
|
118 | 118 |
## correlation Alignment |
119 |
-corPrt <- function(d1, d2, t1, t2, D=D, penality=0.2){ |
|
119 |
+corPrt <- function(d1, d2, t1, t2, D, penality=0.2){ |
|
120 | 120 |
D <- as.numeric(D) # time window in second |
121 | 121 |
pn <- as.numeric(penality)# penality if out of time window |
122 | 122 |
pearson <- function(x,y){ |
... | ... |
@@ -164,10 +164,10 @@ corPrt <- function(d1, d2, t1, t2, D=D, penality=0.2){ |
164 | 164 |
pearson(x2, y2) |
165 | 165 |
else |
166 | 166 |
pearson(x2, y2) - pn |
167 |
- else NA |
|
167 |
+ else 0 |
|
168 | 168 |
} |
169 | 169 |
} |
170 |
- |
|
170 |
+ |
|
171 | 171 |
r <- apply(r, MARGIN=c(1,2), function(x){ |
172 | 172 |
if(x < 0.2) |
173 | 173 |
{ |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/flagme@126266 bc3139a8-67e5-0310-9ffc-ced21a209358
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/flagme@126263 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -115,25 +115,21 @@ ndpRT <- function(s1, s2, t1, t2, D){ |
115 | 115 |
|
116 | 116 |
|
117 | 117 |
## correlation Alignment |
118 |
-corP <- function(object, idx1, idx2, D, penality, normalize){ |
|
119 |
- # |
|
118 |
+corPrt <- function(d1, d2, t1, t2, D=D, penality=0.2){ |
|
120 | 119 |
D <- as.numeric(D) # time window in second |
121 | 120 |
pn <- as.numeric(penality)# penality if out of time window |
122 |
- # |
|
123 | 121 |
pearson <- function(x,y){ |
124 | 122 |
size <- length(x) |
125 | 123 |
cfun <- .C("pearson", size=as.integer(size), x=as.double(x), |
126 | 124 |
y=as.double(y), result=double(1), PACKAGE='flagme') |
127 | 125 |
return(cfun[["result"]]) |
128 | 126 |
} |
129 |
- # |
|
130 | 127 |
Normalize <- function(j){ |
131 | 128 |
n <- apply(j, 2, function(k){ |
132 | 129 |
m <- k[which.max(k)] |
133 | 130 |
norm <- k/m*100 |
134 | 131 |
}) |
135 | 132 |
} |
136 |
- # |
|
137 | 133 |
Rank <- function(u) { |
138 | 134 |
if (length(u) == 0L) |
139 | 135 |
u |
... | ... |
@@ -145,14 +141,10 @@ corP <- function(object, idx1, idx2, D, penality, normalize){ |
145 | 141 |
else rank(u, na.last="keep") |
146 | 142 |
} |
147 | 143 |
# |
148 |
- if(normalize == TRUE){ |
|
149 |
- x <- Normalize(object@peaksdata[[idx1]]) |
|
150 |
- y <- Normalize(object@peaksdata[[idx2]]) |
|
151 |
- }else if(normalize == FALSE){ |
|
152 |
- x <- object@peaksdata[[idx1]] |
|
153 |
- y <- object@peaksdata[[idx2]] |
|
154 |
- } |
|
155 |
- method <- c("pearson", "kendall", "spearman") |
|
144 |
+ x <- Normalize(d1) |
|
145 |
+ y <- Normalize(d2) |
|
146 |
+ |
|
147 |
+ ## method <- c("pearson", "kendall", "spearman") |
|
156 | 148 |
ncx <- ncol(x) |
157 | 149 |
ncy <- ncol(y) |
158 | 150 |
r <- matrix(0, nrow=ncx, ncol=ncy) |
... | ... |
@@ -164,7 +156,7 @@ corP <- function(object, idx1, idx2, D, penality, normalize){ |
164 | 156 |
x2 <- rank(x2[ok]) |
165 | 157 |
y2 <- rank(y2[ok]) |
166 | 158 |
## insert rt penality in seconds |
167 |
- rtDiff <- object@peaksrt[[idx1]][i]*60 - object@peaksrt[[idx2]][j]*60 # retention time in seconds |
|
159 |
+ rtDiff <- t1[i]*60 - t2[j]*60 # retention time in seconds |
|
168 | 160 |
rtDiff <- abs(rtDiff) |
169 | 161 |
r[i, j] <- if (any(ok)) |
170 | 162 |
if(rtDiff <= D) |
... | ... |
@@ -174,6 +166,17 @@ corP <- function(object, idx1, idx2, D, penality, normalize){ |
174 | 166 |
else NA |
175 | 167 |
} |
176 | 168 |
} |
169 |
+ |
|
170 |
+ r <- apply(r, MARGIN=c(1,2), function(x){ |
|
171 |
+ if(x < 0.2) |
|
172 |
+ { |
|
173 |
+ x <- 0 |
|
174 |
+ } |
|
175 |
+ else |
|
176 |
+ { |
|
177 |
+ x <- x |
|
178 |
+ } |
|
179 |
+ }) |
|
177 | 180 |
rownames(r) <- colnames(x) |
178 | 181 |
colnames(r) <- colnames(y) |
179 | 182 |
return(r) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/flagme@126262 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -64,3 +64,117 @@ normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)) |
64 | 64 |
## } |
65 | 65 |
## matrix(1-out$score,ncol=ncol(x2)) |
66 | 66 |
## } |
67 |
+ |
|
68 |
+ |
|
69 |
+## RR ## |
|
70 |
+ndpRT <- function(s1, s2, t1, t2, D){ |
|
71 |
+ |
|
72 |
+ Normalize <- function(j){ |
|
73 |
+ n <- apply(j, 2, function(k){ |
|
74 |
+ m <- k[which.max(k)] |
|
75 |
+ norm <- k/m*100 |
|
76 |
+ }) |
|
77 |
+ return(n) |
|
78 |
+ } |
|
79 |
+ |
|
80 |
+ scoring <- function(s1, s2, t1, t2, D){ |
|
81 |
+ angle <- function(s1, s2){ |
|
82 |
+ theta <- acos(sum(s1*s2) / (sqrt(sum(s1 * s1)) * sqrt(sum(s2 * s2)))) |
|
83 |
+ theta <- 1-theta |
|
84 |
+ if(theta < 0) |
|
85 |
+ { |
|
86 |
+ theta <- 0 |
|
87 |
+ } |
|
88 |
+ return(theta) |
|
89 |
+ } |
|
90 |
+ |
|
91 |
+ rtPen <- function(t1, t2, D){ |
|
92 |
+ ## D espresso in secondi |
|
93 |
+ t1 <- t1*60 # trasformo in secondi |
|
94 |
+ t2 <- t2*60 # trasformo in secondi |
|
95 |
+ srt <- exp(-(((t1-t2)^2) / D^2)) # da articolo MR, modificato |
|
96 |
+ # era 2*D^2 |
|
97 |
+ return(srt) |
|
98 |
+ } |
|
99 |
+ |
|
100 |
+ score <- angle(s1, s2) * rtPen(t1, t2, D) |
|
101 |
+ return(score) |
|
102 |
+ } |
|
103 |
+ |
|
104 |
+ s1 <- Normalize(s1) |
|
105 |
+ s2 <- Normalize(s2) |
|
106 |
+ |
|
107 |
+ res <- matrix(0, nrow=ncol(s1), ncol=ncol(s2)) |
|
108 |
+ for(i in 1:ncol(s1)){ |
|
109 |
+ for(j in 1:ncol(s2)){ |
|
110 |
+ res[i,j] <- scoring(s1[,i], s2[,j], t1[i], t2[j], D=D) |
|
111 |
+ } |
|
112 |
+ } |
|
113 |
+ return(res) |
|
114 |
+} |
|
115 |
+ |
|
116 |
+ |
|
117 |
+## correlation Alignment |
|
118 |
+corP <- function(object, idx1, idx2, D, penality, normalize){ |
|
119 |
+ # |
|
120 |
+ D <- as.numeric(D) # time window in second |
|
121 |
+ pn <- as.numeric(penality)# penality if out of time window |
|
122 |
+ # |
|
123 |
+ pearson <- function(x,y){ |
|
124 |
+ size <- length(x) |
|
125 |
+ cfun <- .C("pearson", size=as.integer(size), x=as.double(x), |
|
126 |
+ y=as.double(y), result=double(1), PACKAGE='flagme') |
|
127 |
+ return(cfun[["result"]]) |
|
128 |
+ } |
|
129 |
+ # |
|
130 |
+ Normalize <- function(j){ |
|
131 |
+ n <- apply(j, 2, function(k){ |
|
132 |
+ m <- k[which.max(k)] |
|
133 |
+ norm <- k/m*100 |
|
134 |
+ }) |
|
135 |
+ } |
|
136 |
+ # |
|
137 |
+ Rank <- function(u) { |
|
138 |
+ if (length(u) == 0L) |
|
139 |
+ u |
|
140 |
+ else if (is.matrix(u)) { |
|
141 |
+ if (nrow(u) > 1L) |
|
142 |
+ apply(u, 2L, rank, na.last="keep") |
|
143 |
+ else row(u) |
|
144 |
+ } |
|
145 |
+ else rank(u, na.last="keep") |
|
146 |
+ } |
|
147 |
+ # |
|
148 |
+ if(normalize == TRUE){ |
|
149 |
+ x <- Normalize(object@peaksdata[[idx1]]) |
|
150 |
+ y <- Normalize(object@peaksdata[[idx2]]) |
|
151 |
+ }else if(normalize == FALSE){ |
|
152 |
+ x <- object@peaksdata[[idx1]] |
|
153 |
+ y <- object@peaksdata[[idx2]] |
|
154 |
+ } |
|
155 |
+ method <- c("pearson", "kendall", "spearman") |
|
156 |
+ ncx <- ncol(x) |
|
157 |
+ ncy <- ncol(y) |
|
158 |
+ r <- matrix(0, nrow=ncx, ncol=ncy) |
|
159 |
+ for (i in seq_len(ncx)) { |
|
160 |
+ for (j in seq_len(ncy)) { |
|
161 |
+ x2 <- x[, i] |
|
162 |
+ y2 <- y[, j] |
|
163 |
+ ok <- complete.cases(x2, y2) |
|
164 |
+ x2 <- rank(x2[ok]) |
|
165 |
+ y2 <- rank(y2[ok]) |
|
166 |
+ ## insert rt penality in seconds |
|
167 |
+ rtDiff <- object@peaksrt[[idx1]][i]*60 - object@peaksrt[[idx2]][j]*60 # retention time in seconds |
|
168 |
+ rtDiff <- abs(rtDiff) |
|
169 |
+ r[i, j] <- if (any(ok)) |
|
170 |
+ if(rtDiff <= D) |
|
171 |
+ pearson(x2, y2) |
|
172 |
+ else |
|
173 |
+ pearson(x2, y2) - pn |
|
174 |
+ else NA |
|
175 |
+ } |
|
176 |
+ } |
|
177 |
+ rownames(r) <- colnames(x) |
|
178 |
+ colnames(r) <- colnames(y) |
|
179 |
+ return(r) |
|
180 |
+} |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/flagme@114594 bc3139a8-67e5-0310-9ffc-ced21a209358
From: Riccardo <r@mbp>
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/flagme@111489 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,30 +1,66 @@ |
1 |
-normDotProduct<-function(x1,x2,t1=NULL,t2=NULL,df=max(ncol(x1),ncol(x2)),D=100000,timedf=NULL,verbose=FALSE) { |
|
1 |
+normDotProduct <- function (x1, x2, t1=NULL, t2=NULL, df=max(ncol(x1), ncol(x2)), |
|
2 |
+ D=1e+05, timedf=NULL, verbose=FALSE){ |
|
3 |
+ if(is.null(t1)) |
|
4 |
+ t1 <- 1:ncol(x1) |
|
5 |
+ if(is.null(t2)) |
|
6 |
+ t2 <- 1:ncol(x2) |
|
7 |
+ if(nrow(x1) != nrow(x2) | length(t1) != ncol(x1) | length(t2) != |
|
8 |
+ ncol(x2)){ |
|
9 |
+ stop("One of these is not true: nrow(x1)=nrow(x2), length(t1)=ncol(x1), length(t2)=ncol(x2).") |
|
10 |
+ } |
|
11 |
+ score <- matrix(0, nrow=ncol(x1), ncol=ncol(x2)) |
|
12 |
+ if(length(D) == 1 & is.null(timedf)){ |
|
13 |
+ out <- .C("cos_ndp_lowmem", score=as.double(score), |
|
14 |
+ nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
15 |
+ nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), |
|
16 |
+ t1=as.double(t1), t2 = as.double(t2), D = as.double(D), |
|
17 |
+ df=as.integer(df), PACKAGE="flagme") |
|
18 |
+ }else{ |
|
19 |
+ if(length(D) == 1) |
|
20 |
+ D <- matrix(D, nrow=ncol(x1), ncol=ncol(x2)) |
|
21 |
+ if(ncol(x1) != nrow(D) | ncol(x2) != ncol(D)) |
|
22 |
+ stop("D must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be scalar.") |
|
23 |
+ if(is.null(timedf)) |
|
24 |
+ timedf <- matrix(0, nrow=ncol(x1), ncol=ncol(x2)) |
|
25 |
+ if(ncol(x1) != nrow(timedf) | ncol(x2) != ncol(timedf)) |
|
26 |
+ stop("'timedf' must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be set to NULL.") |
|
27 |
+ out <- .C("cos_ndp_himem", score=as.double(score), |
|
28 |
+ nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
29 |
+ nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), |
|
30 |
+ D=as.double(D), df=as.integer(df), timedf=as.double(timedf), |
|
31 |
+ PACKAGE="flagme") |
|
32 |
+ } |
|
33 |
+ NDP <- matrix(1 - out$score, ncol=ncol(x2)) |
|
34 |
+ NDP[is.nan(NDP)] <- 0 ## remove NaN |
|
35 |
+ return(NDP) |
|
36 |
+} |
|
2 | 37 |
|
3 |
- if(is.null(t1)) t1<-1:ncol(x1) |
|
4 |
- if(is.null(t2)) t2<-1:ncol(x2) |
|
38 |
+## normDotProduct<-function(x1,x2,t1=NULL,t2=NULL,df=max(ncol(x1),ncol(x2)),D=100000,timedf=NULL,verbose=FALSE){ |
|
39 |
+## if(is.null(t1)) t1<-1:ncol(x1) |
|
40 |
+## if(is.null(t2)) t2<-1:ncol(x2) |
|
5 | 41 |
|
6 |
- if( nrow(x1) != nrow(x2) | length(t1)!=ncol(x1) | length(t2)!=ncol(x2) ) { |
|
7 |
- stop("One of these is not true: nrow(x1)=nrow(x2), length(t1)=ncol(x1), length(t2)=ncol(x2).") |
|
8 |
- } |
|
42 |
+## if( nrow(x1) != nrow(x2) | length(t1)!=ncol(x1) | length(t2)!=ncol(x2) ) { |
|
43 |
+## stop("One of these is not true: nrow(x1)=nrow(x2), length(t1)=ncol(x1), length(t2)=ncol(x2).") |
|
44 |
+## } |
|
9 | 45 |
|
10 |
- score<-matrix(0,nrow=ncol(x1),ncol=ncol(x2)) |
|
46 |
+## score<-matrix(0,nrow=ncol(x1),ncol=ncol(x2)) |
|
11 | 47 |
|
12 |
- if( length(D)==1 & is.null(timedf) ) { |
|
13 |
- out<-.C("cos_ndp_lowmem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
14 |
- nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), t1=as.double(t1), |
|
15 |
- t2=as.double(t2),D=as.double(D),df=as.integer(df), PACKAGE="flagme") |
|
16 |
- } else { |
|
17 |
- if (length(D)==1) |
|
18 |
- D<-matrix(D,nrow=ncol(x1),ncol=ncol(x2)) |
|
19 |
- if( ncol(x1) != nrow(D) | ncol(x2)!=ncol(D) ) |
|
20 |
- stop("D must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be scalar.") |
|
21 |
- if( is.null(timedf) ) |
|
22 |
- timedf<-matrix(0,nrow=ncol(x1),ncol=ncol(x2)) |
|
23 |
- if( ncol(x1) != nrow(timedf) | ncol(x2)!=ncol(timedf) ) |
|
24 |
- stop("'timedf' must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be set to NULL.") |
|
25 |
- out<-.C("cos_ndp_himem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
26 |
- nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), |
|
27 |
- D=as.double(D),df=as.integer(df),timedf=as.double(timedf), PACKAGE="flagme") |
|
28 |
- } |
|
29 |
- matrix(1-out$score,ncol=ncol(x2)) |
|
30 |
-} |
|
48 |
+## if( length(D)==1 & is.null(timedf) ) { |
|
49 |
+## out<-.C("cos_ndp_lowmem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
50 |
+## nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), t1=as.double(t1), |
|
51 |
+## t2=as.double(t2),D=as.double(D),df=as.integer(df), PACKAGE="flagme") |
|
52 |
+## } else { |
|
53 |
+## if (length(D)==1) |
|
54 |
+## D<-matrix(D,nrow=ncol(x1),ncol=ncol(x2)) |
|
55 |
+## if( ncol(x1) != nrow(D) | ncol(x2)!=ncol(D) ) |
|
56 |
+## stop("D must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be scalar.") |
|
57 |
+## if( is.null(timedf) ) |
|
58 |
+## timedf<-matrix(0,nrow=ncol(x1),ncol=ncol(x2)) |
|
59 |
+## if( ncol(x1) != nrow(timedf) | ncol(x2)!=ncol(timedf) ) |
|
60 |
+## stop("'timedf' must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be set to NULL.") |
|
61 |
+## out<-.C("cos_ndp_himem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
62 |
+## nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), |
|
63 |
+## D=as.double(D),df=as.integer(df),timedf=as.double(timedf), PACKAGE="flagme") |
|
64 |
+## } |
|
65 |
+## matrix(1-out$score,ncol=ncol(x2)) |
|
66 |
+## } |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/flagme@34957 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,30 @@ |
1 |
+normDotProduct<-function(x1,x2,t1=NULL,t2=NULL,df=max(ncol(x1),ncol(x2)),D=100000,timedf=NULL,verbose=FALSE) { |
|
2 |
+ |
|
3 |
+ if(is.null(t1)) t1<-1:ncol(x1) |
|
4 |
+ if(is.null(t2)) t2<-1:ncol(x2) |
|
5 |
+ |
|
6 |
+ if( nrow(x1) != nrow(x2) | length(t1)!=ncol(x1) | length(t2)!=ncol(x2) ) { |
|
7 |
+ stop("One of these is not true: nrow(x1)=nrow(x2), length(t1)=ncol(x1), length(t2)=ncol(x2).") |
|
8 |
+ } |
|
9 |
+ |
|
10 |
+ score<-matrix(0,nrow=ncol(x1),ncol=ncol(x2)) |
|
11 |
+ |
|
12 |
+ if( length(D)==1 & is.null(timedf) ) { |
|
13 |
+ out<-.C("cos_ndp_lowmem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
14 |
+ nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), t1=as.double(t1), |
|
15 |
+ t2=as.double(t2),D=as.double(D),df=as.integer(df), PACKAGE="flagme") |
|
16 |
+ } else { |
|
17 |
+ if (length(D)==1) |
|
18 |
+ D<-matrix(D,nrow=ncol(x1),ncol=ncol(x2)) |
|
19 |
+ if( ncol(x1) != nrow(D) | ncol(x2)!=ncol(D) ) |
|
20 |
+ stop("D must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be scalar.") |
|
21 |
+ if( is.null(timedf) ) |
|
22 |
+ timedf<-matrix(0,nrow=ncol(x1),ncol=ncol(x2)) |
|
23 |
+ if( ncol(x1) != nrow(timedf) | ncol(x2)!=ncol(timedf) ) |
|
24 |
+ stop("'timedf' must have dimensions nrow=ncol(x1) ncol=ncol(x2) or be set to NULL.") |
|
25 |
+ out<-.C("cos_ndp_himem", score=as.double(score), nr=as.integer(nrow(x1)), nc1=as.integer(ncol(x1)), |
|
26 |
+ nc2=as.integer(ncol(x2)), x1=as.double(x1), x2=as.double(x2), |
|
27 |
+ D=as.double(D),df=as.integer(df),timedf=as.double(timedf), PACKAGE="flagme") |
|
28 |
+ } |
|
29 |
+ matrix(1-out$score,ncol=ncol(x2)) |
|
30 |
+} |