This reverts commit 128a3495f9b0a02a9ba8711527d1616406485b29.
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: structToolbox |
2 | 2 |
Type: Package |
3 | 3 |
Title: Some tools bult using the struct package |
4 |
-Version: 0.8.3 |
|
4 |
+Version: 0.8.2 |
|
5 | 5 |
Author: Gavin Rhys Lloyd |
6 | 6 |
Maintainer: Gavin Rhys Lloyd <[email protected]> |
7 | 7 |
Description: Extends the class templates provided by the struct package to provide methods for training PCA, PLS models with cross-validation, permutation testing etc. |
... | ... |
@@ -44,7 +44,6 @@ Collate: |
44 | 44 |
'glog_class.R' |
45 | 45 |
'grid_search_1d_class.R' |
46 | 46 |
'hca_class.R' |
47 |
- 'kfold_xval2_class.R' |
|
48 | 47 |
'kfold_xval_class.R' |
49 | 48 |
'kfold_xval_charts.R' |
50 | 49 |
'knn_impute_class.R' |
... | ... |
@@ -76,7 +75,7 @@ Collate: |
76 | 75 |
'vec_norm_class.R' |
77 | 76 |
'wilcox_test_class.R' |
78 | 77 |
'zzz.R' |
79 |
-Depends: struct (>= 0.4.1), struct(< 0.5.0) |
|
78 |
+Depends: struct (== 0.4.1) |
|
80 | 79 |
Imports: ggplot2, |
81 | 80 |
pmp, |
82 | 81 |
gridExtra, |
... | ... |
@@ -104,7 +103,7 @@ Suggests: |
104 | 103 |
sbcms, |
105 | 104 |
Rtsne |
106 | 105 |
Remotes: computational-metabolomics/pmp, |
107 |
- computational-metabolomics/struct, |
|
106 |
+ computational-metabolomics/[email protected], |
|
108 | 107 |
computational-metabolomics/sbcms |
109 | 108 |
VignetteBuilder: knitr |
110 | 109 |
biocViews: WorkflowStep |
... | ... |
@@ -19,8 +19,7 @@ PCA<-setClass( |
19 | 19 |
outputs.eigenvalues='data.frame', |
20 | 20 |
outputs.ssx='numeric', |
21 | 21 |
outputs.correlation='data.frame', |
22 |
- outputs.that='dataset', |
|
23 |
- outputs.xhat='dataset' |
|
22 |
+ outputs.that='dataset' |
|
24 | 23 |
), |
25 | 24 |
prototype = list(name='Principal Component Analysis (PCA)', |
26 | 25 |
description='PCA is a multivariate data reduction technique. It summarises the data in a smaller number of Principal Components that describe the maximum variation present in the dataset.', |
... | ... |
@@ -108,12 +107,6 @@ setMethod(f="model.predict", |
108 | 107 |
dataset.data(S)=that |
109 | 108 |
output.value(M,'that')=S |
110 | 109 |
|
111 |
- xhat=as.matrix(that)%*%as.matrix(t(P)) |
|
112 |
- xhat=as.data.frame(xhat) |
|
113 |
- rownames(that)=rownames(X) |
|
114 |
- colnames(xhat)=colnames(X) |
|
115 |
- M$xhat=dataset(data=xhat,sample_meta=D$sample_meta,variable_meta=D$variable_meta) |
|
116 |
- |
|
117 | 110 |
return(M) |
118 | 111 |
} |
119 | 112 |
) |
... | ... |
@@ -167,6 +167,7 @@ eval_loess=function(x,X,Y,k=10,p=0.66) |
167 | 167 |
# Y = observed values |
168 | 168 |
# k = number of replicates |
169 | 169 |
# p = proportion in training |
170 |
+ |
|
170 | 171 |
residual=numeric(k) |
171 | 172 |
for (i in 1:k) |
172 | 173 |
{ |
... | ... |
@@ -180,16 +181,9 @@ eval_loess=function(x,X,Y,k=10,p=0.66) |
180 | 181 |
yy2=Y[X %in% xx2] |
181 | 182 |
|
182 | 183 |
|
183 |
- loessMod <- loess(yy ~ xx, span=x) |
|
184 |
- |
|
185 |
- # check for NaN |
|
186 |
- if (any(is.nan(loessMod$fitted))){ |
|
187 |
- residual[i]=99999 |
|
188 |
- } else { |
|
189 |
- |
|
190 |
- smoothed=stats::predict(loessMod,newdata=xx2) |
|
191 |
- residual[i]=sum((smoothed-yy2)^2) |
|
192 |
- } |
|
184 |
+ loessMod <- loess(yy ~ xx, span=x) # 25% smoothing span |
|
185 |
+ smoothed=stats::predict(loessMod,newdata=xx2) |
|
186 |
+ residual[i]=sum((smoothed-yy2)^2) |
|
193 | 187 |
} |
194 | 188 |
return(sqrt(mean(residual))) |
195 | 189 |
} |
196 | 190 |
deleted file mode 100644 |
... | ... |
@@ -1,128 +0,0 @@ |
1 |
-#' kfold_xval model class |
|
2 |
-#' |
|
3 |
-#' Applies k-fold crossvalidation to a model or model.seq() |
|
4 |
-#' @export kfold_xval2 |
|
5 |
-#' @examples |
|
6 |
-#' I = kfold_xval2() |
|
7 |
-kfold_xval2<-setClass( |
|
8 |
- "kfold_xval2", |
|
9 |
- contains='resampler', |
|
10 |
- slots=c(params.folds='numeric', |
|
11 |
- params.method='character', |
|
12 |
- params.factor_name='entity', |
|
13 |
- outputs.metric='data.frame' |
|
14 |
- ), |
|
15 |
- prototype = list(name='k-fold cross-validation', |
|
16 |
- type="resampling", |
|
17 |
- result='metric', |
|
18 |
- params.folds=10, |
|
19 |
- params.method='venetian' |
|
20 |
- ) |
|
21 |
-) |
|
22 |
- |
|
23 |
-#' @export |
|
24 |
-#' @template run |
|
25 |
-setMethod(f="run", |
|
26 |
- signature=c("kfold_xval2",'dataset','metric'), |
|
27 |
- definition=function(I,D,MET=NULL) |
|
28 |
- { |
|
29 |
- X=dataset.data(D) |
|
30 |
- |
|
31 |
- |
|
32 |
- WF=models(I) |
|
33 |
- |
|
34 |
- # venetian 123123123123 |
|
35 |
- if (param.value(I,'method')=='venetian') |
|
36 |
- { |
|
37 |
- fold_id=rep(1:param.value(I,'folds'),length.out=nrow(X)) |
|
38 |
- } else if (param.value(I,'method')=='blocks') |
|
39 |
- { # blocks 111122223333 |
|
40 |
- fold_id=rep(1:param.value(I,'folds'),length.out=nrow(X)) |
|
41 |
- fold_id=sort(fold_id) |
|
42 |
- } else if (param.value(I,'method')=='random') { |
|
43 |
- fold_id=rep(1:param.value(I,'folds'),length.out=nrow(X)) |
|
44 |
- fold_id=sample(fold_id,length(fold_id),replace = FALSE) |
|
45 |
- } else { |
|
46 |
- stop('unknown method for cross-validation. (try "venetian", "blocks" or "random")') |
|
47 |
- } |
|
48 |
- |
|
49 |
- # for each value of k, split the data and run the workflow |
|
50 |
- for (i in 1:param.value(I,'folds')) |
|
51 |
- { |
|
52 |
- # prep the training data |
|
53 |
- TrainX=X[fold_id!=i,,drop=FALSE] |
|
54 |
- TrainY=Y[fold_id!=i,,drop=FALSE] |
|
55 |
- dtrain=dataset(data=TrainX,sample_meta=TrainY) |
|
56 |
- |
|
57 |
- TestX=X[fold_id==i,,drop=FALSE] |
|
58 |
- TestY=Y[fold_id==i,,drop=FALSE] |
|
59 |
- dtest=dataset(data=TestX,sample_meta=TestY) |
|
60 |
- |
|
61 |
- if (is(WF,'model_OR_model.seq')) |
|
62 |
- # HAS TO BE A model OR model.seq |
|
63 |
- { |
|
64 |
- WF=model.train(WF,dtrain) |
|
65 |
- # apply the model |
|
66 |
- WF=model.predict(WF,dtrain) |
|
67 |
- p=predicted(WF) |
|
68 |
- # metric |
|
69 |
- if (MET@actual=='sample_meta') { |
|
70 |
- yhat=p |
|
71 |
- } else if (MET@actual=='data') { |
|
72 |
- yhat=p$data |
|
73 |
- } else { |
|
74 |
- stop('MET$actual not implemented yet') |
|
75 |
- } |
|
76 |
- YHATtr[fold_id!=i,]=yhat |
|
77 |
- |
|
78 |
- # test set |
|
79 |
- WF=model.predict(WF,dtest) |
|
80 |
- p=predicted(WF) |
|
81 |
- |
|
82 |
- if (MET@actual=='sample_meta') { |
|
83 |
- yhat=p |
|
84 |
- } else if (MET@actual=='data') { |
|
85 |
- yhat=p$data |
|
86 |
- } else { |
|
87 |
- stop('MET$actual not implemented yet') |
|
88 |
- } |
|
89 |
- YHAT[fold_id==i,]=yhat |
|
90 |
- |
|
91 |
- |
|
92 |
- } else if (is(WF,'iterator')) |
|
93 |
- { |
|
94 |
- stop('not implemented yet') |
|
95 |
- } |
|
96 |
- # validation set...?? |
|
97 |
- # WF=predict(WF,dval) |
|
98 |
- # p=predicted(WF[length(WF)]) |
|
99 |
- # val_result[,1]=p[,1] |
|
100 |
- |
|
101 |
- #all_results[((nrow(X)*(i-1))+1):(nrow(X)*i),]=fold_results |
|
102 |
- } |
|
103 |
- |
|
104 |
- if (MET@actual=='data') { |
|
105 |
- # if its a model sequence get the prediction from the penultimate step |
|
106 |
- # for comparison with the predictions |
|
107 |
- if (is(WF,'model_OR_model.seq')) { |
|
108 |
- # apply model to data |
|
109 |
- WF=model.apply(WF,D) |
|
110 |
- n=length(WF) |
|
111 |
- if (n>1) {# just in case a sequence of 1 |
|
112 |
- Y=predicted(WF[n-1])$data |
|
113 |
- } |
|
114 |
- } |
|
115 |
- } |
|
116 |
- |
|
117 |
- # test sets metric |
|
118 |
- df=data.frame('training_set'=0,'test_set'=0,'metric'=class(MET)[[1]]) |
|
119 |
- MET=calculate(MET,Y,YHAT) |
|
120 |
- df$training_set=value(MET) |
|
121 |
- # training set metric |
|
122 |
- MET=calculate(MET,Y,YHATtr) |
|
123 |
- df$test_set=value(MET) |
|
124 |
- I$metric=df |
|
125 |
- return(I) |
|
126 |
- } |
|
127 |
-) |
|
128 |
- |
... | ... |
@@ -11,8 +11,9 @@ permutation_test2<-setClass( |
11 | 11 |
slots=c( |
12 | 12 |
params.number_of_permutations='numeric', |
13 | 13 |
params.collect='character', |
14 |
- outputs.metric_permuted='data.frame', |
|
15 |
- outputs.metric_unpermuted='data.frame', |
|
14 |
+ outputs.results.permuted='data.frame', |
|
15 |
+ outputs.results.unpermuted='data.frame', |
|
16 |
+ outputs.metric='data.frame', |
|
16 | 17 |
outputs.collected='entity' |
17 | 18 |
), |
18 | 19 |
prototype = list(name='permutation test', |
... | ... |
@@ -39,6 +40,9 @@ setMethod(f="run", |
39 | 40 |
WF=models(I) |
40 | 41 |
n=param.value(I,'number_of_permutations') |
41 | 42 |
|
43 |
+ all_results_permuted=data.frame('actual'=rep(y[,1],n),'predicted'=rep(y[,1],n),'permutation'=0) |
|
44 |
+ all_results_unpermuted=data.frame('actual'=rep(y[,1],n),'predicted'=rep(y[,1],n),'permutation'=0) |
|
45 |
+ |
|
42 | 46 |
collected=list(permuted=list(),unpermuted=list()) |
43 | 47 |
|
44 | 48 |
for (i in 1:n) |
... | ... |
@@ -138,8 +142,8 @@ setMethod(f="run", |
138 | 142 |
|
139 | 143 |
} |
140 | 144 |
# store results |
141 |
- output.value(I,'metric_permuted')=all_results_permuted |
|
142 |
- output.value(I,'metric_unpermuted')=all_results_unpermuted |
|
145 |
+ output.value(I,'results.permuted')=all_results_permuted |
|
146 |
+ output.value(I,'results.unpermuted')=all_results_unpermuted |
|
143 | 147 |
return(I) |
144 | 148 |
} |
145 | 149 |
) |
... | ... |
@@ -19,14 +19,9 @@ setMethod(f="calculate", |
19 | 19 |
signature=c('r_squared'), |
20 | 20 |
definition=function(obj,Y,Yhat) |
21 | 21 |
{ |
22 |
- |
|
23 |
- M=matrix(colMeans(Y),nrow=1) |
|
24 |
- O=matrix(1,nrow=nrow(Y),ncol=1) |
|
25 |
- M=O %*% M |
|
26 |
- |
|
27 |
- SSR = sum(sum((Yhat-M)^2)) |
|
28 |
- SSE = sum(sum((Y-Yhat)^2)) |
|
29 |
- SSTO = sum(sum((Y-M)^2)) |
|
22 |
+ SSR = sum((Yhat-mean(Y))^2) |
|
23 |
+ SSE = sum((Y-Yhat)^2) |
|
24 |
+ SSTO = sum((Y-mean(Y))^2) |
|
30 | 25 |
|
31 | 26 |
R2=1-(SSE/SSTO) |
32 | 27 |
|
33 | 28 |
deleted file mode 100644 |
... | ... |
@@ -1,13 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/kfold_xval2_class.R |
|
3 |
-\docType{class} |
|
4 |
-\name{kfold_xval2-class} |
|
5 |
-\alias{kfold_xval2-class} |
|
6 |
-\alias{kfold_xval2} |
|
7 |
-\title{kfold_xval model class} |
|
8 |
-\description{ |
|
9 |
-Applies k-fold crossvalidation to a model or model.seq() |
|
10 |
-} |
|
11 |
-\examples{ |
|
12 |
-I = kfold_xval2() |
|
13 |
-} |
... | ... |
@@ -1,14 +1,13 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/bootstrap_class.R, |
3 | 3 |
% R/forward_selection_by_rank_class.R, R/grid_search_1d_class.R, |
4 |
-% R/kfold_xval2_class.R, R/kfold_xval_class.R, R/permutation_test2_class.R, |
|
4 |
+% R/kfold_xval_class.R, R/permutation_test2_class.R, |
|
5 | 5 |
% R/permutation_test_class.R, R/permute_sample_order_class.R, R/run_doc.R |
6 | 6 |
\docType{methods} |
7 | 7 |
\name{run,bootstrap,dataset,metric-method} |
8 | 8 |
\alias{run,bootstrap,dataset,metric-method} |
9 | 9 |
\alias{run,forward_selection_byrank,dataset,metric-method} |
10 | 10 |
\alias{run,grid_search_1d,dataset,metric-method} |
11 |
-\alias{run,kfold_xval2,dataset,metric-method} |
|
12 | 11 |
\alias{run,kfold_xval,dataset,metric-method} |
13 | 12 |
\alias{run,permutation_test2,dataset,metric-method} |
14 | 13 |
\alias{run,permutation_test,dataset,metric-method} |
... | ... |
@@ -22,8 +21,6 @@ |
22 | 21 |
|
23 | 22 |
\S4method{run}{grid_search_1d,dataset,metric}(I, D, MET) |
24 | 23 |
|
25 |
-\S4method{run}{kfold_xval2,dataset,metric}(I, D, MET = NULL) |
|
26 |
- |
|
27 | 24 |
\S4method{run}{kfold_xval,dataset,metric}(I, D, MET = NULL) |
28 | 25 |
|
29 | 26 |
\S4method{run}{permutation_test2,dataset,metric}(I, D, MET = NULL) |