Browse code

Revert "version bump 0.8.3"

This reverts commit 128a3495f9b0a02a9ba8711527d1616406485b29.

Gavin Rhys Lloyd authored on 19/11/2019 09:40:22
Showing 9 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: structToolbox
2 2
 Type: Package
3 3
 Title: Some tools bult using the struct package
4
-Version: 0.8.3
4
+Version: 0.8.2
5 5
 Author: Gavin Rhys Lloyd
6 6
 Maintainer: Gavin Rhys Lloyd <[email protected]>
7 7
 Description: Extends the class templates provided by the struct package to provide methods for training PCA, PLS models with cross-validation, permutation testing etc.
... ...
@@ -44,7 +44,6 @@ Collate:
44 44
     'glog_class.R'
45 45
     'grid_search_1d_class.R'
46 46
     'hca_class.R'
47
-    'kfold_xval2_class.R'
48 47
     'kfold_xval_class.R'
49 48
     'kfold_xval_charts.R'
50 49
     'knn_impute_class.R'
... ...
@@ -76,7 +75,7 @@ Collate:
76 75
     'vec_norm_class.R'
77 76
     'wilcox_test_class.R'
78 77
     'zzz.R'
79
-Depends: struct (>= 0.4.1), struct(< 0.5.0)
78
+Depends: struct (== 0.4.1)
80 79
 Imports: ggplot2,
81 80
  pmp,
82 81
  gridExtra,
... ...
@@ -104,7 +103,7 @@ Suggests:
104 103
  sbcms,
105 104
  Rtsne
106 105
 Remotes: computational-metabolomics/pmp,
107
-  computational-metabolomics/struct,
106
+  computational-metabolomics/[email protected],
108 107
   computational-metabolomics/sbcms
109 108
 VignetteBuilder: knitr
110 109
 biocViews: WorkflowStep
... ...
@@ -42,7 +42,6 @@ export(grid_search_1d)
42 42
 export(gs_line)
43 43
 export(hca_dendrogram)
44 44
 export(kfold_xval)
45
-export(kfold_xval2)
46 45
 export(kfoldxcv_grid)
47 46
 export(kfoldxcv_metric)
48 47
 export(knn_impute)
... ...
@@ -19,8 +19,7 @@ PCA<-setClass(
19 19
         outputs.eigenvalues='data.frame',
20 20
         outputs.ssx='numeric',
21 21
         outputs.correlation='data.frame',
22
-        outputs.that='dataset',
23
-        outputs.xhat='dataset'
22
+        outputs.that='dataset'
24 23
     ),
25 24
     prototype = list(name='Principal Component Analysis (PCA)',
26 25
         description='PCA is a multivariate data reduction technique. It summarises the data in a smaller number of Principal Components that describe the maximum variation present in the dataset.',
... ...
@@ -108,12 +107,6 @@ setMethod(f="model.predict",
108 107
         dataset.data(S)=that
109 108
         output.value(M,'that')=S
110 109
 
111
-        xhat=as.matrix(that)%*%as.matrix(t(P))
112
-        xhat=as.data.frame(xhat)
113
-        rownames(that)=rownames(X)
114
-        colnames(xhat)=colnames(X)
115
-        M$xhat=dataset(data=xhat,sample_meta=D$sample_meta,variable_meta=D$variable_meta)
116
-
117 110
         return(M)
118 111
     }
119 112
 )
... ...
@@ -167,6 +167,7 @@ eval_loess=function(x,X,Y,k=10,p=0.66)
167 167
     # Y = observed values
168 168
     # k = number of replicates
169 169
     # p = proportion in training
170
+
170 171
     residual=numeric(k)
171 172
     for (i in 1:k)
172 173
     {
... ...
@@ -180,16 +181,9 @@ eval_loess=function(x,X,Y,k=10,p=0.66)
180 181
         yy2=Y[X %in% xx2]
181 182
 
182 183
 
183
-        loessMod <- loess(yy ~ xx, span=x)
184
-
185
-        # check for NaN
186
-        if (any(is.nan(loessMod$fitted))){
187
-            residual[i]=99999
188
-        } else {
189
-
190
-            smoothed=stats::predict(loessMod,newdata=xx2)
191
-            residual[i]=sum((smoothed-yy2)^2)
192
-        }
184
+        loessMod <- loess(yy ~ xx, span=x) # 25% smoothing span
185
+        smoothed=stats::predict(loessMod,newdata=xx2)
186
+        residual[i]=sum((smoothed-yy2)^2)
193 187
     }
194 188
     return(sqrt(mean(residual)))
195 189
 }
196 190
deleted file mode 100644
... ...
@@ -1,128 +0,0 @@
1
-#' kfold_xval model class
2
-#'
3
-#' Applies k-fold crossvalidation to a model or model.seq()
4
-#' @export kfold_xval2
5
-#' @examples
6
-#' I = kfold_xval2()
7
-kfold_xval2<-setClass(
8
-    "kfold_xval2",
9
-    contains='resampler',
10
-    slots=c(params.folds='numeric',
11
-        params.method='character',
12
-        params.factor_name='entity',
13
-        outputs.metric='data.frame'
14
-    ),
15
-    prototype = list(name='k-fold cross-validation',
16
-        type="resampling",
17
-        result='metric',
18
-        params.folds=10,
19
-        params.method='venetian'
20
-    )
21
-)
22
-
23
-#' @export
24
-#' @template run
25
-setMethod(f="run",
26
-    signature=c("kfold_xval2",'dataset','metric'),
27
-    definition=function(I,D,MET=NULL)
28
-    {
29
-        X=dataset.data(D)
30
-
31
-
32
-        WF=models(I)
33
-
34
-        # venetian 123123123123
35
-        if (param.value(I,'method')=='venetian')
36
-        {
37
-            fold_id=rep(1:param.value(I,'folds'),length.out=nrow(X))
38
-        } else if (param.value(I,'method')=='blocks')
39
-        { # blocks 111122223333
40
-            fold_id=rep(1:param.value(I,'folds'),length.out=nrow(X))
41
-            fold_id=sort(fold_id)
42
-        } else if (param.value(I,'method')=='random') {
43
-            fold_id=rep(1:param.value(I,'folds'),length.out=nrow(X))
44
-            fold_id=sample(fold_id,length(fold_id),replace = FALSE)
45
-        } else {
46
-            stop('unknown method for cross-validation. (try "venetian", "blocks" or "random")')
47
-        }
48
-
49
-        # for each value of k, split the data and run the workflow
50
-        for (i in 1:param.value(I,'folds'))
51
-        {
52
-            # prep the training data
53
-            TrainX=X[fold_id!=i,,drop=FALSE]
54
-            TrainY=Y[fold_id!=i,,drop=FALSE]
55
-            dtrain=dataset(data=TrainX,sample_meta=TrainY)
56
-
57
-            TestX=X[fold_id==i,,drop=FALSE]
58
-            TestY=Y[fold_id==i,,drop=FALSE]
59
-            dtest=dataset(data=TestX,sample_meta=TestY)
60
-
61
-            if (is(WF,'model_OR_model.seq'))
62
-                # HAS TO BE A model OR model.seq
63
-            {
64
-                WF=model.train(WF,dtrain)
65
-                # apply the model
66
-                WF=model.predict(WF,dtrain)
67
-                p=predicted(WF)
68
-                # metric
69
-                if (MET@actual=='sample_meta') {
70
-                    yhat=p
71
-                } else if (MET@actual=='data') {
72
-                    yhat=p$data
73
-                } else {
74
-                    stop('MET$actual not implemented yet')
75
-                }
76
-                YHATtr[fold_id!=i,]=yhat
77
-
78
-                # test set
79
-                WF=model.predict(WF,dtest)
80
-                p=predicted(WF)
81
-
82
-                if (MET@actual=='sample_meta') {
83
-                    yhat=p
84
-                } else if (MET@actual=='data') {
85
-                    yhat=p$data
86
-                } else {
87
-                    stop('MET$actual not implemented yet')
88
-                }
89
-                YHAT[fold_id==i,]=yhat
90
-
91
-
92
-            } else if (is(WF,'iterator'))
93
-            {
94
-                stop('not implemented yet')
95
-            }
96
-            # validation set...??
97
-            # WF=predict(WF,dval)
98
-            # p=predicted(WF[length(WF)])
99
-            # val_result[,1]=p[,1]
100
-
101
-            #all_results[((nrow(X)*(i-1))+1):(nrow(X)*i),]=fold_results
102
-        }
103
-
104
-        if (MET@actual=='data') {
105
-            # if its a model sequence get the prediction from the penultimate step
106
-            # for comparison with the predictions
107
-            if (is(WF,'model_OR_model.seq')) {
108
-                # apply model to data
109
-                WF=model.apply(WF,D)
110
-                n=length(WF)
111
-                if (n>1) {# just in case a sequence of 1
112
-                    Y=predicted(WF[n-1])$data
113
-                }
114
-            }
115
-        }
116
-
117
-        # test sets metric
118
-        df=data.frame('training_set'=0,'test_set'=0,'metric'=class(MET)[[1]])
119
-        MET=calculate(MET,Y,YHAT)
120
-        df$training_set=value(MET)
121
-        # training set metric
122
-        MET=calculate(MET,Y,YHATtr)
123
-        df$test_set=value(MET)
124
-        I$metric=df
125
-        return(I)
126
-    }
127
-)
128
-
... ...
@@ -11,8 +11,9 @@ permutation_test2<-setClass(
11 11
     slots=c(
12 12
         params.number_of_permutations='numeric',
13 13
         params.collect='character',
14
-        outputs.metric_permuted='data.frame',
15
-        outputs.metric_unpermuted='data.frame',
14
+        outputs.results.permuted='data.frame',
15
+        outputs.results.unpermuted='data.frame',
16
+        outputs.metric='data.frame',
16 17
         outputs.collected='entity'
17 18
     ),
18 19
     prototype = list(name='permutation test',
... ...
@@ -39,6 +40,9 @@ setMethod(f="run",
39 40
               WF=models(I)
40 41
               n=param.value(I,'number_of_permutations')
41 42
 
43
+              all_results_permuted=data.frame('actual'=rep(y[,1],n),'predicted'=rep(y[,1],n),'permutation'=0)
44
+              all_results_unpermuted=data.frame('actual'=rep(y[,1],n),'predicted'=rep(y[,1],n),'permutation'=0)
45
+
42 46
               collected=list(permuted=list(),unpermuted=list())
43 47
 
44 48
               for (i in 1:n)
... ...
@@ -138,8 +142,8 @@ setMethod(f="run",
138 142
 
139 143
               }
140 144
               # store results
141
-              output.value(I,'metric_permuted')=all_results_permuted
142
-              output.value(I,'metric_unpermuted')=all_results_unpermuted
145
+              output.value(I,'results.permuted')=all_results_permuted
146
+              output.value(I,'results.unpermuted')=all_results_unpermuted
143 147
               return(I)
144 148
           }
145 149
 )
... ...
@@ -19,14 +19,9 @@ setMethod(f="calculate",
19 19
     signature=c('r_squared'),
20 20
     definition=function(obj,Y,Yhat)
21 21
     {
22
-
23
-        M=matrix(colMeans(Y),nrow=1)
24
-        O=matrix(1,nrow=nrow(Y),ncol=1)
25
-        M=O %*% M
26
-
27
-        SSR  = sum(sum((Yhat-M)^2))
28
-        SSE  = sum(sum((Y-Yhat)^2))
29
-        SSTO = sum(sum((Y-M)^2))
22
+        SSR  = sum((Yhat-mean(Y))^2)
23
+        SSE  = sum((Y-Yhat)^2)
24
+        SSTO = sum((Y-mean(Y))^2)
30 25
 
31 26
         R2=1-(SSE/SSTO)
32 27
 
33 28
deleted file mode 100644
... ...
@@ -1,13 +0,0 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/kfold_xval2_class.R
3
-\docType{class}
4
-\name{kfold_xval2-class}
5
-\alias{kfold_xval2-class}
6
-\alias{kfold_xval2}
7
-\title{kfold_xval model class}
8
-\description{
9
-Applies k-fold crossvalidation to a model or model.seq()
10
-}
11
-\examples{
12
-I = kfold_xval2()
13
-}
... ...
@@ -1,14 +1,13 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/bootstrap_class.R,
3 3
 %   R/forward_selection_by_rank_class.R, R/grid_search_1d_class.R,
4
-%   R/kfold_xval2_class.R, R/kfold_xval_class.R, R/permutation_test2_class.R,
4
+%   R/kfold_xval_class.R, R/permutation_test2_class.R,
5 5
 %   R/permutation_test_class.R, R/permute_sample_order_class.R, R/run_doc.R
6 6
 \docType{methods}
7 7
 \name{run,bootstrap,dataset,metric-method}
8 8
 \alias{run,bootstrap,dataset,metric-method}
9 9
 \alias{run,forward_selection_byrank,dataset,metric-method}
10 10
 \alias{run,grid_search_1d,dataset,metric-method}
11
-\alias{run,kfold_xval2,dataset,metric-method}
12 11
 \alias{run,kfold_xval,dataset,metric-method}
13 12
 \alias{run,permutation_test2,dataset,metric-method}
14 13
 \alias{run,permutation_test,dataset,metric-method}
... ...
@@ -22,8 +21,6 @@
22 21
 
23 22
 \S4method{run}{grid_search_1d,dataset,metric}(I, D, MET)
24 23
 
25
-\S4method{run}{kfold_xval2,dataset,metric}(I, D, MET = NULL)
26
-
27 24
 \S4method{run}{kfold_xval,dataset,metric}(I, D, MET = NULL)
28 25
 
29 26
 \S4method{run}{permutation_test2,dataset,metric}(I, D, MET = NULL)