R/split_data_class.R
d0f81a51
 #' @eval get_description('split_data')
 #' @export split_data
 #' @examples
 #' M = split_data(p_train=0.75)
 #'
 split_data <- function(p_train, ...) {
     # Build the 'split_data' object through the struct constructor, passing
     # on p_train together with any additional arguments given via `...`.
     model <- struct::new_struct(
         'split_data',
         p_train = p_train,
         ...
     )
     model
 }
 
 
 # S4 class definition for the 'split_data' model. It has one input parameter
 # (p_train) and two outputs (training, testing); each is stored in an
 # 'entity' slot whose name, description, type and default value are set in
 # the prototype below.
 .split_data<-setClass(
     "split_data",
     contains = c('model'),
     slots=c(
         p_train='entity',
         training='entity',
         testing='entity'
     ),

     prototype=list(
         name = 'Split data',
         # The fragments are joined with paste0(), so each one must carry its
         # own trailing space to keep the sentences separated.
         description = paste0('The data matrix is divided into two subsets. ',
         'A predefined proportion of the samples are randomly selected for a ',
         'training set, and the remaining samples are used for the test set.'),
         type = 'processing',
         predicted = 'testing',
         # slot names registered as input parameters and as outputs
         .params=c('p_train'),
         .outputs=c('training','testing'),

         # input parameter: proportion of samples placed in the training set
         p_train=entity(name = 'Proportion in training set',
             description = paste0('The proportion of samples selected for the ',
             'training set.'),
             value = 0.75,
             type='numeric'),

         # output: the samples selected for training
         training=entity(name = 'A DatasetExperiment of training data',
             description = 'A DatasetExperiment object containing samples selected for the training set.',
             type='DatasetExperiment',
             value=DatasetExperiment()
         ),
         # output: the samples not selected for training
         testing=entity(name = 'A DatasetExperiment of data for testing',
             description = 'A DatasetExperiment object containing samples selected for the testing set.',
             type='DatasetExperiment',
             value=DatasetExperiment()
         )
     )
 )
 
 #' @export
 #' @template model_apply
 setMethod(f="model_apply",
     signature=c("split_data","DatasetExperiment"),
     definition=function(M,D) {
         # Randomly partition the samples (rows) of D into a training set and
         # a testing set, store both as outputs of M, and return M.
         opt <- param_list(M)

         # total number of samples
         nMax <- nrow(D$data)
         # number of samples in the training set
         n <- floor(nMax * opt$p_train)

         # Row indices randomly selected for training. sample.int() draws from
         # 1..nMax directly, avoiding the 1:nMax pitfall where nMax = 0 gives
         # c(1, 0).
         in_training <- sample.int(nMax, size = n, replace = FALSE)

         # Row indices left for testing, in their original order. Computed
         # explicitly rather than with negative indexing: when in_training is
         # empty, x[-in_training, ] selects zero rows instead of all rows.
         in_testing <- setdiff(seq_len(nMax), in_training)

         training <- DatasetExperiment(
             data=D$data[in_training,,drop=FALSE],
             sample_meta=D$sample_meta[in_training,,drop=FALSE],
             variable_meta=D$variable_meta,
             name=c(D$name,'(Training set)'),
             description=c(D$description,'A subset of the data has been selected as a training set'))

         testing <- DatasetExperiment(
             data=D$data[in_testing,,drop=FALSE],
             sample_meta=D$sample_meta[in_testing,,drop=FALSE],
             variable_meta=D$variable_meta,
             name=c(D$name,'(Testing set)'),
             description=c(D$description,'A subset of the data has been selected as a test set'))

         output_value(M,'training') <- training
         output_value(M,'testing') <- testing

         return(M)
     }
 )