d0f81a51 |
#' @eval get_description('split_data')
#' @export split_data
#' @examples
#' M = split_data(p_train=0.75)
#'
split_data <- function(p_train, ...) {
    # Constructor for the 'split_data' object: p_train and any additional
    # arguments supplied through ... are handed unchanged to
    # struct::new_struct(), whose result is returned.
    struct::new_struct(
        "split_data",
        p_train = p_train,
        ...
    )
}
# S4 class definition for the 'split_data' object. It extends 'model', takes
# one input parameter (p_train) and exposes two outputs (training, testing),
# each stored in an 'entity' slot.
.split_data<-setClass(
    "split_data",
    contains = c('model'),
    slots=c(
        p_train='entity',
        training='entity',
        testing='entity'
    ),

    prototype=list(
        name = 'Split data',
        # each fragment ends with a space so that paste0 yields well-formed
        # sentences
        description = paste0('The data matrix is divided into two subsets. ',
            'A predefined proportion of the samples are randomly selected for a ',
            'training set, and the remaining samples are used for the test set.'),
        type = 'processing',
        predicted = 'testing',

        # names of the slots that act as input parameters / outputs
        .params=c('p_train'),
        .outputs=c('training','testing'),

        # input parameter: fraction of samples assigned to the training set
        p_train=entity(name = 'Proportion in training set',
            description = paste0('The proportion of samples selected for the ',
                'training set.'),
            value = 0.75,
            type='numeric'),

        # output: initialised with an empty DatasetExperiment()
        training=entity(name = 'A DatasetExperiment of training data',
            description = 'A DatasetExperiment object containing samples selected for the training set.',
            type='DatasetExperiment',
            value=DatasetExperiment()
        ),

        # output: initialised with an empty DatasetExperiment()
        testing=entity(name = 'A DatasetExperiment of data for testing',
            description = 'A DatasetExperiment object containing samples selected for the testing set.',
            type='DatasetExperiment',
            value=DatasetExperiment()
        )
    )
)
#' @export
#' @template model_apply
setMethod(f="model_apply",
    signature=c("split_data","DatasetExperiment"),
    definition=function(M,D) {
        # Randomly partition the rows (samples) of D into a training and a
        # testing DatasetExperiment, store both as outputs of M and return M.
        opt=param_list(M)

        # number of samples
        nMax=nrow(D$data)

        # number in the training set
        n=floor(nMax*opt$p_train)

        # select a random subset of the data for training; sample.int draws
        # from 1..nMax directly, so nMax == 0 cannot expand to c(1, 0) the
        # way 1:nMax would
        in_training=sample.int(nMax,size=n,replace=FALSE)

        # every sample not chosen for training goes to the testing set. The
        # complement is computed explicitly because negative indexing with an
        # empty vector (x[-integer(0), ]) selects NO rows instead of all
        # rows, which would silently drop every sample whenever n == 0.
        in_testing=setdiff(seq_len(nMax),in_training)

        # NOTE(review): name/description are combined with c(), giving one
        # element per part rather than a single pasted string - kept as is.
        training=DatasetExperiment(data=D$data[in_training,,drop=FALSE],
            sample_meta=D$sample_meta[in_training,,drop=FALSE],
            variable_meta=D$variable_meta,
            name=c(D$name,'(Training set)'),
            description=c(D$description,'A subset of the data has been selected as a training set'))

        testing=DatasetExperiment(data=D$data[in_testing,,drop=FALSE],
            sample_meta=D$sample_meta[in_testing,,drop=FALSE],
            variable_meta=D$variable_meta,
            name=c(D$name,'(Testing set)'),
            description=c(D$description,'A subset of the data has been selected as a test set'))

        output_value(M,'training')=training
        output_value(M,'testing')=testing

        return(M)
    }
)
|