#' Split data into sets
#'
#' Splits the data into a training and test set. A proportion `p` of the
#' samples (rows) is drawn at random, without replacement, for the training
#' set; all remaining samples form the test set.
#' @export split_data
split_data <- setClass(
  "split_data",
  contains = c('method'),
  slots = c(
    params.p = 'entity',
    outputs.training = 'entity',
    outputs.testing = 'entity'
  ),
  prototype = list(
    name = 'Split data',
    description = 'Splits the data into a training and test set',
    type = 'processing',
    predicted = 'testing',
    params.p = entity(
      name = 'Proportion in training set',
      description = 'The proportion of samples selected for the training set. All other samples willbe in assigned to the test set.',
      value = 0.75,
      type = 'numeric'
    ),
    outputs.training = entity(
      name = 'A dataset of training data',
      description = 'A dataset object containing samples selected for the training set.',
      type = 'dataset',
      value = dataset()
    ),
    outputs.testing = entity(
      name = 'A dataset of data for testing',
      description = 'A dataset object containing samples selected for the testing set.',
      type = 'dataset',
      value = dataset()
    )
  )
)

#' @export
#' @template method_apply
setMethod(
  f = "method.apply",
  signature = c("split_data", "dataset"),
  definition = function(M, D) {
    opt <- param.list(M)
    p <- opt$p

    # Fail early with a clear message instead of an obscure sampling error
    # when the requested proportion is not a single value in [0, 1].
    if (!is.numeric(p) || length(p) != 1L || is.na(p) || p < 0 || p > 1) {
      stop("split_data: parameter 'p' must be a single number between 0 and 1",
        call. = FALSE)
    }

    # number of samples
    n_samples <- nrow(dataset.data(D))
    # number in the training set
    n_training <- floor(n_samples * p)

    # Select a random subset of row indices for training. sample.int() avoids
    # the 1:0 sequence and the scalar special case of sample(x = 1:n).
    in_training <- sample.int(n_samples, size = n_training, replace = FALSE)

    # Compute the complement explicitly: negative indexing with a zero-length
    # vector (x[-integer(0), ]) selects NO rows, which would leave the test
    # set empty whenever the training set is empty.
    in_testing <- setdiff(seq_len(n_samples), in_training)

    training <- dataset(
      data = dataset.data(D)[in_training, , drop = FALSE],
      sample_meta = dataset.sample_meta(D)[in_training, , drop = FALSE],
      variable_meta = dataset.variable_meta(D),
      name = c(name(D), '(Training set)'),
      description = c(description(D),
        'A subset of the data has been selected as a training set')
    )

    testing <- dataset(
      data = dataset.data(D)[in_testing, , drop = FALSE],
      sample_meta = dataset.sample_meta(D)[in_testing, , drop = FALSE],
      variable_meta = dataset.variable_meta(D),
      name = c(name(D), '(Testing set)'),
      description = c(description(D),
        'A subset of the data has been selected as a test set')
    )

    # Store both subsets on the method object and hand it back to the caller.
    output.value(M, 'training') <- training
    output.value(M, 'testing') <- testing

    return(M)
  }
)