#' Split data into training and test sets
#'
#' Splits the data into a training and test set. The proportion of samples
#' placed in the training set is controlled by the parameter \code{p}; all
#' remaining samples are placed in the test set.
#' @export split_data
split_data <- setClass(
    "split_data",
    contains = c('method'),
    # One input parameter (p) and two outputs (training, testing), each
    # stored as an 'entity' object.
    slots = c(params.p = 'entity',
        outputs.training = 'entity',
        outputs.testing = 'entity'
    ),

    prototype = list(name = 'Split data',
        description = 'Splits the data into a training and test set',
        type = 'processing',
        # NOTE(review): presumably names the output that is passed on as the
        # result of this method -- confirm against the 'method' base class.
        predicted = 'testing',

        # Proportion of samples assigned to the training set (default 0.75).
        params.p = entity(name = 'Proportion in training set',
            description = 'The proportion of samples selected for the training set. All other samples will be assigned to the test set.',
            value = 0.75,
            type = 'numeric'),

        # Output: the samples selected for training; defaults to an empty
        # dataset until the method is applied.
        outputs.training = entity(name = 'A dataset of training data',
            description = 'A dataset object containing samples selected for the training set.',
            type = 'dataset',
            value = dataset()
        ),
        # Output: the samples not selected for training; defaults to an empty
        # dataset until the method is applied.
        outputs.testing = entity(name = 'A dataset of data for testing',
            description = 'A dataset object containing samples selected for the testing set.',
            type = 'dataset',
            value = dataset()
        )
    )
)

#' @export
#' @template method_apply
setMethod(f="method.apply",
    signature=c("split_data","dataset"),
    definition=function(M,D)
    {
        # Randomly partition the samples (rows) of D into a training set and
        # a testing set, store both as outputs of M and return M.
        opt <- param.list(M)

        data_matrix <- dataset.data(D)
        sample_info <- dataset.sample_meta(D)

        # number of samples
        nMax <- nrow(data_matrix)
        # number in the training set (rounded down)
        n <- floor(nMax * opt$p)

        # select a random subset of the row indices for training, without
        # replacement. sample.int() is used instead of sample(1:nMax, ...)
        # so that an empty dataset does not produce the sequence c(1, 0).
        in_training <- sample.int(nMax, size = n, replace = FALSE)

        # Row indices NOT selected for training, in their original order.
        # Negative indexing (x[-in_training, ]) must not be used here: when
        # n is 0, in_training is integer(0) and x[-integer(0), ] selects no
        # rows at all instead of every row, leaving the testing set empty.
        in_testing <- which(!(seq_len(nMax) %in% in_training))

        training <- dataset(
            data = data_matrix[in_training, , drop = FALSE],
            sample_meta = sample_info[in_training, , drop = FALSE],
            variable_meta = dataset.variable_meta(D),
            name = c(name(D), '(Training set)'),
            description = c(description(D), 'A subset of the data has been selected as a training set'))

        testing <- dataset(
            data = data_matrix[in_testing, , drop = FALSE],
            sample_meta = sample_info[in_testing, , drop = FALSE],
            variable_meta = dataset.variable_meta(D),
            name = c(name(D), '(Testing set)'),
            description = c(description(D), 'A subset of the data has been selected as a test set'))

        output.value(M, 'training') <- training
        output.value(M, 'testing') <- testing

        return(M)
    }
)