Bioconductor Code: structToolbox

Raw Blame Patch Log History
#' @eval get_description('classical_lsq')
#' @import struct
#' @examples
#' D = iris_DatasetExperiment()
#' M = classical_lsq(factor_names = 'Species')
#' M = model_apply(M,D)
#' @export classical_lsq

classical_lsq = function(alpha=0.05,mtc='fdr',factor_names,intercept=TRUE,...) {

    out=struct::new_struct('classical_lsq',
        alpha = alpha,
        mtc = mtc,
        factor_names = factor_names,
        intercept = intercept,
        ...)

    return(out)
}


.classical_lsq<-setClass(
    "classical_lsq",
    contains=c('model'),
    slots=c(
        # INPUTS
        alpha='entity',
        mtc='enum',
        factor_names='entity',
        intercept='entity',
        # OUTPUTS
        coefficients='entity',
        p_value='entity',
        significant='entity',
        r_squared='entity',
        adj_r_squared='entity'
    ),
    prototype = list(
        name='Univariate Classical Least Squares Regression',
        description=paste0(
            'In univariate classical least squares regression a line is ',
            'fitted between each feature/variable and a response variable. ',
            'The fitted line minimises the sum of squared differences ',
            'between the true response and the predicted response. ',
            'The coefficients (offset, gradient) of the fit can be tested ',
            'for significance.'
        ),
        type="univariate",
        ontology='STATO:0000370',
        predicted='p_value',
        .params=c('alpha','mtc','factor_names','intercept'),
        .outputs=c('coefficients','p_value','significant',
            'r_squared','adj_r_squared'),

        intercept=entity(name='Model intercept',
            type='logical',
            description=c(
                'TRUE' = 'An intercept term is included in the model.',
                'FALSE' = 'An intercept term is not included in the model.'),
            value=TRUE
        ),

        factor_names=entity(name='Factor name',
            description=paste0('The column names to regress against. If a ',
            'character vector then the same list is used ofr all features. ',
            'If a list of character vectors is provided it is assumed there ',
            'is a different set of columns for each feature.'),
            type=c('character','list'),
            value='V1'),

        alpha=ents$alpha,

        mtc=ents$mtc,

        coefficients=entity(name='Regression coefficients',
            type='data.frame',
            description='The regression coefficients for each term in the model.',
            value=data.frame()
        ),
        p_value=ents$p_value,
        significant=ents$significant,
        r_squared=entity(name='R Squared',
            description='The value of R Squared for the fitted model',
            type='data.frame',
            value=data.frame()
        ),
        adj_r_squared=entity(name='Adjusted R Squared',
            description='The value ofAdjusted  R Squared for the fitted model',
            type='data.frame',
            value=data.frame()
        )
    )
)

#' @export
#' @template model_apply
setMethod(f="model_apply",
    signature=c("classical_lsq",'DatasetExperiment'),
    definition=function(M,D)
    {
        # for classical least squares X is the design matrix, or sample_meta in our case
        X = D$sample_meta

        #if (is(M$factor_names,'character')) { # if a character vector then apply to all features.
        # get columns requested for fit
        #  X = X[ ,M$factor_names,drop=FALSE]
        #}
        # y  is the response, or in our case the data matrix
        y = D$data
        # assume X and y are already scaled appropriately in a previous step of the workflow


        fcn=function(n) {

            # use the linear model class
            if (M$intercept) {
                LM=linear_model(formula=y~1+.)
            } else {
                LM=linear_model(formula=y~0+.)
            }

            # create a DatasetExperiment object we can use with the linear model class
            temp_y=y[,n,drop=FALSE]
            if (is(M$factor_names,'character')) {
                X=X[,M$factor_names,drop=FALSE]

            } else { # assume list
                X=X[,M$factor_names[[n]],drop=FALSE]
            }
            colnames(temp_y)='y'
            Dlm=DatasetExperiment(data=temp_y,sample_meta=X,variable_meta=colnames(temp_y))

            LM=model_train(LM,Dlm)

            S=summary(LM$lm)

            return(S)
        }

        out=apply(as.matrix(1:ncol(y)),MARGIN=1,FUN=fcn)

        # reorganise outputs

        a=lapply(out,FUN=function(x) {rownames(coefficients(x))})
        u=unique(unlist(a))
        m=matrix(NA,nrow=ncol(y),ncol=length(u))
        df=data.frame(m)
        colnames(df)=u
        rownames(df)=colnames(y)
        df2=df
        for (i in 1:length(out))
        {
            c=data.frame(coefficients(out[[i]]),stringsAsFactors =FALSE,check.names = FALSE,check.rows=FALSE)
            n=rownames(c) # names of columns
            for (k in 1:length(n)) {
                w=which(colnames(df)==n[k])
                df[i,w]=c$Estimate[k]
                df2[i,w]=c$`Pr(>|t|)`[k]
            }
        }

        M$coefficients=df

        M$p_value=df2

        df3=data.frame('r_squared'=unlist(lapply(out,function(x){return(x$r.squared)})))
        df4=data.frame('adj_r_squared'=unlist(lapply(out,function(x){return(x$adj.r.squared)})))
        rownames(df3)=rownames(df)
        rownames(df4)=rownames(df)
        M$r_squared=df3
        M$adj_r_squared=df4

        # fdr correct the p-values
        M$p_value=as.data.frame(apply(M$p_value,2,FUN=function(x) {p.adjust(x,M$mtc)}))

        M$significant=as.data.frame(M$p_value<M$alpha)

        return(M)
    }
)