Bioconductor Code: structToolbox

Raw Blame Patch Log History
#' HSD model class
#'
#' HSD model class. Tukey's honest significant difference for ANOVA objects
#'
#' @import struct
#' @import stats
#' @import agricolae
#' @include anova_class.R
#' @export HSD
HSD<-setClass(
  "HSD",
  contains=c('method','stato'),
  slots=c(
    # INPUTS
    params.alpha='entity.stato',
    params.mtc='entity.stato',
    params.formula='entity',
    params.unbalanced='entity',
    # OUTPUTS
    outputs.difference='data.frame',
    outputs.UCL='data.frame',
    outputs.LCL='data.frame',
    # outputs.means='data.frame',
    # outputs.sd='data.frame',
    # outputs.counts='data.frame',
    outputs.p_value='entity.stato',
    outputs.significant='entity'
  ),
  prototype = list(name='Tukey Honest Significant Difference',
    description='Tukey HSD post hoc test abblied to ANOVA object.',
    type="univariate",
    predicted='p_value',
    stato.id="STATO:0000187",

    params.alpha=entity.stato(name='Confidence level',
      stato.id='STATO:0000053',
      value=0.05,
      type='numeric',
      description='the p-value cutoff for determining significance.'
    ),
    params.mtc=entity.stato(name='Multiple Test Correction method',
      stato.id='OBI:0200089',
      value='none',
      type='numeric',
      description='The method used to adjust for multiple comparisons.'
    ),
    params.unbalanced=entity(name='Unbalanced model',
      description='TRUE or [FALSE]. Apply correction for unbalanced designs.',
      value=FALSE,
      type='logical'
    ),
    outputs.p_value=entity.stato(name='p value',
      stato.id='STATO:0000175',
      type='numeric',
      description='the probability of observing the calculated t-statistic.'
    ),
    outputs.significant=entity(name='Significant features',
      #stato.id='STATO:0000069',
      type='logical',
      description='TRUE if the calculated p-value is less than the supplied threhold (alpha)'
    )
  )
)

#' @export
setMethod(f="method.apply",
  signature=c("HSD",'dataset'),
  definition=function(M,D) {
    X=dataset.data(D)
    var_names=all.vars(M$formula)
    var_names_1=var_names[1]
    var_names=var_names[-1]
    y=dataset.sample_meta(D)[var_names]

    # attempt to detect within factors
    within=which(all.names(M$formula) %in% all.names(M$formula)[which('Error'== all.names(M$formula))+2])
    if (length(within)>0) {
      var_names_ex=var_names[-within]
    } else {
      var_names_ex=var_names
    }

    FF=full_fact(var_names_ex)
    FF=apply(FF,1,function(x) var_names_ex[x==1])
    FF=FF[-1]

    # set the contrasts
    O=options('contrasts') # keep the old ones
    options(contrasts = c("contr.sum","contr.poly"))

    output=apply(X,2,function(x) {
      temp=y
      temp[[var_names_1]]=x

      # check for alias columns
      ALIAS=FALSE
      dona=FALSE

      # check n levels
      temp2=na.omit(temp)
      s=sapply(var_names,function(x) summary(temp2[[x]]))
      n=nrow(temp2)

      if (any(s<=2)) { # at least one level has 1 samples
        dona=TRUE
      }

      if (all(s>2)) {
        al=alias(params$formula,data=temp)
        if ('Complete' %in% names(al)) {
          dona=TRUE
        }
      }

      # check for enough samples
      temp3=temp
      temp3[[var_names_1]]=rnorm(nrow(y))
      LM=lm(formula=M$formula,data=temp3)
      p=nrow(summary(LM)$coefficients)
      if (n<p) {
        dona=TRUE
      }

      if (dona) {
        # missing values have probably prevented one or more combinations of levels being present
        # use some fake data to generate the output table then replace all the values with NA
        temp[[var_names_1]]=rnorm(nrow(y))
        ALIAS=TRUE
      }
      LM=lm(formula=M$formula,data=temp)


      # for each combination of factors...
      out2=lapply(FF,function(x) {
        A=HSD.test(LM,x,group = FALSE)$comparison
        if (ALIAS) {
          A[!is.na(A)]=NA # replace with NA if alias are present
        }
        return(A)
      })
      out2=do.call(rbind,out2)

      return(out2)
    })


    p_value=lapply(output,function(x) x$pvalue)
    p_value=do.call(rbind,p_value)
    M$p_value=data.frame(p_value)
    colnames(M$p_value)=rownames(output[[1]])

    # fdr correct
    M$p_value=apply(M$p_value,2,p.adjust,method=M$mtc)

    M$significant=M$p_value<M$alpha

    difference=lapply(output,function(x) x$difference)
    difference=do.call(rbind,difference)
    M$difference=data.frame(difference)
    colnames(M$difference)=rownames(output[[1]])

    UCL=lapply(output,function(x) x$UCL)
    UCL=do.call(rbind,UCL)
    M$UCL=data.frame(UCL)
    colnames(M$UCL)=rownames(output[[1]])

    LCL=lapply(output,function(x) x$LCL)
    LCL=do.call(rbind,LCL)
    M$LCL=data.frame(LCL)
    colnames(M$LCL)=rownames(output[[1]])

    options(O)

    return(M)
  }
)



full_fact = function(f,N=length(f),M=NULL) {
  if (N==length(f)) {
    M=matrix(c(0,1),nrow=2,ncol=1)
    if (N>1) {
      M=full_fact(f,N-1,M)
    }
  } else {
    M0=M1=M
    M0=cbind(M0,matrix(0,nrow=nrow(M)))
    M1=cbind(M1,matrix(1,nrow=nrow(M)))
    M=rbind(M0,M1)
    if (N>1) {
      M=full_fact(f,N-1,M)
    }
  }
  return(M[order(rowSums(M)),,drop=FALSE])
}