Bioconductor Code: structToolbox

Raw Blame Patch Log History
#' feature_boxplot class
#'
#' plots a boxplot of a chosen feature for each group of a dataset
#'
#' @import struct
#' @export feature_boxplot
feature_boxplot<-setClass(
  "feature_boxplot",
  contains=c('chart','stato'),
  slots=c(
    # INPUTS
    params.label_outliers='entity',
    params.feature_to_plot='entity',
    params.factor_name='entity',
    params.show_counts='entity'
  ),
  prototype = list(name='Feature boxplot',
                   description='plots a boxplot of a chosen feature for each group of a dataset.',
                   type="boxlot",
                   params=c('label_outliers','feature_to_plot','factor_name','show_counts'),
                   stato.id='STATO:0000243',

                   params.label_outliers=entity(name='Label outliers',
                                                value=TRUE,
                                                type='logical',
                                                description='(TRUE) of FALSE to print labels for outliers.'
                   ),
                   params.feature_to_plot=entity(name='Feature to plot',
                                                 value='V1',
                                                 type='character',
                                                 description='The column name of the feature to be plotted.'
                   ),
                   params.factor_name=entity(name='Factor name',
                                             value='factor',
                                             type='character',
                                             description='The column name of meta data to use.'
                   ),
                   params.show_counts=entity(name='Show counts',
                                             value=TRUE,
                                             type='logical',
                                             description='(TRUE) or FALSE to display the number of values used to create each boxplot.'
                   )
  )
)

#' @export
setMethod(f="chart.plot",
          signature=c("feature_boxplot",'dataset'),
          definition=function(obj,dobj)
          {
            # get options
            opt=param.list(obj)
            # get data
            Xt=dataset.data(dobj)
            # column name
            if (is.numeric(opt$feature_to_plot)) {
              varn=colnames(Xt)[opt$feature_to_plot]
            } else {
            varn=opt$feature_to_plot
            }
            # get requested column
            Xt=Xt[[opt$feature_to_plot]]
            # meta data
            SM=dataset.sample_meta(dobj)
            SM=SM[[obj$factor_name]]

            # remove NA
            SM=SM[!is.na(Xt)]
            Xt=Xt[!is.na(Xt)]

            # count number of values
            L=levels(SM)
            count=numeric(length(L))
            for (i in 1:length(L)) {
              count[i]=sum(SM==L[i])
            }

            # get color pallete using pmp
            clrs=pmp::createClassAndColors(class = SM)
            SM=clrs$class

            #prep the plot
            temp=data.frame(x=SM,y=Xt)
            p<-ggplot(temp, aes_(x=~x,y=~y,color=~x)) +
              geom_boxplot() +
              xlab(opt$factor) +
              ylab('') +
              ggtitle(varn) +
              scale_colour_manual(values=clrs$manual_colors,name=opt$factor_name) +
              theme_Publication(base_size = 12) +
              theme(legend.position="none")

            if (opt$show_counts) {
              newlabels=as.character(count)
              newlabels=paste0(as.character(L),'\n(n = ',newlabels,')')
              p=p+scale_x_discrete(labels=newlabels)
            }

            if (opt$label_outliers) {
              outliers=numeric()
              for (l in L) {
                IN=which(SM==l)
                outliers=c(outliers,IN[which( Xt[IN]>(quantile(Xt[IN], 0.75) + 1.5*IQR(Xt[IN]))) ] )
                outliers=c(outliers,IN[which( Xt[IN]<(quantile(Xt[IN], 0.25) - 1.5*IQR(Xt[IN]))) ] )
              }
              outlier_df=temp[outliers,]
              outlier_df$out_label=paste0('  ',rownames(dataset.data(dobj)))[outliers]
              p=p+geom_text(data=outlier_df,aes_(group=~x,color=~x,label=~out_label),hjust='left')
            }

            return(p)

          }
)



######################################
######################################


#' mv_histogram class
#'
#' histograms indicating the numbers of missing values per sample/feature
#'
#' @import struct
#' @export mv_histogram
mv_histogram<-setClass(
  "mv_histogram",
  contains='chart',
  slots=c(
    # INPUTS
    params.label_outliers='entity',
    params.by_sample='entity'
  ),
  prototype = list(name='Missing value histogram',
                   description='Histogram ofmissing values per sample/feature.',
                   type="histogram",
                   params=c('by_sample'),
                   params.by_sample=entity(name='Plot by sample or by feature',
                                           value=TRUE,
                                           type='logical',
                                           description='(TRUE) to plot missing values per sample or FALSE to plot by feature.'
                   )
  )
)

#' @export
setMethod(f="chart.plot",
          signature=c("mv_histogram",'dataset'),
          definition=function(obj,dobj)
          {
            # get options
            opt=param.list(obj)
            # get data
            Xt=dataset.data(dobj)
            # meta data
            SM=dataset.sample_meta(dobj)[ ,1]

            if (opt$by_sample)
            {
              # count NS per sample
              count=apply(Xt,1,function(x) {sum(is.na(x))/length(x)*100})
              txt='Missing values per sample'
            } else {
              # count NS per feature
              count=apply(Xt,2,function(x) {sum(is.na(x))/length(x)*100})
              txt='Missing values per feature'
            }

            A=data.frame(x=count)
            p=ggplot (data=A, aes_(x=~x)) + geom_histogram()+
              xlab ("missing values, %")+ ggtitle(txt)+
              xlim (0,100)+
              scale_colour_Publication()+ theme_Publication(base_size = 12)

            return(p)

          }
)


######################################
######################################



#' mv_boxplot class
#'
#' boxplots indicating the numbers of missing values per sample/feature
#'
#' @import struct
#' @export mv_boxplot
mv_boxplot<-setClass(
  "mv_boxplot",
  contains='chart',
  slots=c(
    # INPUTS
    params.label_outliers='entity',
    params.by_sample='entity',
    params.factor_name='entity',
    params.show_counts='entity'
  ),
  prototype = list(name='Missing value boxplots',
                   description='Histogram ofmissing values per sample/feature.',
                   type="histogram",
                   params.label_outliers=entity(name='Label outliers',
                                                value=TRUE,
                                                type='logical',
                                                description='(TRUE) of FALSE to print labels for outliers.'
                   ),
                   params.by_sample=entity(name='Plot by sample or by feature',
                                           value=TRUE,
                                           type='logical',
                                           description='(TRUE) to plot missing values per sample or FALSE to plot by feature.'
                   ),
                   params.factor_name=entity(name='Factor name',
                                             value='factor',
                                             type='character',
                                             description='The name of the factor to be displayed on the plot. Appears on axis and legend titles, for example. By default the column name of the meta data will be used where possible.'
                   ),
                   params.show_counts=entity(name='Show counts',
                                             value=TRUE,
                                             type='logical',
                                             description='(TRUE) or FALSE to display the number of values used to create each boxplot.'
                   )
  )
)

#' @export
setMethod(f="chart.plot",
          signature=c("mv_boxplot",'dataset'),
          definition=function(obj,dobj)
          {
            # get options
            opt=param.list(obj)
            # get data
            Xt=dataset.data(dobj)
            # meta data
            SM=dataset.sample_meta(dobj)[ ,1]

            L=levels(SM)

            if (opt$by_sample)
            {
              # count NS per sample
              count=apply(Xt,1,function(x) {sum(is.na(x))/length(x)*100})
              result=matrix(0,nrow=nrow(Xt),ncol=2)
              result[,1]=count
              result[,2]=SM
              if (sum(result[,1])==0) {
                warning('No missing values were detected')
              }
              txt='Missing values per sample'
              # get color pallete using pmp
              clrs=pmp::createClassAndColors(class = SM)
              A=data.frame(x=clrs$class,y=result[,1])
            } else {
              for (i in 1:length(L)) {
                # count NS per feature per group
                count=apply(Xt[SM==L[i],,drop=FALSE],2,function(x) {sum(is.na(x))/sum(SM==L[i])*100})
                temp=data.frame(y=count,x=L[i])
                if (i==1) {
                  result=temp
                } else {
                  result=rbind(result,temp)
                }
              }
              if (sum(result$y)==0) {
                warning('No missing values were detected')
              }
              txt='Missing values per feature'
              # get color pallete using pmp
              clrs=pmp::createClassAndColors(class = as.factor(result$x))
              A=data.frame(x=clrs$class,y=result$y)
            }


            p=ggplot (data=A, aes_(x=~x,y=~y,color=~x)) +
              geom_boxplot() +
              ggtitle(txt) +
              xlab(opt$factor) +
              ylim (0,100)+
              scale_colour_manual(values=clrs$manual_colors,name=opt$factor_name) +
              theme_Publication(base_size = 12) +
              ylab ("missing values, %") +
              coord_flip()+
              theme(legend.position="none")

            if (opt$show_counts) {
              L=levels(A$x)
              num=numeric(length(L))
              for (i in 1:length(L)) {
                num[i]=sum(A$x==L[i])
              }
              newlabels=as.character(num)
              newlabels=paste0(as.character(L),'\n(n = ',newlabels,')')
              p=p+scale_x_discrete(labels=newlabels)
            }

            if (opt$label_outliers) {
              outliers=numeric()
              for (l in L) {
                IN=which(A$x==l)
                outliers=c(outliers,IN[which( A$y[IN]>(quantile(A$y[IN], 0.75) + 1.5*IQR(A$y[IN]))) ] )
                outliers=c(outliers,IN[which( A$y[IN]<(quantile(A$y[IN], 0.25) - 1.5*IQR(A$y[IN]))) ] )
              }
              outlier_df=A[outliers,]
              if (opt$by_sample) {
                outlier_df$out_label=paste0('  ',rownames(dataset.data(dobj)))[outliers]
              } else
              {
                outlier_df$out_label=paste0('  ',rep(colnames(dataset.data(dobj)),length(L))[outliers])
              }
              p=p+geom_text(data=outlier_df,aes_(group=~x,color=~x,label=~out_label,angle =~ 90),hjust='left')
            }

            return(p)
          }
)

#' distribution plot class
#'
#' visualise distributions of features/samples
#'
#' @import struct
#' @export dataset.dist
dataset.dist<-setClass(
  "dataset.dist",
  contains='chart',
  slots=c(
    # INPUTS
    params.factor_name='entity',
    params.per_class='entity'
  ),
  prototype = list(name='Distribution plot',
                   description='Plot of the distribution of all values in the data matrix.',
                   type="boxlot",
                   params=c('factor_name','per_class'),

                   params.factor_name=entity(name='Factor name',
                                             value='factor',
                                             type='character',
                                             description='The name of the factor to be displayed on the plot. Appears on axis and legend titles, for example. By default the column name of the meta data will be used where possible.'
                   ),
                   params.per_class=entity(name='Plot per class',
                                           value=TRUE,
                                           type='logical',
                                           description='[TRUE] or FALSE to plot distributions by class.'
                   )
  )
)

#' @export
#' @import pmp
setMethod(f="chart.plot",
          signature=c("dataset.dist",'dataset'),
          definition=function(obj,dobj)
          {
            opt=param.list(obj)
            X=as.matrix(dataset.data(dobj))
            S=dataset.sample_meta(dobj)[[opt$factor_name]]

            if (opt$per_class) {
              L=levels(S)
              for (k in L) {
                M=X[S==k,,drop=FALSE]
                if (k==L[1]) {
                  temp=data.frame(values=as.vector(M),group=k)
                } else {
                  temp=rbind(temp,data.frame(values=as.vector(M),group=k))
                }
              }
              out=ggplot(data=temp, aes_(x=~values,color=~group))+
                geom_freqpoly(bins=100)
            } else {
              temp=data.frame(values=as.vector(X),group='Sample')
              out=ggplot(data=temp, aes_(x=~values,color=~group)) +
                geom_freqpoly(bins=100,color='black')
            }
            out = out  +
              xlab('Values') +
              ylab('Density') +
              scale_colour_Publication(name=opt$factor_name)+
              theme_Publication(base_size = 12)

            return(out)

          }
)

#' distribution plot class
#'
#' visualise distributions of features/samples
#'
#' @import struct
#' @export dataset.boxplot
dataset.boxplot<-setClass(
  "dataset.boxplot",
  contains='chart',
  slots=c(
    # INPUTS
    params.factor_name='entity',
    params.by_sample='entity',
    params.per_class='entity',
    params.number='entity'
  ),
  prototype = list(name='Distribution plot',
                   description='Plot of the distribution of all values in the data matrix.',
                   type="boxlot",
                   params.factor_name=entity(name='Factor name',
                                             value='factor',
                                             type='character',
                                             description='The name of the factor to be displayed on the plot. Appears on axis and legend titles, for example. By default the column name of the meta data will be used where possible.'
                   ),
                   params.by_sample=entity(name='Plot by sample or by feature',
                                           value=TRUE,
                                           type='logical',
                                           description='[TRUE] to plot by sample or FALSE to plot by feature.'
                   ),
                   params.per_class=entity(name='Plot per class',
                                           value=TRUE,
                                           type='logical',
                                           description='[TRUE] or FALSE to plot distributions by class.'
                   ),
                   params.number=entity(name='Number of features/samples',
                                         value=50,
                                         type='logical',
                                         description='The number of features/samples to plot.'
                   )
  )
)

#' @export
#' @import pmp
setMethod(f="chart.plot",
          signature=c("dataset.boxplot",'dataset'),
          definition=function(obj,dobj)
          {
            opt=param.list(obj)
            X=dataset.data(dobj)
            SM=dataset.sample_meta(dobj)
            if (!opt$by_sample) {
              s=seq(from = 1, to = ncol(X), length.out = opt$number)
            } else {
              s=seq(from = 1, to = nrow(X), length.out = opt$number)
              X=t(X)
            }
            s=unique(floor(s))

            for (i in s) {
              if (!opt$by_sample){
                sm=SM[,1]
              } else {
                sm=SM[i,1]
              }
              if (!opt$per_class) {
                sm='Sample'
              }
              if (i==s[1]) {
                temp=data.frame(value=X[,i],feature=colnames(X)[i],group=sm)
              } else {
                temp=rbind(temp,data.frame(value=X[,i],feature=colnames(X)[i],group=sm))
              }
            }
            out=ggplot(data=temp, aes_(x=~feature,y=~value,color=~group)) +
              geom_boxplot()+
              xlab('Features') +
              ylab('Values') +
              scale_colour_Publication(name=opt$factor_name)+
              theme_Publication(base_size = 12)+
              coord_flip() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
              if  (!opt$per_class) {
                  out=out+theme(legend.position="none")
              }
            return(out)

          }
)





#' plots to compare distributions
#'
#' a combination of plots to compare two datasets
#'
#' @import struct
#' @export compare_dist
compare_dist<-setClass(
  "compare_dist",
  contains='chart',
  slots=c(params.factor_name='entity'),
  prototype = list(name='Compare distributions',
    description='Distributions and box plots to compare two datasets',
    type="mixed",
    params.factor_name=entity(name='Factor name',
      value='factor',
      type='character',
      description='The name of the factor to be displayed on the plot. Appears on axis and legend titles, for example.'
    )

  )
)

#' @export
#' @import gridExtra
setMethod(f="chart.plot",
          signature=c("compare_dist",'dataset'),
          definition=function(obj,dobj,eobj)
          {
            C=dataset.boxplot(by_sample=FALSE,per_class=FALSE,number=30,factor_name=obj$factor_name)

            C1=chart.plot(C,dobj)+
              labs(tag='c)')+
              theme(axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'Before processing')

            C2=chart.plot(C,eobj)+
              labs(tag='d)')+
              theme(axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'After processing')

            C=dataset.dist(factor_name=obj$factor_name,per_class=TRUE)

            C3=chart.plot(C,dobj)+
              labs(tag='a)')+
              theme(legend.position="none",axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'Before processing')

            C4=chart.plot(C,eobj)+
              labs(tag='b)')+
              theme(legend.position="none",axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'After processing')

            rC1=ggplot_build(C1)$layout$panel_scales_y[[1]]$range$range
            rC2=ggplot_build(C2)$layout$panel_scales_y[[1]]$range$range
            rC3=ggplot_build(C3)$layout$panel_scales_x[[1]]$range$range
            rC4=ggplot_build(C4)$layout$panel_scales_x[[1]]$range$range
            rC1=max(abs(rC1))
            rC2=max(abs(rC2))
            rC3=max(abs(rC3))
            rC4=max(abs(rC4))

            C1=C1+ylim(c(-rC1,rC1))
            C3=C3+xlim(c(-rC1,rC1))
            C2=C2+ylim(c(-rC2,rC2))
            C4=C4+xlim(c(-rC2,rC2))


            gA <- ggplotGrob(C3)
            gB <- ggplotGrob(C4)
            gC <- ggplotGrob(C1)
            gD <- ggplotGrob(C2)

            L=matrix(nrow=4,ncol=1)
            L=as.matrix(rbind(c(1,2),c(3,4),c(3,4),c(3,4)))

            temp=gtable_cbind(gA,gB)
            heights1=temp$heights
            gA$heights=heights1
            gB$heights=heights1

            temp=gtable_cbind(gC,gD)
            heights2=temp$heights
            gC$heights=heights2
            gD$heights=heights2

            maxWidth = grid::unit.pmax(gA$widths[2:5], gB$widths[2:5],gC$widths[2:5],gD$widths[2:5])
            gA$widths[2:5] <- as.list(maxWidth)
            gB$widths[2:5] <- as.list(maxWidth)
            gC$widths[2:5] <- as.list(maxWidth)
            gD$widths[2:5] <- as.list(maxWidth)

            p=grid.arrange(grobs=list(gA,gB,gC,gD),layout_matrix=L)

            return(p)

          }
)


#############################################################
#############################################################

#' feature_scatter class
#'
#' plots a scatterplot of a chosen feature against a chosen factor
#'
#' @import struct
#' @export feature_scatter
feature_scatter<-setClass(
  "feature_scatter",
  contains=c('chart'),
  slots=c(
    # INPUTS
    params.feature_to_plot='entity',
    params.factor_name='entity'
  ),
  prototype = list(name='Feature boxplot',
    description='plots a boxplot of a chosen feature for each group of a dataset.',
    type="scatter",

    params.feature_to_plot=entity(name='Feature to plot',
      value='V1',
      type='character',
      description='The column name of the feature to be plotted.'
    ),
    params.factor_name=entity(name='Factor name',
      value='factor',
      type='character',
      description='The column name of meta data to use.'
    )
  )
)

#' @export
setMethod(f="chart.plot",
  signature=c("feature_scatter",'dataset'),
  definition=function(obj,dobj)
  {
    # get options
    opt=param.list(obj)
    # get data
    Xt=dataset.data(dobj)
    # column name
    if (is.numeric(opt$feature_to_plot)) {
      varn=colnames(Xt)[opt$feature_to_plot]
    } else {
      varn=opt$feature_to_plot
    }
    # get requested column
    Xt=Xt[[opt$feature_to_plot]]
    # meta data
    SM=dataset.sample_meta(dobj)
    SM=SM[[obj$factor_name]]

    # prep the plot
    temp=data.frame(x=SM,y=Xt)
    p<-ggplot(temp, aes_(x=~x,y=~y,color=~x)) +
      geom_boxplot() +
      xlab(opt$factor) +
      ylab('') +
      ggtitle(varn) +
      scale_colour_manual(values=clrs$manual_colors,name=opt$factor_name) +
      theme_Publication(base_size = 12) +
      theme(legend.position="none")

    if (opt$show_counts) {
      newlabels=as.character(count)
      newlabels=paste0(as.character(L),'\n(n = ',newlabels,')')
      p=p+scale_x_discrete(labels=newlabels)
    }

    if (opt$label_outliers) {
      outliers=numeric()
      for (l in L) {
        IN=which(SM==l)
        outliers=c(outliers,IN[which( Xt[IN]>(quantile(Xt[IN], 0.75) + 1.5*IQR(Xt[IN]))) ] )
        outliers=c(outliers,IN[which( Xt[IN]<(quantile(Xt[IN], 0.25) - 1.5*IQR(Xt[IN]))) ] )
      }
      outlier_df=temp[outliers,]
      outlier_df$out_label=paste0('  ',rownames(dataset.data(dobj)))[outliers]
      p=p+geom_text(data=outlier_df,aes_(group=~x,color=~x,label=~out_label),hjust='left')
    }

    return(p)

  }
)



#############################################################
#############################################################

#' dataset.heatmap class
#'
#' plots a dataset as a heatmap
#'
#' @import struct
#' @import reshape2
#' @export dataset.heatmap
dataset.heatmap<-setClass(
  "dataset.heatmap",
  contains=c('chart'),
  slots=c(
    # INPUTS
    params.na_colour='entity'
  ),
  prototype = list(name='Dataset heatmap',
    description='plots a heatmap of a dataset',
    type="scatter",

    params.na_colour=entity(name='NA colour',
      value='#FF00E4',
      type='character',
      description='A hex colour code to use for missing values'
    )
  )
)

#' @export
setMethod(f="chart.plot",
  signature=c("dataset.heatmap",'dataset'),
  definition=function(obj,dobj)
  {
    X=melt(as.matrix(dobj$data))
    colnames(X)=c('Sample','Feature','Peak area')
    p=ggplot(data=X,aes(x=`Feature`,y=`Sample`,fill=`Peak area`)) + geom_raster() +
      scale_colour_Publication()+
      theme_Publication(base_size = 12)+
      scale_fill_viridis_c(na.value=obj$na_colour)+
      theme(axis.text.x=element_blank(),
            axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()
        )


    return(p)

  }
)