Bioconductor Code: structToolbox

Raw Blame Patch Log History
#' @eval get_description('feature_boxplot')
#' @examples
#' D = MTBLS79_DatasetExperiment
#' C = feature_boxplot(factor_name='Species',feature_to_plot='Petal.Width')
#' chart_plot(C,D)
#' @export feature_boxplot
feature_boxplot = function(label_outliers=TRUE,feature_to_plot,
                           factor_name,show_counts=TRUE,style='boxplot',jitter=FALSE,fill=FALSE,...) {
  out=struct::new_struct('feature_boxplot',
                         label_outliers=label_outliers,
                         feature_to_plot=feature_to_plot,
                         factor_name=factor_name,
                         show_counts=show_counts,
                         style=style,
                         jitter=jitter,
                         fill=fill,
                         ...)
  return(out)
}


.feature_boxplot<-setClass(
  "feature_boxplot",
  contains=c('chart'),
  slots=c(
    # INPUTS
    label_outliers='entity',
    feature_to_plot='entity',
    factor_name='entity',
    show_counts='entity',
    style='enum',
    jitter='entity',
    fill='entity'
  ),
  prototype = list(name='Feature boxplot',
                   description='A boxplot to visualise the distribution of values within a feature.',
                   type="boxlot",
                   .params=c('label_outliers','feature_to_plot','factor_name','show_counts','style','jitter','fill'),
                   
                   label_outliers=entity(name='Label outliers',
                                         value=TRUE,
                                         type='logical',
                                         description=c(
                                           'TRUE' = 'The index for outlier samples is included on the plot.',
                                           'FALSE' = 'No labels are displayed.'
                                         )
                   ),
                   feature_to_plot=entity(name='Feature to plot',
                                          value='V1',
                                          type=c('character','numeric','integer'),
                                          description='The column name of the plotted feature.'
                   ),
                   factor_name=ents$factor_name,
                   show_counts=ents$show_counts,
                   style=enum(
                     name='Plot style',
                     description=c(
                       'boxplot' = 'boxplot style',
                       'violin' = 'violon plot style'
                     ),
                     allowed=c('boxplot','violin'),
                     value='boxplot',
                     max_length=1,
                     type='character'
                   ),
                   jitter=entity(
                     name = 'Jitter',
                     description = 'Include points plotted with added jitter.',
                     value=FALSE,
                     type='logical',
                     max_length=1
                   ),
                   fill=entity(
                     name = 'Fill',
                     description = 'Block fill the boxes or violins with the group colour.',
                     value=FALSE,
                     type='logical',
                     max_length=1
                   )
  )
)

#' @export
#' @template chart_plot
setMethod(f="chart_plot",
          signature=c("feature_boxplot",'DatasetExperiment'),
          definition=function(obj,dobj)
          {
            # get options
            opt=param_list(obj)
            # get data
            Xt=dobj$data
            # column name
            if (is.numeric(opt$feature_to_plot)) {
              varn=colnames(Xt)[opt$feature_to_plot]
            } else {
              varn=opt$feature_to_plot
            }
            # get requested column
            Xt=Xt[[opt$feature_to_plot]]
            # meta data
            SM=dobj$sample_meta
            SM=SM[[obj$factor_name]]
            names(SM)=rownames(dobj)
            
            # remove NA
            SM=SM[!is.na(Xt)]
            Xt=Xt[!is.na(Xt)]
            
            # get color pallete using pmp
            clrs= createClassAndColors(class = SM)
            SM=clrs$class
            
            # count number of values
            L=levels(SM)
            count=numeric(length(L))
            for (i in 1:length(L)) {
              count[i]=sum(SM==L[i])
            }
            
            #prep the plot
            temp=data.frame(x=SM,y=Xt,row.names=names(SM))
            
            if (obj$fill) {
              A = aes_string(x='x',y='y',color='x',fill='x')
            } else {
              A = aes_string(x='x',y='y',color='x')
            }
            
            
            p<-ggplot(temp, A) 
            
            if (obj$style=='boxplot') {
              p=p+geom_boxplot(alpha=0.3)
            } else {
              p=p+geom_violin(alpha=0.3, draw_quantiles = c(0.25, 0.5, 0.75), 
                              scale = "width", adjust = .5)
            }
            
            if (obj$jitter) {
              p=p+geom_jitter(show.legend = F, width = 0.1)
            }
            p=p+xlab(opt$factor) +
              ylab('') +
              ggtitle(varn) +
              scale_colour_manual(values=clrs$manual_colors,name=opt$factor_name)
            
            p=p+scale_fill_manual(values=clrs$manual_colors,name=opt$factor_name)
            
            p=p+theme_Publication(base_size = 12) +
              theme(legend.position="none")
            
            if (opt$show_counts) {
              newlabels=as.character(count)
              newlabels=paste0(as.character(L),'\n(n = ',newlabels,')')
              p=p+scale_x_discrete(labels=newlabels)
            }
            
            if (opt$label_outliers) {
              outliers=numeric()
              for (l in L) {
                IN=which(SM==l)
                outliers=c(outliers,IN[which( Xt[IN]>(quantile(Xt[IN], 0.75) + 1.5*IQR(Xt[IN]))) ] )
                outliers=c(outliers,IN[which( Xt[IN]<(quantile(Xt[IN], 0.25) - 1.5*IQR(Xt[IN]))) ] )
              }
              outlier_df=temp[outliers,]
              outlier_df$out_label=paste0('  ',rownames(temp))[outliers]
              p=p+geom_text(data=outlier_df,aes_(group=~x,color=~x,label=~out_label),hjust='left')
            }
            
            return(p)
            
          }
)




######################################
######################################


#' @eval get_description('mv_histogram')
#' @examples
#' D = MTBLS79_DatasetExperiment()
#' C = mv_histogram(label_outliers=FALSE,by_sample=FALSE)
#' chart_plot(C,D)
#'
#' @import struct
#' @param ... additional slots and values passed to struct_class
#' @return struct object
#' @export mv_histogram
mv_histogram = function(label_outliers=TRUE,by_sample=TRUE,...) {
  out=struct::new_struct('mv_histogram',
                         label_outliers=label_outliers,
                         by_sample=by_sample,
                         ...)
  return(out)
}


.mv_histogram<-setClass(
  "mv_histogram",
  contains='chart',
  slots=c(
    # INPUTS
    label_outliers='entity',
    by_sample='entity'
  ),
  prototype = list(name='Missing value histogram',
                   description='A histogram of the numbers of missing values per sample/feature',
                   type="histogram",
                   .params=c('label_outliers','by_sample'),
                   
                   by_sample=ents$by_sample,
                   label_outliers=ents$label_outliers
  )
)

#' @export
#' @template chart_plot
setMethod(f="chart_plot",
          signature=c("mv_histogram",'DatasetExperiment'),
          definition=function(obj,dobj)
          {
            # get options
            opt=param_list(obj)
            # get data
            Xt=dobj$data
            # meta data
            SM=dobj$sample_meta[ ,1]
            
            if (opt$by_sample) {
              # count NS per sample
              count=apply(Xt,1,function(x) {sum(is.na(x))/length(x)*100})
              txt='Missing values per sample'
            } else {
              # count NS per feature
              count=apply(Xt,2,function(x) {sum(is.na(x))/length(x)*100})
              txt='Missing values per feature'
            }
            
            A=data.frame(x=count)
            p=ggplot (data=A, aes_(x=~x)) + geom_histogram()+
              xlab ("missing values, %")+ ggtitle(txt)+
              xlim (0,100)+
              scale_colour_Publication()+ theme_Publication(base_size = 12)
            
            return(p)
            
          }
)


######################################
######################################


#' @eval get_description('mv_boxplot')
#' @examples
#' D = MTBLS79_DatasetExperiment()
#' C = mv_boxplot(factor_name='Class')
#' chart_plot(C,D)
#'
#' @import struct
#' @export mv_boxplot
mv_boxplot = function(label_outliers=TRUE,by_sample=TRUE,factor_name,show_counts=TRUE,...) {
  out=struct::new_struct('mv_boxplot',
                         label_outliers=label_outliers,
                         by_sample=by_sample,
                         factor_name=factor_name,
                         show_counts=show_counts,
                         ...)
  return(out)
}


.mv_boxplot<-setClass(
  "mv_boxplot",
  contains='chart',
  slots=c(
    # INPUTS
    label_outliers='entity',
    by_sample='entity',
    factor_name='entity',
    show_counts='entity'
  ),
  prototype = list(name='Missing value boxplots',
                   description='Boxplots of the number of missing values per sample/feature.',
                   type="boxplot",
                   .params=c('label_outliers','by_sample','factor_name','show_counts'),
                   
                   by_sample=ents$by_sample,
                   label_outliers=ents$label_outliers,
                   factor_name=ents$factor_name,
                   show_counts=ents$show_counts
  )
)

#' @export
#' @template chart_plot
setMethod(f="chart_plot",
          signature=c("mv_boxplot",'DatasetExperiment'),
          definition=function(obj,dobj) {
            # get options
            opt=param_list(obj)
            # get data
            Xt=dobj$data
            # meta data
            SM=dobj$sample_meta[ ,obj$factor_name]
            
            L=levels(SM)
            
            if (opt$by_sample) {
              # count NS per sample
              count=apply(Xt,1,function(x) {sum(is.na(x))/length(x)*100})
              result=matrix(0,nrow=nrow(Xt),ncol=2)
              result[,1]=count
              result[,2]=SM
              if (sum(result[,1])==0) {
                warning('No missing values were detected')
              }
              txt='Missing values per sample'
              # get color pallete using pmp
              clrs= createClassAndColors(class = SM)
              A=data.frame(x=clrs$class,y=result[,1])
            } else {
              for (i in 1:length(L)) {
                # count NS per feature per group
                count=apply(Xt[SM==L[i],,drop=FALSE],2,function(x) {sum(is.na(x))/sum(SM==L[i])*100})
                temp=data.frame(y=count,x=L[i])
                if (i==1) {
                  result=temp
                } else {
                  result=rbind(result,temp)
                }
              }
              if (sum(result$y)==0) {
                warning('No missing values were detected')
              }
              txt='Missing values per feature'
              # get color pallete using pmp
              clrs= createClassAndColors(class = as.factor(result$x))
              A=data.frame(x=clrs$class,y=result$y)
            }
            
            
            p=ggplot (data=A, aes_(x=~x,y=~y,color=~x)) +
              geom_boxplot() +
              ggtitle(txt) +
              xlab(opt$factor) +
              ylim (0,100)+
              scale_colour_manual(values=clrs$manual_colors,name=opt$factor_name) +
              theme_Publication(base_size = 12) +
              ylab ("missing values, %") +
              coord_flip()+
              theme(legend.position="none")
            
            if (opt$show_counts) {
              L=levels(A$x)
              num=numeric(length(L))
              for (i in 1:length(L)) {
                num[i]=sum(A$x==L[i])
              }
              newlabels=as.character(num)
              newlabels=paste0(as.character(L),'\n(n = ',newlabels,')')
              p=p+scale_x_discrete(labels=newlabels)
            }
            
            if (opt$label_outliers) {
              outliers=numeric()
              for (l in L) {
                IN=which(A$x==l)
                outliers=c(outliers,IN[which( A$y[IN]>(quantile(A$y[IN], 0.75) + 1.5*IQR(A$y[IN]))) ] )
                outliers=c(outliers,IN[which( A$y[IN]<(quantile(A$y[IN], 0.25) - 1.5*IQR(A$y[IN]))) ] )
              }
              outlier_df=A[outliers,]
              
              if (length(outliers)>0){
                if (opt$by_sample) {
                  outlier_df$out_label=paste0('  ',rownames(dobj$data))[outliers]
                } else
                {
                  outlier_df$out_label=paste0('  ',rep(colnames(dobj$data),length(L))[outliers])
                }
                p=p+geom_text(data=outlier_df,aes_(group=~x,color=~x,label=~out_label,angle =~ 90),hjust='left')
              }
              
            }
            
            return(p)
          }
)

#' @eval get_description('DatasetExperiment_dist')
#' @examples
#' D = MTBLS79_DatasetExperiment()
#' C = DatasetExperiment_dist(factor_name='Class')
#' chart_plot(C,D)
#' @import struct
#' @export DatasetExperiment_dist
DatasetExperiment_dist = function(factor_name,per_class=TRUE,...) {
  out=struct::new_struct('DatasetExperiment_dist',
                         factor_name=factor_name,
                         per_class=per_class,
                         ...)
  return(out)
}

.DatasetExperiment_dist<-setClass(
  "DatasetExperiment_dist",
  contains='chart',
  slots=c(
    # INPUTS
    factor_name='entity',
    per_class='entity'
  ),
  prototype = list(name='Feature distribution histogram',
                   description=paste0('A histogram to visualise the distribution of ',
                                      'values within features.'),
                   type="histogram",
                   .params=c('factor_name','per_class'),
                   
                   factor_name=ents$factor_name,
                   per_class=entity(name='Plot per class',
                                    value=TRUE,
                                    type='logical',
                                    description=c(
                                      "TRUE" = 'The distributions are plotted for each class.',
                                      "FALSE" = 'The distribution is plotted for all samples')
                   )
  )
)

#' @export
#' @template chart_plot
setMethod(f="chart_plot",
          signature=c("DatasetExperiment_dist",'DatasetExperiment'),
          definition=function(obj,dobj)
          {
            opt=param_list(obj)
            X=as.matrix(dobj$data)
            S=dobj$sample_meta[[opt$factor_name]]
            
            if (opt$per_class) {
              L=levels(S)
              for (k in L) {
                M=X[S==k,,drop=FALSE]
                if (k==L[1]) {
                  temp=data.frame(values=as.vector(M),group=k)
                } else {
                  temp=rbind(temp,data.frame(values=as.vector(M),group=k))
                }
              }
              out=ggplot(data=temp, aes_(x=~values,color=~group))+
                geom_freqpoly(bins=100)
            } else {
              temp=data.frame(values=as.vector(X),group='Sample')
              out=ggplot(data=temp, aes_(x=~values,color=~group)) +
                geom_freqpoly(bins=100,color='black')
            }
            out = out  +
              xlab('Values') +
              ylab('Density') +
              scale_colour_Publication(name=opt$factor_name)+
              theme_Publication(base_size = 12)
            
            return(out)
            
          }
)

#' @eval get_description('DatasetExperiment_boxplot')
#' @examples
#' D = MTBLS79_DatasetExperiment()
#' C = DatasetExperiment_boxplot(factor_name='Class',number=10,per_class=FALSE)
#' chart_plot(C,D)
#' @return struct object
#' @export DatasetExperiment_boxplot
DatasetExperiment_boxplot = function(factor_name,by_sample=TRUE,per_class=TRUE,number=50,...) {
  out=struct::new_struct('DatasetExperiment_boxplot',
                         factor_name=factor_name,
                         by_sample=by_sample,
                         per_class=per_class,
                         number=number,
                         ...)
  return(out)
}


.DatasetExperiment_boxplot<-setClass(
  "DatasetExperiment_boxplot",
  contains='chart',
  slots=c(
    # INPUTS
    factor_name='entity',
    by_sample='entity',
    per_class='entity',
    number='entity'
  ),
  prototype = list(name='Feature distribution histogram',
                   description=paste0('A boxplot to visualise the distribution of ',
                                      'values within a subset of features.'),
                   type="boxplot",
                   .params=c('factor_name','by_sample','per_class','number'),
                   
                   factor_name=ents$factor_name,
                   by_sample=entity(name='Plot by sample',
                                    value=TRUE,
                                    type='logical',
                                    description=c(
                                      'TRUE' = 'The data is plotted across features for a subset of samples.',
                                      'FALSE' = 'The data is plotted across samples for a subset of features.')
                   ),
                   per_class=entity(name='Plot per class',
                                    value=TRUE,
                                    type='logical',
                                    description=c(
                                      "TRUE" = 'The data is plotted for each class.',
                                      "FALSE" = 'The data is plotted for all samples')
                   ),
                   number=entity(name='Number of features/samples',
                                 value=50,
                                 type=c('numeric','integer'),
                                 description='The number of features/samples plotted.',
                                 max_length=1
                   )
  )
)

#' @export
#' @template chart_plot
setMethod(f="chart_plot",
          signature=c("DatasetExperiment_boxplot",'DatasetExperiment'),
          definition=function(obj,dobj)
          {
            opt=param_list(obj)
            X=dobj$data
            SM=dobj$sample_meta
            if (!opt$by_sample) {
              #s=sample(ncol(X),min(c(ncol(X),opt$number)),replace=FALSE)
              s=seq(from=1,to=ncol(X),length.out = min(c(opt$number,ncol(X))))
              ylabel='Features'
            } else {
              #s=sample(nrow(X),min(c(nrow(X),opt$number)),replace=FALSE)
              s=seq(from=1,to=nrow(X),length.out = min(c(opt$number,nrow(X))))
              X=as.data.frame(t(X))
              colnames(X)=rownames(dobj$data)
              ylabel='Samples'
            }
            s=unique(floor(s))
            
            for (i in s) {
              if (!opt$by_sample){
                sm=SM[[obj$factor_name]]
                fn=row
              } else {
                sm=SM[[obj$factor_name]][i]
              }
              if (!opt$per_class) {
                sm='Sample'
              }
              if (i==s[1]) {
                temp=data.frame(value=X[,i],feature=colnames(X)[i],group=sm)
              } else {
                temp=rbind(temp,data.frame(value=X[,i],feature=colnames(X)[i],group=sm))
              }
            }
            out=ggplot(data=temp, aes_(x=~feature,y=~value,color=~group)) +
              geom_boxplot()+
              xlab(ylabel) +
              ylab('Values') +
              scale_colour_Publication(name=opt$factor_name)+
              theme_Publication(base_size = 12)+
              coord_flip() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
            if  (!opt$per_class) {
              out=out+theme(legend.position="none")
            }
            return(out)
            
          }
)


#' @eval get_description('compare_dist')
#' @examples
#' D1=MTBLS79_DatasetExperiment(filtered=FALSE)
#' D2=MTBLS79_DatasetExperiment(filtered=TRUE)
#' C = compare_dist(factor_name='Class')
#' chart_plot(C,D1,D2)
#' @import struct
#' @export compare_dist
compare_dist = function(factor_name,...) {
  out=struct::new_struct('compare_dist',
                         factor_name=factor_name,
                         ...)
  return(out)
}

.compare_dist<-setClass(
  "compare_dist",
  contains=c('chart','stato'),
  slots=c(factor_name='entity'),
  prototype = list(name='Compare distributions',
                   description=paste0('Histograms and boxplots computed across samples ',
                                      'and features are used to visually compare two datasets e.g. before ',
                                      'and after filtering and/or normalisation.'),
                   type="mixed",
                   stato_id='STATO:0000161',
                   .params=c('factor_name'),
                   factor_name=ents$factor_name
  )
)

#' @export
#' @import gridExtra
#' @template chart_plot
#' @param eobj a second DatasetExperiment object to compare with the first
setMethod(f="chart_plot",
          signature=c("compare_dist",'DatasetExperiment'),
          definition=function(obj,dobj,eobj)
          {
            
            # match features across datasets
            inboth=intersect(colnames(dobj),colnames(eobj))
            dobj=dobj[,inboth]
            eobj=eobj[,inboth]
            
            C=DatasetExperiment_boxplot(by_sample=FALSE,per_class=FALSE,number=30,factor_name=obj$factor_name)
            
            C1=chart_plot(C,dobj)+
              labs(tag='c)')+
              theme(axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'Before processing')
            
            C2=chart_plot(C,eobj)+
              labs(tag='d)')+
              theme(axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'After processing')
            
            C=DatasetExperiment_dist(factor_name=obj$factor_name,per_class=TRUE)
            
            C3=chart_plot(C,dobj)+
              labs(tag='a)')+
              theme(legend.position="none",axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'Before processing')
            
            C4=chart_plot(C,eobj)+
              labs(tag='b)')+
              theme(legend.position="none",axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'After processing')
            
            rC1=ggplot_build(C1)$layout$panel_scales_y[[1]]$range$range
            rC2=ggplot_build(C2)$layout$panel_scales_y[[1]]$range$range
            rC3=ggplot_build(C3)$layout$panel_scales_x[[1]]$range$range
            rC4=ggplot_build(C4)$layout$panel_scales_x[[1]]$range$range
            rC1=max(abs(rC1))
            rC2=max(abs(rC2))
            rC3=max(abs(rC3))
            rC4=max(abs(rC4))
            
            C1=C1+ylim(c(-rC1,rC1))
            C3=C3+xlim(c(-rC1,rC1))
            C2=C2+ylim(c(-rC2,rC2))
            C4=C4+xlim(c(-rC2,rC2))
            
            
            gA <- ggplotGrob(C3)
            gB <- ggplotGrob(C4)
            gC <- ggplotGrob(C1)
            gD <- ggplotGrob(C2)
            
            L=matrix(nrow=4,ncol=1)
            L=as.matrix(rbind(c(1,2),c(3,4),c(3,4),c(3,4)))
            
            temp=gtable_cbind(gA,gB)
            heights1=temp$heights
            gA$heights=heights1
            gB$heights=heights1
            
            temp=gtable_cbind(gC,gD)
            heights2=temp$heights
            gC$heights=heights2
            gD$heights=heights2
            
            maxWidth = grid::unit.pmax(gA$widths[2:5], gB$widths[2:5],gC$widths[2:5],gD$widths[2:5])
            gA$widths[2:5] <- as.list(maxWidth)
            gB$widths[2:5] <- as.list(maxWidth)
            gC$widths[2:5] <- as.list(maxWidth)
            gD$widths[2:5] <- as.list(maxWidth)
            
            p=grid.arrange(grobs=list(gA,gB,gC,gD),layout_matrix=L)
            
            return(p)
            
          }
)


#############################################################
#############################################################

#' @eval get_description('DatasetExperiment_heatmap')
#' @export DatasetExperiment_heatmap
#' @examples
#' D = iris_DatasetExperiment()
#' C = DatasetExperiment_heatmap()
#' chart_plot(C,D)
DatasetExperiment_heatmap = function(na_colour='#FF00E4',...) {
  out=struct::new_struct('DatasetExperiment_heatmap',
                         na_colour=na_colour,...)
  return(out)
}


.DatasetExperiment_heatmap<-setClass(
  "DatasetExperiment_heatmap",
  contains=c('chart'),
  slots=c(
    # INPUTS
    na_colour='entity'
  ),
  prototype = list(name='DatasetExperiment heatmap',
                   description='A heatmap to visualise the measured values in a data matrix.',
                   type="scatter",
                   libraries='reshape2',
                   .params=c('na_colour'),
                   
                   na_colour=entity(name='Missing value colour',
                                    value='#FF00E4',
                                    type='character',
                                    description='The hex colour code used to plot missing values.',
                                    max_length=1
                   )
  )
)

#' @export
#' @template chart_plot
setMethod(f="chart_plot",
          signature=c("DatasetExperiment_heatmap",'DatasetExperiment'),
          definition=function(obj,dobj)
          {
            X=reshape2::melt(as.matrix(dobj$data))
            colnames(X)=c('Sample','Feature','Peak area')
            p=ggplot(data=X,aes(x=`Feature`,y=`Sample`,fill=`Peak area`)) + geom_raster() +
              scale_colour_Publication()+
              theme_Publication(base_size = 12)+
              scale_fill_viridis_c(na.value=obj$na_colour)+
              theme(axis.text.x=element_blank(),
                    axis.ticks.x=element_blank(),
                    axis.text.y=element_blank(),
                    axis.ticks.y=element_blank()
              )
            
            
            return(p)
            
          }
)