#' @eval get_description('feature_boxplot') #' @examples #' D = MTBLS79_DatasetExperiment #' C = feature_boxplot(factor_name='Species',feature_to_plot='Petal.Width') #' chart_plot(C,D) #' @export feature_boxplot feature_boxplot = function(label_outliers=TRUE,feature_to_plot, factor_name,show_counts=TRUE,style='boxplot',jitter=FALSE,fill=FALSE,...) { out=struct::new_struct('feature_boxplot', label_outliers=label_outliers, feature_to_plot=feature_to_plot, factor_name=factor_name, show_counts=show_counts, style=style, jitter=jitter, fill=fill, ...) return(out) } .feature_boxplot<-setClass( "feature_boxplot", contains=c('chart'), slots=c( # INPUTS label_outliers='entity', feature_to_plot='entity', factor_name='entity', show_counts='entity', style='enum', jitter='entity', fill='entity' ), prototype = list(name='Feature boxplot', description='A boxplot to visualise the distribution of values within a feature.', type="boxlot", .params=c('label_outliers','feature_to_plot','factor_name','show_counts','style','jitter','fill'), label_outliers=entity(name='Label outliers', value=TRUE, type='logical', description=c( 'TRUE' = 'The index for outlier samples is included on the plot.', 'FALSE' = 'No labels are displayed.' ) ), feature_to_plot=entity(name='Feature to plot', value='V1', type=c('character','numeric','integer'), description='The column name of the plotted feature.' ), factor_name=ents$factor_name, show_counts=ents$show_counts, style=enum( name='Plot style', description=c( 'boxplot' = 'boxplot style', 'violin' = 'violon plot style' ), allowed=c('boxplot','violin'), value='boxplot', max_length=1, type='character' ), jitter=entity( name = 'Jitter', description = 'Include points plotted with added jitter.', value=FALSE, type='logical', max_length=1 ), fill=entity( name = 'Fill', description = 'Block fill the boxes or violins with the group colour.', value=FALSE, type='logical', max_length=1 ) ) ) #' @export #' @template chart_plot setMethod(f="chart_plot", signature=c("feature_boxplot",'DatasetExperiment'), definition=function(obj,dobj) { # get options opt=param_list(obj) # get data Xt=dobj$data # column name if (is.numeric(opt$feature_to_plot)) { varn=colnames(Xt)[opt$feature_to_plot] } else { varn=opt$feature_to_plot } # get requested column Xt=Xt[[opt$feature_to_plot]] # meta data SM=dobj$sample_meta SM=SM[[obj$factor_name]] names(SM)=rownames(dobj) # remove NA SM=SM[!is.na(Xt)] Xt=Xt[!is.na(Xt)] # get color pallete using pmp clrs= createClassAndColors(class = SM) SM=clrs$class # count number of values L=levels(SM) count=numeric(length(L)) for (i in 1:length(L)) { count[i]=sum(SM==L[i]) } #prep the plot temp=data.frame(x=SM,y=Xt,row.names=names(SM)) if (obj$fill) { A = aes_string(x='x',y='y',color='x',fill='x') } else { A = aes_string(x='x',y='y',color='x') } p<-ggplot(temp, A) if (obj$style=='boxplot') { p=p+geom_boxplot(alpha=0.3) } else { p=p+geom_violin(alpha=0.3, draw_quantiles = c(0.25, 0.5, 0.75), scale = "width", adjust = .5) } if (obj$jitter) { p=p+geom_jitter(show.legend = F, width = 0.1) } p=p+xlab(opt$factor) + ylab('') + ggtitle(varn) + scale_colour_manual(values=clrs$manual_colors,name=opt$factor_name) p=p+scale_fill_manual(values=clrs$manual_colors,name=opt$factor_name) p=p+theme_Publication(base_size = 12) + theme(legend.position="none") if (opt$show_counts) { newlabels=as.character(count) newlabels=paste0(as.character(L),'\n(n = ',newlabels,')') p=p+scale_x_discrete(labels=newlabels) } if (opt$label_outliers) { outliers=numeric() for (l in L) { IN=which(SM==l) outliers=c(outliers,IN[which( Xt[IN]>(quantile(Xt[IN], 0.75) + 1.5*IQR(Xt[IN]))) ] ) outliers=c(outliers,IN[which( Xt[IN]<(quantile(Xt[IN], 0.25) - 1.5*IQR(Xt[IN]))) ] ) } outlier_df=temp[outliers,] outlier_df$out_label=paste0(' ',rownames(temp))[outliers] p=p+geom_text(data=outlier_df,aes_(group=~x,color=~x,label=~out_label),hjust='left') } return(p) } ) ###################################### ###################################### #' @eval get_description('mv_histogram') #' @examples #' D = MTBLS79_DatasetExperiment() #' C = mv_histogram(label_outliers=FALSE,by_sample=FALSE) #' chart_plot(C,D) #' #' @import struct #' @param ... additional slots and values passed to struct_class #' @return struct object #' @export mv_histogram mv_histogram = function(label_outliers=TRUE,by_sample=TRUE,...) { out=struct::new_struct('mv_histogram', label_outliers=label_outliers, by_sample=by_sample, ...) return(out) } .mv_histogram<-setClass( "mv_histogram", contains='chart', slots=c( # INPUTS label_outliers='entity', by_sample='entity' ), prototype = list(name='Missing value histogram', description='A histogram of the numbers of missing values per sample/feature', type="histogram", .params=c('label_outliers','by_sample'), by_sample=ents$by_sample, label_outliers=ents$label_outliers ) ) #' @export #' @template chart_plot setMethod(f="chart_plot", signature=c("mv_histogram",'DatasetExperiment'), definition=function(obj,dobj) { # get options opt=param_list(obj) # get data Xt=dobj$data # meta data SM=dobj$sample_meta[ ,1] if (opt$by_sample) { # count NS per sample count=apply(Xt,1,function(x) {sum(is.na(x))/length(x)*100}) txt='Missing values per sample' } else { # count NS per feature count=apply(Xt,2,function(x) {sum(is.na(x))/length(x)*100}) txt='Missing values per feature' } A=data.frame(x=count) p=ggplot (data=A, aes_(x=~x)) + geom_histogram()+ xlab ("missing values, %")+ ggtitle(txt)+ xlim (0,100)+ scale_colour_Publication()+ theme_Publication(base_size = 12) return(p) } ) ###################################### ###################################### #' @eval get_description('mv_boxplot') #' @examples #' D = MTBLS79_DatasetExperiment() #' C = mv_boxplot(factor_name='Class') #' chart_plot(C,D) #' #' @import struct #' @export mv_boxplot mv_boxplot = function(label_outliers=TRUE,by_sample=TRUE,factor_name,show_counts=TRUE,...) { out=struct::new_struct('mv_boxplot', label_outliers=label_outliers, by_sample=by_sample, factor_name=factor_name, show_counts=show_counts, ...) return(out) } .mv_boxplot<-setClass( "mv_boxplot", contains='chart', slots=c( # INPUTS label_outliers='entity', by_sample='entity', factor_name='entity', show_counts='entity' ), prototype = list(name='Missing value boxplots', description='Boxplots of the number of missing values per sample/feature.', type="boxplot", .params=c('label_outliers','by_sample','factor_name','show_counts'), by_sample=ents$by_sample, label_outliers=ents$label_outliers, factor_name=ents$factor_name, show_counts=ents$show_counts ) ) #' @export #' @template chart_plot setMethod(f="chart_plot", signature=c("mv_boxplot",'DatasetExperiment'), definition=function(obj,dobj) { # get options opt=param_list(obj) # get data Xt=dobj$data # meta data SM=dobj$sample_meta[ ,obj$factor_name] L=levels(SM) if (opt$by_sample) { # count NS per sample count=apply(Xt,1,function(x) {sum(is.na(x))/length(x)*100}) result=matrix(0,nrow=nrow(Xt),ncol=2) result[,1]=count result[,2]=SM if (sum(result[,1])==0) { warning('No missing values were detected') } txt='Missing values per sample' # get color pallete using pmp clrs= createClassAndColors(class = SM) A=data.frame(x=clrs$class,y=result[,1]) } else { for (i in 1:length(L)) { # count NS per feature per group count=apply(Xt[SM==L[i],,drop=FALSE],2,function(x) {sum(is.na(x))/sum(SM==L[i])*100}) temp=data.frame(y=count,x=L[i]) if (i==1) { result=temp } else { result=rbind(result,temp) } } if (sum(result$y)==0) { warning('No missing values were detected') } txt='Missing values per feature' # get color pallete using pmp clrs= createClassAndColors(class = as.factor(result$x)) A=data.frame(x=clrs$class,y=result$y) } p=ggplot (data=A, aes_(x=~x,y=~y,color=~x)) + geom_boxplot() + ggtitle(txt) + xlab(opt$factor) + ylim (0,100)+ scale_colour_manual(values=clrs$manual_colors,name=opt$factor_name) + theme_Publication(base_size = 12) + ylab ("missing values, %") + coord_flip()+ theme(legend.position="none") if (opt$show_counts) { L=levels(A$x) num=numeric(length(L)) for (i in 1:length(L)) { num[i]=sum(A$x==L[i]) } newlabels=as.character(num) newlabels=paste0(as.character(L),'\n(n = ',newlabels,')') p=p+scale_x_discrete(labels=newlabels) } if (opt$label_outliers) { outliers=numeric() for (l in L) { IN=which(A$x==l) outliers=c(outliers,IN[which( A$y[IN]>(quantile(A$y[IN], 0.75) + 1.5*IQR(A$y[IN]))) ] ) outliers=c(outliers,IN[which( A$y[IN]<(quantile(A$y[IN], 0.25) - 1.5*IQR(A$y[IN]))) ] ) } outlier_df=A[outliers,] if (length(outliers)>0){ if (opt$by_sample) { outlier_df$out_label=paste0(' ',rownames(dobj$data))[outliers] } else { outlier_df$out_label=paste0(' ',rep(colnames(dobj$data),length(L))[outliers]) } p=p+geom_text(data=outlier_df,aes_(group=~x,color=~x,label=~out_label,angle =~ 90),hjust='left') } } return(p) } ) #' @eval get_description('DatasetExperiment_dist') #' @examples #' D = MTBLS79_DatasetExperiment() #' C = DatasetExperiment_dist(factor_name='Class') #' chart_plot(C,D) #' @import struct #' @export DatasetExperiment_dist DatasetExperiment_dist = function(factor_name,per_class=TRUE,...) { out=struct::new_struct('DatasetExperiment_dist', factor_name=factor_name, per_class=per_class, ...) return(out) } .DatasetExperiment_dist<-setClass( "DatasetExperiment_dist", contains='chart', slots=c( # INPUTS factor_name='entity', per_class='entity' ), prototype = list(name='Feature distribution histogram', description=paste0('A histogram to visualise the distribution of ', 'values within features.'), type="histogram", .params=c('factor_name','per_class'), factor_name=ents$factor_name, per_class=entity(name='Plot per class', value=TRUE, type='logical', description=c( "TRUE" = 'The distributions are plotted for each class.', "FALSE" = 'The distribution is plotted for all samples') ) ) ) #' @export #' @template chart_plot setMethod(f="chart_plot", signature=c("DatasetExperiment_dist",'DatasetExperiment'), definition=function(obj,dobj) { opt=param_list(obj) X=as.matrix(dobj$data) S=dobj$sample_meta[[opt$factor_name]] if (opt$per_class) { L=levels(S) for (k in L) { M=X[S==k,,drop=FALSE] if (k==L[1]) { temp=data.frame(values=as.vector(M),group=k) } else { temp=rbind(temp,data.frame(values=as.vector(M),group=k)) } } out=ggplot(data=temp, aes_(x=~values,color=~group))+ geom_freqpoly(bins=100) } else { temp=data.frame(values=as.vector(X),group='Sample') out=ggplot(data=temp, aes_(x=~values,color=~group)) + geom_freqpoly(bins=100,color='black') } out = out + xlab('Values') + ylab('Density') + scale_colour_Publication(name=opt$factor_name)+ theme_Publication(base_size = 12) return(out) } ) #' @eval get_description('DatasetExperiment_boxplot') #' @examples #' D = MTBLS79_DatasetExperiment() #' C = DatasetExperiment_boxplot(factor_name='Class',number=10,per_class=FALSE) #' chart_plot(C,D) #' @return struct object #' @export DatasetExperiment_boxplot DatasetExperiment_boxplot = function(factor_name,by_sample=TRUE,per_class=TRUE,number=50,...) { out=struct::new_struct('DatasetExperiment_boxplot', factor_name=factor_name, by_sample=by_sample, per_class=per_class, number=number, ...) return(out) } .DatasetExperiment_boxplot<-setClass( "DatasetExperiment_boxplot", contains='chart', slots=c( # INPUTS factor_name='entity', by_sample='entity', per_class='entity', number='entity' ), prototype = list(name='Feature distribution histogram', description=paste0('A boxplot to visualise the distribution of ', 'values within a subset of features.'), type="boxplot", .params=c('factor_name','by_sample','per_class','number'), factor_name=ents$factor_name, by_sample=entity(name='Plot by sample', value=TRUE, type='logical', description=c( 'TRUE' = 'The data is plotted across features for a subset of samples.', 'FALSE' = 'The data is plotted across samples for a subset of features.') ), per_class=entity(name='Plot per class', value=TRUE, type='logical', description=c( "TRUE" = 'The data is plotted for each class.', "FALSE" = 'The data is plotted for all samples') ), number=entity(name='Number of features/samples', value=50, type=c('numeric','integer'), description='The number of features/samples plotted.', max_length=1 ) ) ) #' @export #' @template chart_plot setMethod(f="chart_plot", signature=c("DatasetExperiment_boxplot",'DatasetExperiment'), definition=function(obj,dobj) { opt=param_list(obj) X=dobj$data SM=dobj$sample_meta if (!opt$by_sample) { #s=sample(ncol(X),min(c(ncol(X),opt$number)),replace=FALSE) s=seq(from=1,to=ncol(X),length.out = min(c(opt$number,ncol(X)))) ylabel='Features' } else { #s=sample(nrow(X),min(c(nrow(X),opt$number)),replace=FALSE) s=seq(from=1,to=nrow(X),length.out = min(c(opt$number,nrow(X)))) X=as.data.frame(t(X)) colnames(X)=rownames(dobj$data) ylabel='Samples' } s=unique(floor(s)) for (i in s) { if (!opt$by_sample){ sm=SM[[obj$factor_name]] fn=row } else { sm=SM[[obj$factor_name]][i] } if (!opt$per_class) { sm='Sample' } if (i==s[1]) { temp=data.frame(value=X[,i],feature=colnames(X)[i],group=sm) } else { temp=rbind(temp,data.frame(value=X[,i],feature=colnames(X)[i],group=sm)) } } out=ggplot(data=temp, aes_(x=~feature,y=~value,color=~group)) + geom_boxplot()+ xlab(ylabel) + ylab('Values') + scale_colour_Publication(name=opt$factor_name)+ theme_Publication(base_size = 12)+ coord_flip() + theme(axis.text.x = element_text(angle = 90, hjust = 1)) if (!opt$per_class) { out=out+theme(legend.position="none") } return(out) } ) #' @eval get_description('compare_dist') #' @examples #' D1=MTBLS79_DatasetExperiment(filtered=FALSE) #' D2=MTBLS79_DatasetExperiment(filtered=TRUE) #' C = compare_dist(factor_name='Class') #' chart_plot(C,D1,D2) #' @import struct #' @export compare_dist compare_dist = function(factor_name,...) { out=struct::new_struct('compare_dist', factor_name=factor_name, ...) return(out) } .compare_dist<-setClass( "compare_dist", contains=c('chart','stato'), slots=c(factor_name='entity'), prototype = list(name='Compare distributions', description=paste0('Histograms and boxplots computed across samples ', 'and features are used to visually compare two datasets e.g. before ', 'and after filtering and/or normalisation.'), type="mixed", stato_id='STATO:0000161', .params=c('factor_name'), factor_name=ents$factor_name ) ) #' @export #' @import gridExtra #' @template chart_plot #' @param eobj a second DatasetExperiment object to compare with the first setMethod(f="chart_plot", signature=c("compare_dist",'DatasetExperiment'), definition=function(obj,dobj,eobj) { # match features across datasets inboth=intersect(colnames(dobj),colnames(eobj)) dobj=dobj[,inboth] eobj=eobj[,inboth] C=DatasetExperiment_boxplot(by_sample=FALSE,per_class=FALSE,number=30,factor_name=obj$factor_name) C1=chart_plot(C,dobj)+ labs(tag='c)')+ theme(axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'Before processing') C2=chart_plot(C,eobj)+ labs(tag='d)')+ theme(axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'After processing') C=DatasetExperiment_dist(factor_name=obj$factor_name,per_class=TRUE) C3=chart_plot(C,dobj)+ labs(tag='a)')+ theme(legend.position="none",axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'Before processing') C4=chart_plot(C,eobj)+ labs(tag='b)')+ theme(legend.position="none",axis.text.x = element_text(angle = 90, hjust = 1))+ggtitle(NULL,'After processing') rC1=ggplot_build(C1)$layout$panel_scales_y[[1]]$range$range rC2=ggplot_build(C2)$layout$panel_scales_y[[1]]$range$range rC3=ggplot_build(C3)$layout$panel_scales_x[[1]]$range$range rC4=ggplot_build(C4)$layout$panel_scales_x[[1]]$range$range rC1=max(abs(rC1)) rC2=max(abs(rC2)) rC3=max(abs(rC3)) rC4=max(abs(rC4)) C1=C1+ylim(c(-rC1,rC1)) C3=C3+xlim(c(-rC1,rC1)) C2=C2+ylim(c(-rC2,rC2)) C4=C4+xlim(c(-rC2,rC2)) gA <- ggplotGrob(C3) gB <- ggplotGrob(C4) gC <- ggplotGrob(C1) gD <- ggplotGrob(C2) L=matrix(nrow=4,ncol=1) L=as.matrix(rbind(c(1,2),c(3,4),c(3,4),c(3,4))) temp=gtable_cbind(gA,gB) heights1=temp$heights gA$heights=heights1 gB$heights=heights1 temp=gtable_cbind(gC,gD) heights2=temp$heights gC$heights=heights2 gD$heights=heights2 maxWidth = grid::unit.pmax(gA$widths[2:5], gB$widths[2:5],gC$widths[2:5],gD$widths[2:5]) gA$widths[2:5] <- as.list(maxWidth) gB$widths[2:5] <- as.list(maxWidth) gC$widths[2:5] <- as.list(maxWidth) gD$widths[2:5] <- as.list(maxWidth) p=grid.arrange(grobs=list(gA,gB,gC,gD),layout_matrix=L) return(p) } ) ############################################################# ############################################################# #' @eval get_description('DatasetExperiment_heatmap') #' @export DatasetExperiment_heatmap #' @examples #' D = iris_DatasetExperiment() #' C = DatasetExperiment_heatmap() #' chart_plot(C,D) DatasetExperiment_heatmap = function(na_colour='#FF00E4',...) { out=struct::new_struct('DatasetExperiment_heatmap', na_colour=na_colour,...) return(out) } .DatasetExperiment_heatmap<-setClass( "DatasetExperiment_heatmap", contains=c('chart'), slots=c( # INPUTS na_colour='entity' ), prototype = list(name='DatasetExperiment heatmap', description='A heatmap to visualise the measured values in a data matrix.', type="scatter", libraries='reshape2', .params=c('na_colour'), na_colour=entity(name='Missing value colour', value='#FF00E4', type='character', description='The hex colour code used to plot missing values.', max_length=1 ) ) ) #' @export #' @template chart_plot setMethod(f="chart_plot", signature=c("DatasetExperiment_heatmap",'DatasetExperiment'), definition=function(obj,dobj) { X=reshape2::melt(as.matrix(dobj$data)) colnames(X)=c('Sample','Feature','Peak area') p=ggplot(data=X,aes(x=`Feature`,y=`Sample`,fill=`Peak area`)) + geom_raster() + scale_colour_Publication()+ theme_Publication(base_size = 12)+ scale_fill_viridis_c(na.value=obj$na_colour)+ theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank() ) return(p) } )