883f7f27 |
#' pca_correlation_plot class
#'
#' plots the correlation between features and selected components.
#'
#' @import struct
#' @export pca_correlation_plot
#' @include PCA_class.R
pca_correlation_plot<-setClass(
"pca_correlation_plot",
contains='chart',
slots=c(
# INPUTS
params.components='entity'
),
prototype = list(name='Feature boxplot',
description='plots a boxplot of a chosen feature for each group of a dataset.',
type="boxlot",
params=c('components'),
params.components=entity(name='Components to plot',
value=c(1,2),
type='numeric',
description='the components to be plotted e.g. c(1,2) plots component 1 on the x axis and component 2 on the y axis.'
)
)
)
#' @export
setMethod(f="chart.plot",
signature=c("pca_correlation_plot",'PCA'),
definition=function(obj,dobj)
{
opt=param.list(obj)
A=data.frame(x=output.value(dobj,'correlation')[,opt$components[1]],y=output.value(dobj,'correlation')[,opt$components[2]])
dat <- circleFun(c(0,0),2,npoints = 50)
out=ggplot(data=A,aes_(x=~x,y=~y)) +
geom_point() +
scale_colour_Publication() +
theme_Publication(base_size = 12)+
coord_fixed(xlim = c(-1,1),ylim=c(-1,1)) +
geom_path(data=dat,aes_(x=~x,y=~y),inherit.aes = FALSE)
return(out)
}
)
#################################################
#################################################
#' pca_scores_plot class
#'
#' 2d scatter plot of princpal component scores.
#'
#' @import struct
#' @export pca_scores_plot
#' @include PCA_class.R
pca_scores_plot<-setClass(
"pca_scores_plot",
contains='chart',
slots=c(
# INPUTS
params.components='entity',
params.points_to_label='enum',
params.factor_name='entity',
params.groups='entity',
params.ellipse='enum'
),
prototype = list(name='PCA scores plot',
description='Plots a 2d scatter plot of the selected components',
type="scatter",
params=c('components','points_to_label','factor_name','groups'),
params.components=entity(name='Components to plot',
value=c(1,2),
type='numeric',
description='the components to be plotted e.g. c(1,2) plots component 1 on the x axis and component 2 on the y axis.'
),
params.points_to_label=enum(name='points_to_label',
value='none',
type='character',
description='("none"), "all", or "outliers" will be labelled on the plot.'
),
params.factor_name=entity(name='Factor name',
value='factor',
type='character',
description='The name of the factor to be displayed on the plot. Appears on axis and legend titles, for example. By default the column name of the meta data will be used where possible.'
),
params.groups=entity(name='Groups',
value=factor(),
type='factor',
description='The name of the factor to be displayed on the plot. Appears on axis and legend titles, for example. By default the column name of the meta data will be used where possible.'
),
params.ellipse=enum(name = 'Plot ellipses',description=c(
'"all" will plot all ellipses',
'"group" will only plot group ellipses',
'"none" will not plot any ellipses',
'"sample" will plot ellipse for all samples (ignoring group)'),
list=c('all','group','none','sample'),
value='all')
)
)
#' @importFrom sp point.in.polygon
#' @import ggplot2
#' @importFrom scales squish
#' @export
setMethod(f="chart.plot",
signature=c("pca_scores_plot",'PCA'),
definition=function(obj,dobj)
{
if (obj$points_to_label=='outliers' & !(obj$ellipse %in% c('all','sample'))) {
warning('Outliers are only labelled when plotting the sample ellipse')
}
opt=param.list(obj)
scores=output.value(dobj,'scores')
pvar=(colSums(scores*scores)/output.value(dobj,'ssx'))*100 # percent variance
pvar=round(pvar,digits = 2) # round to 2 decimal places
shapes <- rep(19,nrow(scores)) # filled circles for all samples
slabels <- rownames(scores)
x=scores[,opt$components[1]]
y=scores[,opt$components[2]]
xlabel=paste("PC",opt$components[[1]],' (',sprintf("%.1f",pvar[opt$components[[1]]]),'%)',sep='')
ylabel=paste("PC",opt$components[[2]],' (',sprintf("%.1f",pvar[opt$components[[2]]]),'%)',sep='')
# add a space to the front of the labels to offset them from the points, because nudge_x is in data units
for (i in 1:length(slabels))
{
slabels[i]=paste0(' ',slabels[i], ' ')
}
if (is(opt$groups,'factor')) {
plotClass=pmp::createClassAndColors(opt$groups)
opt$groups=plotClass$class
}
# build the plot
A <- data.frame (group=opt$groups,x=x, y=y)
out=ggplot (data=A, aes_(x=~x,y=~y,colour=~group,label=~slabels,shapes=~shapes)) +
geom_point(na.rm=TRUE) +
xlab(xlabel) +
ylab(ylabel) +
ggtitle('PCA Scores', subtitle=NULL) +
if (obj$ellipse %in% c('all','group')) {
stat_ellipse(type='norm') # ellipse for individual groups
}
if (is(opt$groups,'factor')) # if a factor then plot by group using the colours from pmp package
{
out=out+scale_colour_manual(values=plotClass$manual_colors,name=opt$factor_name)
}
else # assume continuous and use the default colour gradient
{
out=out+scale_colour_viridis_c(limits=quantile(opt$groups,c(0.05,0.95),na.rm = TRUE),oob=squish,name=opt$factor_name)
}
out=out+theme_Publication(base_size = 12)
# add ellipse for all samples (ignoring group)
if (obj$ellipse %in% c('all','sample')) {
out=out+stat_ellipse(type='norm',mapping=aes(x=x,y=y),colour="#C0C0C0",linetype='dashed',data=A)
}
if (obj$ellipse %in% c('all','sample')) { # only do this if we plotted the sample ellipse
# identify samples outside the ellipse
build=ggplot_build(out)$data
points=build[[1]]
ell=build[[length(build)]]
# outlier for dataset ellipse
points$in.ell=as.logical(sp::point.in.polygon(points$x,points$y,ell$x,ell$y))
# label outliers if
if (opt$points_to_label=='outliers')
{
if (!all(points$in.ell))
{
temp=subset(points,!points$in.ell)
temp$group=opt$groups[!points$in.ell]
out=out+geom_text(data=temp,aes_(x=~x,y=~y,label=~label,colour=~group),vjust="inward",hjust="inward")
}
}
# add a list of outliers to the plot object
out$outliers=trimws(slabels[!points$in.ell])
}
# label all points if requested
if (opt$points_to_label=='all')
{
out=out+geom_text(vjust="inward",hjust="inward")
}
return(out)
}
)
#################################################################
#################################################################
#' pca_biplot_plot class
#'
#' 2d scatter plot of princpal component scores overlaid with principal component loadings.
#'
#' @import struct
#' @export pca_biplot_plot
#' @include PCA_class.R
pca_biplot_plot<-setClass(
"pca_biplot_plot",
contains='chart',
slots=c(
# INPUTS
params.components='entity',
params.points_to_label='entity',
params.factor_name='entity',
params.groups='entity',
params.sf='entity'
),
prototype = list(name='Feature boxplot',
description='plots a boxplot of a chosen feature for each group of a dataset.',
type="boxlot",
params=c('components','points_to_label','factor_name','groups'),
params.components=entity(name='Components to plot',
value=c(1,2),
type='numeric',
description='the components to be plotted e.g. c(1,2) plots component 1 on the x axis and component 2 on the y axis.'
),
params.points_to_label=entity(name='points_to_label',
value='none',
type='character',
description='("none"), "all", or "outliers" will be labelled on the plot.'
),
params.factor_name=entity(name='Factor name',
value='factor',
type='character',
description='The name of the factor to be displayed on the plot. Appears on axis and legend titles, for example. By default the column name of the meta data will be used where possible.'
),
params.groups=entity(name='Groups',
value=factor(),
type='factor',
description='The name of the factor to be displayed on the plot. Appears on axis and legend titles, for example. By default the column name of the meta data will be used where possible.'
),
params.scale_factor=entity(name='Loadings scale factor',
value=0.95,
type='numeric',
description='Scaling factor to apply to loadings. Default = 0.95.'
)
)
)
#' @export
setMethod(f="chart.plot",
signature=c("pca_biplot_plot",'PCA'),
definition=function(obj,dobj)
{
opt=param.list(obj)
Ts=output.value(dobj,'scores')
pvar=(colSums(Ts*Ts)/output.value(dobj,'ssx'))*100
pvar=round(pvar,digits = 1)
xlabel=paste("PC",opt$components[[1]],' (',sprintf("%.1f",pvar[opt$components[[1]]]),'%)',sep='')
ylabel=paste("PC",opt$components[[2]],' (',sprintf("%.1f",pvar[opt$components[[2]]]),'%)',sep='')
P=output.value(dobj,'loadings')
Ev=output.value(dobj,'eigenvalues')
# eigenvalues were square rooted when training PCA
Ev=Ev[,1]
Ev=Ev^2
## unscale the scores
#ev are the norms of scores
Ts=as.matrix(Ts) %*% diag(1/Ev) # these are normalised scores
# scale scores and loadings by alpha
Ts=Ts %*% diag(Ev^(1-opt$scale_factor))
P=as.matrix(P) %*% diag(Ev^(opt$scale_factor))
# additionaly scale the loadings
sf=min(max(abs(Ts[,opt$components[1]]))/max(abs(P[,opt$components[1]])),
max(abs(Ts[,opt$components[2]]))/max(abs(P[,opt$components[2]])))
output.value(obj,'scores')=as.data.frame(Ts) # nb object not returned, so only temporary scaling
# plot
A=data.frame("x"=P[,opt$components[1]]*sf*0.8,"y"=P[,opt$components[2]]*sf*0.8)
SP=pca_scores_plot()
if (opt$style=='points')
{
out=out+
geom_point(data=A,inherit.aes = FALSE,color='black',mapping = aes_(x=~x,y=~y))
}
if (opt$style=='arrow')
{
out=out+
geom_segment(data=A,inherit.aes = FALSE,color='black',mapping = aes_(x=~0,y=~0,xend=~x,yend=~y),arrow=arrow(length=unit(8,'points')))
}
out=out+ggtitle('PCA biplot', subtitle=NULL) +
xlab(xlabel) + ylab(ylabel)
#label features if requested
if (opt$label_features)
{
vlabels=opt$feature_labels
for (i in 1:length(opt$feature_labels))
{
vlabels[i]=paste0(' ',opt$feature_labels[i], ' ')
}
A$vlabels=vlabels
out=out+
geom_text(data=A,aes_(x=~x,y=~y,label=~vlabels),vjust="inward",hjust="inward",inherit.aes = FALSE)
}
return(out)
}
)
# loadings plot
pca_loadings_plot=function(obj,opt)
{
P=output.value(obj,'loadings')
# 1D plot
if (length(opt$components)==1)
{
A=data.frame("x"=1:nrow(P),"y"=P[,opt$components[1]])
out=ggplot(data=A,aes_(x=~x,y=~y)) +
geom_line() +
ggtitle('PCA Loadings', subtitle=NULL) +
xlab('Feature') +
ylab('Loading') +
scale_colour_Publication() +
theme_Publication(base_size = 12)
return(out)
}
# 2D plot
if (length(opt$components)==2)
{
A=data.frame("x"=P[,opt$components[1]],"y"=P[,opt$components[2]])
out=ggplot(data=A,aes_(x=~x,y=~y)) +
geom_point() +
ggtitle('PCA Loadings', subtitle=NULL) +
xlab(paste0('Component ',opt$components[1])) +
ylab(paste0('Component ',opt$components[2])) +
scale_colour_Publication() +
theme_Publication(base_size = 12)
return(out)
}
if (length(opt$components)>2)
{
stop('can only plot loadings for 1 or 2 components at a time')
}
}
#' pca_scree_plot class
#'
#' line plot showing percent variance and cumulative peercent variance for the computed components.
#'
#' @import struct
#' @export PCA.scree
#' @include PCA_class.R
PCA.scree<-setClass(
"PCA.scree",
contains=c('chart'),
prototype = list(name='Scree plot',
description='plots the percent and cumulative percent variance for the calculated components',
type="line"
)
)
#' @export
setMethod(f="chart.plot",
signature=c("PCA.scree",'PCA'),
definition=function(obj,dobj)
{
## percent variance
scores=output.value(dobj,'scores')
pvar=(colSums(scores*scores)/output.value(dobj,'ssx'))*100
A=data.frame("x"=1:length(pvar),"y"=c(pvar,cumsum(pvar)),"Variance"=as.factor(c(rep('Single component',length(pvar)),rep('Cumulative',length(pvar)))))
labels=round(A$y,digits = 1)
labels=format(labels,1)
out=ggplot(data=A, aes_(x=~x,y=~y,color=~Variance)) +
geom_line() +
geom_point() +
geom_text(aes_(label=~labels),color='black',vjust=0,nudge_y = 5) +
ggtitle('Scree Plot', subtitle=NULL) +
xlab('Component') +
ylab('Variance (%)') +
scale_colour_Publication() +
scale_x_continuous(breaks=0:length(pvar)) +
guides(color=guide_legend("")) +
theme_Publication(base_size = 12)
return(out)
}
)
#' pca_dstat_plot class
#'
#' line plot showing percent variance and cumulative peercent variance for the computed components.
#'
#' @import struct
#' @export PCA.dstat
#' @include PCA_class.R
PCA.dstat<-setClass(
"PCA.dstat",
contains=c('chart'),
slots=c(params.number_components='entity',
params.alpha='entity'),
prototype = list(name='d-statistic plot',
description='a bar chart of the d-statistics for samples in the input PCA model',
type="bar",
params.number_components=entity(value = 2,
name = 'number of principal components',
description = 'number of principal components to use for the plot',
type='numeric'),
params.alpha=entity(value=0.95,
name='threshold for rejecting outliers',
description='a confidence threshold for rejecting samples based on the d-statistic',
type='numeric'),
params=c('number_components','alpha')
)
)
#' @export
setMethod(f="chart.plot",
signature=c("PCA.dstat",'PCA'),
definition=function(obj,dobj)
{
opt=param.list(obj)
a=param.value(obj,'number_components')
scores=output.value(dobj,'scores')
I=nrow(scores) # number of samples
sample_names=rownames(scores)
scores=scores[,1:a]
scores=as.matrix(scores)
covT=t(scores) %*% scores # covariance matrix
covT=solve(covT/(I-1)) # inverse
H=numeric(length = I)
for (i in 1:I)
{
H[i]=scores[i,,drop=FALSE]%*%covT%*%t(scores[i,,drop=FALSE])
} #leverage value
# threshold at alpha
F=qf(p = opt$alpha,df1 = a,df2=I-a)
sf=(a*(I-1)*(I+1))/(I*(I-a))
threshold=sf*F
# ggplot
df=data.frame(x=sample_names,y=H)
out=ggplot(data=df, aes_(x=~x,y=~y)) +
geom_bar(stat="identity") +
geom_hline(yintercept=threshold, linetype='dashed', color='grey') +
ggtitle('d-statistic', subtitle=paste0('Number of components = ',a)) +
xlab('Sample') +
ylab('d-statistic') +
scale_x_discrete (limits = sample_names,breaks=NULL) +
scale_colour_Publication() +
theme_Publication(base_size = 12)
return(out)
}
)
circleFun <- function(center = c(0,0),diameter = 1, npoints = 100){
r = diameter / 2
tt <- seq(0,2*pi,length.out = npoints)
xx <- center[1] + r * cos(tt)
yy <- center[2] + r * sin(tt)
return(data.frame(x = xx, y = yy))
}
|