883f7f27 |
#' PCA model class
#'
#' Principal Component Analysis (PCA) model class. This object can be used to train/apply PCA mdoels to dataset objects.
#'
#' @import struct
#' @export PCA
PCA<-setClass(
"PCA",
contains=c('model','stato'),
slots=c(
# INPUTS
params.number_components='entity.stato',
# OUTPUTS
outputs.scores='entity',
outputs.loadings='data.frame',
outputs.eigenvalues='data.frame',
outputs.ssx='numeric',
outputs.correlation='data.frame',
outputs.that='dataset'
),
prototype = list(name='Principal Component Analysis (PCA)',
description='PCA is a multivariate data reduction technique. It summarises the data in a smaller number of Principal Components that describe the maximum variation present in the dataset.',
type="preprocessing",
params=c('number_components'),
outputs=c('scores',
'loadings',
'eigenvalues',
'ssx',
'correlation',
'that'
),
predicted='that',
stato.id="OBI:0200051",
params.number_components=entity.stato(name='Number of PCs',
stato.id='STATO:0000555',
value=2,
type='numeric'
),
outputs.scores=entity('name'='PCA scores matrix',
'description'='A matrix of PCA scores where each column corresponds to a Principal Component')
)
)
#' @export
setMethod(f="model.train",
signature=c("PCA",'dataset'),
definition=function(M,D)
{
A=param.value(M,'number_components')
X=as.matrix(dataset.data(D))
model=svd(X,A,A)
if (A==1)
{
scores=model$u * model$d[A]
} else {
scores=model$u %*% diag(model$d[1:A])
}
scores=as.data.frame(scores)
rownames(scores)=rownames(X)
varnames=rep('A',1,A)
for (i in 1:A)
{
varnames[i]=paste0('PC',i)
}
colnames(scores)=varnames
output.value(M,'scores')=scores
P=as.data.frame(model$v)
rownames(P)=colnames(X)
colnames(P)=varnames
output.value(M,'loadings')=P
E=data.frame('Eigenvalues'=sqrt(model$d[1:A]))
rownames(E)=varnames
output.value(M,'eigenvalues')=E
output.value(M,'ssx')=sum(X*X) # sum of squares of x
output.value(M,'correlation')=as.data.frame(cor(X,scores))
return(M)
}
)
#' @export
setMethod(f="model.predict",
signature=c("PCA",'dataset'),
definition=function(M,D)
{
A=param.value(M,'number_components')
X=as.matrix(dataset.data(D))
P=output.value(M,'loadings')
that=X%*%as.matrix(P)
that=as.data.frame(that)
rownames(that)=rownames(X)
varnames=rep('A',1,A)
for (i in 1:A)
{
varnames[i]=paste0('PC',i)
}
colnames(that)=varnames
# convert to dataset for preprocessing output
S=D
dataset.data(S)=that
output.value(M,'that')=S
return(M)
}
)
|