883f7f27 |
#' PCA model class
#'
|
8db793ae |
#' Principal Component Analysis (PCA) model class. This object can be used to train/apply PCA mdoels to DatasetExperiment objects.
|
883f7f27 |
#'
#' @import struct
|
a110b7e7 |
#' @param number_components The number of principal components to retain
|
9bf15429 |
#' @param ... additional slots and values passed to struct_class
|
7af536f9 |
#' @return struct object
|
883f7f27 |
#' @export PCA
|
1a7cd70b |
#' @examples
#' M = PCA()
|
acec9d54 |
PCA = function(number_components=2,...) {
|
a110b7e7 |
out=struct::new_struct('PCA',
number_components=number_components,
...)
|
8db793ae |
return(out)
}
.PCA<-setClass(
|
c34a3c53 |
"PCA",
contains=c('model','stato'),
slots=c(
# INPUTS
|
b3b2ba0e |
number_components='entity_stato',
|
c34a3c53 |
# OUTPUTS
|
b3b2ba0e |
scores='entity',
loadings='data.frame',
eigenvalues='data.frame',
ssx='numeric',
correlation='data.frame',
that='DatasetExperiment'
|
a110b7e7 |
|
c34a3c53 |
),
prototype = list(name='Principal Component Analysis (PCA)',
|
8db793ae |
description='PCA is a multivariate data reduction technique. It summarises the data in a smaller number of Principal Components that describe the maximum variation present in the DatasetExperiment.',
|
c34a3c53 |
type="preprocessing",
predicted='that',
|
8db793ae |
stato_id="OBI:0200051",
|
a110b7e7 |
.params=c('number_components'),
.outputs=c('scores','loadings','eigenvalues','ssx','correlation','that'),
|
c34a3c53 |
|
b3b2ba0e |
number_components=entity_stato(name='Number of PCs',
|
8db793ae |
stato_id='STATO:0000555',
|
c34a3c53 |
value=2,
|
82784e60 |
type=c('numeric','integer')
|
c34a3c53 |
),
|
b3b2ba0e |
scores=entity('name'='PCA scores DatasetExperiment',
|
c2786a86 |
'description'='A matrix of PCA scores where each column corresponds to a Principal Component',
|
8db793ae |
'type'='DatasetExperiment')
|
c34a3c53 |
)
|
883f7f27 |
)
#' @export
|
5d21f82d |
#' @template model_train
|
8db793ae |
setMethod(f="model_train",
signature=c("PCA",'DatasetExperiment'),
|
c34a3c53 |
definition=function(M,D)
{
|
8db793ae |
A=param_value(M,'number_components')
X=as.matrix(D$data)
|
c34a3c53 |
model=svd(X,A,A)
if (A==1)
{
scores=model$u * model$d[A]
} else {
scores=model$u %*% diag(model$d[1:A])
}
scores=as.data.frame(scores)
rownames(scores)=rownames(X)
varnames=rep('A',1,A)
for (i in 1:A)
{
varnames[i]=paste0('PC',i)
}
colnames(scores)=varnames
|
c187e3e1 |
S=DatasetExperiment(data=scores,sample_meta=D$sample_meta,variable_meta=varnames)
|
8db793ae |
output_value(M,'scores')=S
|
c34a3c53 |
P=as.data.frame(model$v)
rownames(P)=colnames(X)
colnames(P)=varnames
|
8db793ae |
output_value(M,'loadings')=P
|
c34a3c53 |
E=data.frame('Eigenvalues'=sqrt(model$d[1:A]))
rownames(E)=varnames
|
8db793ae |
output_value(M,'eigenvalues')=E
|
c34a3c53 |
|
8db793ae |
output_value(M,'ssx')=sum(X*X) # sum of squares of x
output_value(M,'correlation')=as.data.frame(cor(X,scores))
|
c34a3c53 |
return(M)
}
|
883f7f27 |
)
#' @export
|
28dfb678 |
#' @template model_predict
|
8db793ae |
setMethod(f="model_predict",
signature=c("PCA",'DatasetExperiment'),
|
c34a3c53 |
definition=function(M,D)
{
|
8db793ae |
A=param_value(M,'number_components')
X=as.matrix(D$data)
P=output_value(M,'loadings')
|
c34a3c53 |
that=X%*%as.matrix(P)
that=as.data.frame(that)
rownames(that)=rownames(X)
varnames=rep('A',1,A)
for (i in 1:A)
{
varnames[i]=paste0('PC',i)
}
colnames(that)=varnames
|
8db793ae |
# convert to DatasetExperiment for preprocessing output
|
c187e3e1 |
S=DatasetExperiment(data=that,sample_meta=D$sample_meta,variable_meta=varnames)
|
8db793ae |
output_value(M,'that')=S
|
c34a3c53 |
return(M)
}
|
883f7f27 |
)
|