R/PCA_class.R
883f7f27
 #' PCA model class
 #'
8db793ae
 #' Principal Component Analysis (PCA) model class. This object can be used to train/apply PCA mdoels to DatasetExperiment objects.
883f7f27
 #'
 #' @import struct
a110b7e7
 #' @param number_components The number of principal components to retain
9bf15429
 #' @param ... additional slots and values passed to struct_class
7af536f9
 #' @return struct object
883f7f27
 #' @export PCA
1a7cd70b
 #' @examples
 #' M = PCA()
acec9d54
 PCA = function(number_components=2,...) {
a110b7e7
     out=struct::new_struct('PCA',
         number_components=number_components,
         ...)
8db793ae
     return(out)
 }
 
 
 .PCA<-setClass(
c34a3c53
     "PCA",
     contains=c('model','stato'),
     slots=c(
         # INPUTS
b3b2ba0e
         number_components='entity_stato',
c34a3c53
 
         # OUTPUTS
b3b2ba0e
         scores='entity',
         loadings='data.frame',
         eigenvalues='data.frame',
         ssx='numeric',
         correlation='data.frame',
         that='DatasetExperiment'
a110b7e7
 
c34a3c53
     ),
     prototype = list(name='Principal Component Analysis (PCA)',
8db793ae
         description='PCA is a multivariate data reduction technique. It summarises the data in a smaller number of Principal Components that describe the maximum variation present in the DatasetExperiment.',
c34a3c53
         type="preprocessing",
         predicted='that',
8db793ae
         stato_id="OBI:0200051",
a110b7e7
         .params=c('number_components'),
         .outputs=c('scores','loadings','eigenvalues','ssx','correlation','that'),
c34a3c53
 
b3b2ba0e
         number_components=entity_stato(name='Number of PCs',
8db793ae
             stato_id='STATO:0000555',
c34a3c53
             value=2,
82784e60
             type=c('numeric','integer')
c34a3c53
         ),
b3b2ba0e
         scores=entity('name'='PCA scores DatasetExperiment',
c2786a86
             'description'='A matrix of PCA scores where each column corresponds to a Principal Component',
8db793ae
             'type'='DatasetExperiment')
c34a3c53
     )
883f7f27
 )
 
 #' @export
5d21f82d
 #' @template model_train
8db793ae
 setMethod(f="model_train",
     signature=c("PCA",'DatasetExperiment'),
c34a3c53
     definition=function(M,D)
     {
8db793ae
         A=param_value(M,'number_components')
         X=as.matrix(D$data)
c34a3c53
         model=svd(X,A,A)
         if (A==1)
         {
             scores=model$u * model$d[A]
         } else {
             scores=model$u %*% diag(model$d[1:A])
         }
 
         scores=as.data.frame(scores)
         rownames(scores)=rownames(X)
         varnames=rep('A',1,A)
         for (i in 1:A)
         {
             varnames[i]=paste0('PC',i)
         }
         colnames(scores)=varnames
c187e3e1
         S=DatasetExperiment(data=scores,sample_meta=D$sample_meta,variable_meta=varnames)
8db793ae
         output_value(M,'scores')=S
c34a3c53
 
         P=as.data.frame(model$v)
         rownames(P)=colnames(X)
         colnames(P)=varnames
8db793ae
         output_value(M,'loadings')=P
c34a3c53
 
         E=data.frame('Eigenvalues'=sqrt(model$d[1:A]))
         rownames(E)=varnames
8db793ae
         output_value(M,'eigenvalues')=E
c34a3c53
 
8db793ae
         output_value(M,'ssx')=sum(X*X) # sum of squares of x
         output_value(M,'correlation')=as.data.frame(cor(X,scores))
c34a3c53
 
         return(M)
     }
883f7f27
 )
 
 #' @export
28dfb678
 #' @template model_predict
8db793ae
 setMethod(f="model_predict",
     signature=c("PCA",'DatasetExperiment'),
c34a3c53
     definition=function(M,D)
     {
8db793ae
         A=param_value(M,'number_components')
         X=as.matrix(D$data)
         P=output_value(M,'loadings')
c34a3c53
         that=X%*%as.matrix(P)
 
         that=as.data.frame(that)
         rownames(that)=rownames(X)
         varnames=rep('A',1,A)
         for (i in 1:A)
         {
             varnames[i]=paste0('PC',i)
         }
         colnames(that)=varnames
 
8db793ae
         # convert to DatasetExperiment for preprocessing output
c187e3e1
         S=DatasetExperiment(data=that,sample_meta=D$sample_meta,variable_meta=varnames)
8db793ae
         output_value(M,'that')=S
c34a3c53
 
         return(M)
     }
883f7f27
 )