Browse code

Added manual for poplin_reduce.

Jaehyun Joo authored on 30/11/2021 03:11:39
Showing 5 changed files

... ...
@@ -1,11 +1,36 @@
1 1
 ##' Dimension reduction methods
2 2
 ##'
3
+##' In metabolomics, dimension reduction methods are often used for modeling
4
+##' and visualization.
5
+##' [poplin_reduce] is a wrapper for the following set of functions:
6
+##' \describe{
7
+##' \item{\code{\link{poplin_reduce_pca}}:}{
8
+##' principal component analysis (PCA)
9
+##' }
10
+##' \item{\code{\link{poplin_reduce_plsda}}:}{
11
+##' partial least squares-discriminant analysis (PLS-DA)
12
+##' }
13
+##' \item{\code{\link{poplin_reduce_tsne}}:}{
14
+##' t-distributed stochastic neighbor embedding (t-SNE)
15
+##' }
16
+##' }
17
+##' @param x A matrix or \linkS4class{poplin} object.
18
+##' @param method A dimension reduction method. Default is 'pca'.
19
+##' @param poplin_in Name of a data matrix to retrieve.
20
+##' @param poplin_out Name of a data matrix to store.
21
+##' @param y A factor vector for discrete outcome required for PLS-DA. Ignored
22
+##'   otherwise.
23
+##' @param ncomp Output dimensionality.
24
+##' @param ... Arguments passed to the selected dimension reduction method.
25
+##' @return A matrix or \linkS4class{poplin} object with the same number of rows
26
+##'   as \code{ncol(x)} containing the dimension reduction result.
3 27
 ##' @name poplin_reduce
28
+##' @family data reduction methods
4 29
 setMethod(
5 30
   "poplin_reduce",
6 31
   "matrix",
7
-  function(x, method, ...) {
8
-    .poplin_reduce(x, method = method, ...)
32
+  function(x, method = c("pca", "tsne", "plsda"), y, ncomp = 2, ...) {
33
+    .poplin_reduce(x, method = method, y = y, ncomp = ncomp, ...)
9 34
   }
10 35
 )
11 36
 
... ...
@@ -13,67 +38,154 @@ setMethod(
13 38
 setMethod(
14 39
   "poplin_reduce",
15 40
   "poplin",
16
-  function(x, method, poplin_in, poplin_out, ...) {
41
+  function(x, method = c("pca", "tsne", "plsda"), poplin_in, poplin_out,
42
+           y, ncomp = 2, ...) {
17 43
     m <- .verify_and_extract_input(x, poplin_in)
18
-    poplin_reduced(x, poplin_out) <- .poplin_reduce(m, method = method, ...)
44
+    poplin_reduced(x, poplin_out) <- .poplin_reduce(m, method = method, y = y,
45
+                                                    ncomp = ncomp, ...)
19 46
     x
20 47
   }
21 48
 )
22 49
 
23
-##' @rdname poplin_reduce
50
+##' Principal component analysis (PCA)
51
+##'
52
+##' Apply PCA to a matrix or \linkS4class{poplin} object. For the data without
53
+##' missing values, PCA is performed via a singular value decomposition.
54
+##' Otherwise, Bayesian PCA is performed using [pcaMethods::bpca] from the
55
+##' \pkg{pcaMethods} package. Note that Bayesian PCA does not force
56
+##' orthogonality between factor loadings.
57
+##'
58
+##' @references
59
+##' Shigeyuki Oba, Masa-aki Sato, Ichiro Takemasa, Morito Monden, Ken-ichi
60
+##' Matsubara, Shin Ishii, A Bayesian missing value estimation method for gene
61
+##' expression profile data, Bioinformatics, Volume 19, Issue 16, 1 November
62
+##' 2003, Pages 2088–2096, https://doi.org/10.1093/bioinformatics/btg287
63
+##'
64
+##' @param x A matrix or \linkS4class{poplin} object.
65
+##' @param poplin_in Name of a data matrix to retrieve.
66
+##' @param poplin_out Name of a data matrix to store.
67
+##' @param ncomp Output dimensionality.
68
+##' @param center A logical indicating mean-centering prior to PCA.
69
+##' @param scale A logical indicating unit variance scaling prior to PCA.
70
+##' @param ... Additional arguments passed to [pcaMethods::bpca].
71
+##' @return A poplin.pca matrix or \linkS4class{poplin} object with the same
72
+##'   number of rows as \code{ncol(x)} containing the dimension reduction
73
+##'   result.
74
+##' @name poplin_reduce_pca
75
+##' @family data reduction methods
24 76
 setMethod(
25 77
   "poplin_reduce_pca",
26 78
   "matrix",
27
-  function(x, ...) {
28
-    .poplin_impute_pca(x, ...)
79
+  function(x, ncomp = 2, center = TRUE, scale = FALSE, ...) {
80
+    .poplin_impute_pca(x, ncomp = ncomp, center = center, scale = scale, ...)
29 81
   }
30 82
 )
31 83
 
32
-##' @rdname poplin_reduce
84
+##' @rdname poplin_reduce_pca
33 85
 setMethod(
34 86
   "poplin_reduce_pca",
35 87
   "poplin",
36
-  function(x, poplin_in, poplin_out, ...) {
88
+  function(x, poplin_in, poplin_out, ncomp = 2, center = 2, scale = FALSE, ...) {
37 89
     .reduced_extract_and_assign(x, .poplin_reduce_pca,
38
-                               poplin_in, poplin_out, ...)
90
+                                poplin_in, poplin_out,
91
+                                ncomp = ncomp, center = center, scale = scale, ...)
39 92
   }
40 93
 )
41 94
 
42
-##' @rdname poplin_reduce
95
+##' t-distributed stochastic neighbor embedding (t-SNE)
96
+##'
97
+##' Apply t-SNE to a matrix or \linkS4class{poplin} object. This is an interface
98
+##' to the [Rtsne::Rtsne] from the \pkg{Rtsne} package. t-SNE is well-suited for
99
+##' visualizing high-dimensional data by giving each data point a location in a
100
+##' two or three-dimensional map.
101
+##'
102
+##' @references
103
+##'
104
+##' L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data
105
+##' Using t-SNE. Journal of Machine Learning Research 9(Nov):2579-2605, 2008.
106
+##'
107
+##' L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.
108
+##' Journal of Machine Learning Research 15(Oct):3221-3245, 2014.
109
+##'
110
+##' Jesse H. Krijthe (2015). Rtsne: T-Distributed Stochastic Neighbor Embedding
111
+##' using a Barnes-Hut Implementation, URL: https://github.com/jkrijthe/Rtsne
112
+##' 
113
+##' @param x A matrix or \linkS4class{poplin} object.
114
+##' @param poplin_in Name of a data matrix to retrieve.
115
+##' @param poplin_out Name of a data matrix to store.
116
+##' @param ncomp Number of components to calculate.
117
+##' @param normalize if \code{TRUE}, an input matrix is mean-centered and scaled
118
+##'   so that the largest absolute value of the centered matrix is equal to unity. See
119
+##'   [Rtsne::normalize_input] for details.
120
+##' @param ... Additional arguments passed to [Rtsne::Rtsne].
121
+##' @return A poplin.tsne matrix or \linkS4class{poplin} object with the same
122
+##'   number of rows as \code{ncol(x)} containing the dimension reduction
123
+##'   result.
124
+##' @name poplin_reduce_tsne
125
+##' @family data reduction methods
43 126
 setMethod(
44 127
   "poplin_reduce_tsne",
45 128
   "matrix",
46
-  function(x, ...) {
47
-    .poplin_impute_tsne(x, ...)
129
+  function(x, ncomp = 2, normalize = TRUE, ...) {
130
+    .poplin_impute_tsne(x, ncomp = ncomp, normalize = normalize, ...)
48 131
   }
49 132
 )
50 133
 
51
-##' @rdname poplin_reduce
134
+##' @rdname poplin_reduce_tsne
52 135
 setMethod(
53 136
   "poplin_reduce_tsne",
54 137
   "poplin",
55
-  function(x, poplin_in, poplin_out, ...) {
138
+  function(x, poplin_in, poplin_out, ncomp = 2, normalize = TRUE, ...) {
56 139
     .reduced_extract_and_assign(x, .poplin_reduce_tsne,
57
-                                poplin_in, poplin_out, ...)
140
+                                poplin_in, poplin_out,
141
+                                ncomp = ncomp, normalize = normalize, ...)
58 142
   }
59 143
 )
60 144
 
61
-##' @rdname poplin_reduce
145
+##' Partial least squares-discriminant analysis (PLS-DA)
146
+##'
147
+##' Apply PLS-DA to a matrix or \linkS4class{poplin} object. It performs
148
+##' standard PLS for classification using [pls::plsr]. If the \pkg{pls} package is not
149
+##' installed, this function will stop with a note about installing the package.
150
+##'
151
+##' @references
152
+##'  Kristian Hovde Liland, Bjørn-Helge Mevik and Ron Wehrens (2021). pls:
153
+##'  Partial Least Squares and Principal Component Regression. R package version
154
+##'  2.8-0. https://CRAN.R-project.org/package=pls
155
+##' 
156
+##' @param x A matrix or \linkS4class{poplin} object.
157
+##' @param method A dimension reduction method. Default is 'pca'.
158
+##' @param poplin_in Name of a data matrix to retrieve.
159
+##' @param poplin_out Name of a data matrix to store.
160
+##' @param y A factor vector for discrete outcome. 
161
+##' @param ncomp Output dimensionality.
162
+##' @param center A logical indicating mean-centering prior to PLS-DA.
163
+##' @param scale A logical indicating unit variance scaling prior to PLS-DA.
164
+##' @param ... Additional arguments passed to [pls::plsr].
165
+##' @return A poplin.plsda matrix or \linkS4class{poplin} object with the same
166
+##'   number of rows as \code{ncol(x)} containing the dimension reduction
167
+##'   result.
168
+##' @name poplin_reduce_plsda
169
+##' @family data reduction methods
62 170
 setMethod(
63 171
   "poplin_reduce_plsda",
64 172
   "matrix",
65
-  function(x, ...) {
66
-    .poplin_impute_plsda(x, ...)
173
+  function(x, y, ncomp = 2, center = TRUE, scale = FALSE, ...) {
174
+    .poplin_impute_plsda(x, y = y, ncomp = ncomp,
175
+                         center = center, scale = scale, ...)
67 176
   }
68 177
 )
69 178
 
70
-##' @rdname poplin_reduce
179
+##' @rdname poplin_reduce_plsda
71 180
 setMethod(
72 181
   "poplin_reduce_plsda",
73 182
   "poplin",
74
-  function(x, poplin_in, poplin_out, ...) {
183
+  function(x, poplin_in, poplin_out, y,
184
+           ncomp = 2, center = TRUE, scale = FALSE, ...) {
75 185
     .reduced_extract_and_assign(x, .poplin_reduce_plsda,
76
-                                poplin_in, poplin_out, ...)
186
+                                poplin_in, poplin_out,
187
+                                y = y, ncomp = ncomp,
188
+                                center = center, scale = scale, ...)
77 189
   }
78 190
 )
79 191
 
... ...
@@ -3,30 +3,60 @@
3 3
 \name{poplin_reduce}
4 4
 \alias{poplin_reduce}
5 5
 \alias{poplin_reduce,poplin-method}
6
-\alias{poplin_reduce_pca,matrix-method}
7
-\alias{poplin_reduce_pca,poplin-method}
8
-\alias{poplin_reduce_tsne,matrix-method}
9
-\alias{poplin_reduce_tsne,poplin-method}
10
-\alias{poplin_reduce_plsda,matrix-method}
11
-\alias{poplin_reduce_plsda,poplin-method}
12 6
 \title{Dimension reduction methods}
13 7
 \usage{
14
-\S4method{poplin_reduce}{matrix}(x, method, ...)
8
+\S4method{poplin_reduce}{matrix}(x, method = c("pca", "tsne", "plsda"), y, ncomp = 2, ...)
15 9
 
16
-\S4method{poplin_reduce}{poplin}(x, method, poplin_in, poplin_out, ...)
10
+\S4method{poplin_reduce}{poplin}(
11
+  x,
12
+  method = c("pca", "tsne", "plsda"),
13
+  poplin_in,
14
+  poplin_out,
15
+  y,
16
+  ncomp = 2,
17
+  ...
18
+)
19
+}
20
+\arguments{
21
+\item{x}{A matrix or \linkS4class{poplin} object.}
17 22
 
18
-\S4method{poplin_reduce_pca}{matrix}(x, ...)
23
+\item{method}{A dimension reduction method. Default is 'pca'.}
19 24
 
20
-\S4method{poplin_reduce_pca}{poplin}(x, poplin_in, poplin_out, ...)
25
+\item{y}{A factor vector for discrete outcome required for PLS-DA. Ignored
26
+otherwise.}
21 27
 
22
-\S4method{poplin_reduce_tsne}{matrix}(x, ...)
28
+\item{ncomp}{Output dimensionality.}
23 29
 
24
-\S4method{poplin_reduce_tsne}{poplin}(x, poplin_in, poplin_out, ...)
30
+\item{...}{Argument passed to a specific dimension reduction method.}
25 31
 
26
-\S4method{poplin_reduce_plsda}{matrix}(x, ...)
32
+\item{poplin_in}{Name of a data matrix to retrieve.}
27 33
 
28
-\S4method{poplin_reduce_plsda}{poplin}(x, poplin_in, poplin_out, ...)
34
+\item{poplin_out}{Name of a data matrix to store.}
35
+}
36
+\value{
37
+A matrix or \linkS4class{poplin} object with the same number of rows
38
+as \code{ncol(x)} containing the dimension reduction result.
29 39
 }
30 40
 \description{
31
-Dimension reduction methods
41
+In metabolomics, dimension reduction methods are often used for modeling
42
+and visualization.
43
+\link{poplin_reduce} is a wrapper for the following set of functions:
44
+\describe{
45
+\item{\code{\link{poplin_reduce_pca}}:}{
46
+principal component analysis (PCA)
47
+}
48
+\item{\code{\link{poplin_reduce_plsda}}:}{
49
+partial least squares-discriminant analysis (PLS-DA)
50
+}
51
+\item{\code{\link{poplin_reduce_tsne}}:}{
52
+t-distributed stochastic neighbor embedding (t-SNE)
53
+}
54
+}
55
+}
56
+\seealso{
57
+Other data reduction methods: 
58
+\code{\link{poplin_reduce_pca}()},
59
+\code{\link{poplin_reduce_plsda}()},
60
+\code{\link{poplin_reduce_tsne}()}
32 61
 }
62
+\concept{data reduction methods}
33 63
new file mode 100644
... ...
@@ -0,0 +1,59 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/reduction-methods.R
3
+\name{poplin_reduce_pca}
4
+\alias{poplin_reduce_pca}
5
+\alias{poplin_reduce_pca,poplin-method}
6
+\title{Principal component analysis (PCA)}
7
+\usage{
8
+\S4method{poplin_reduce_pca}{matrix}(x, ncomp = 2, center = TRUE, scale = FALSE, ...)
9
+
10
+\S4method{poplin_reduce_pca}{poplin}(
11
+  x,
12
+  poplin_in,
13
+  poplin_out,
14
+  ncomp = 2,
15
+  center = 2,
16
+  scale = FALSE,
17
+  ...
18
+)
19
+}
20
+\arguments{
21
+\item{x}{A matrix or \linkS4class{poplin} object.}
22
+
23
+\item{ncomp}{Output dimensionality.}
24
+
25
+\item{center}{A logical indicating mean-centering prior to PCA.}
26
+
27
+\item{scale}{A logical indicating unit variance scaling prior to PCA.}
28
+
29
+\item{...}{Additional arguments passed to \link[pcaMethods:bpca]{pcaMethods::bpca}.}
30
+
31
+\item{poplin_in}{Name of a data matrix to retrieve.}
32
+
33
+\item{poplin_out}{Name of a data matrix to store.}
34
+}
35
+\value{
36
+A poplin.pca matrix or \linkS4class{poplin} object with the same
37
+number of rows as \code{ncol(x)} containing the dimension reduction
38
+result.
39
+}
40
+\description{
41
+Apply PCA to a matrix or \linkS4class{poplin} object. For the data without
42
+missing values, PCA is performed via a singular value decomposition.
43
+Otherwise, Bayesian PCA is performed using \link[pcaMethods:bpca]{pcaMethods::bpca} from the
44
+\pkg{pcaMethods} package. Note that Bayesian PCA does not force
45
+orthogonality between factor loadings.
46
+}
47
+\references{
48
+Shigeyuki Oba, Masa-aki Sato, Ichiro Takemasa, Morito Monden, Ken-ichi
49
+Matsubara, Shin Ishii, A Bayesian missing value estimation method for gene
50
+expression profile data, Bioinformatics, Volume 19, Issue 16, 1 November
51
+2003, Pages 2088–2096, https://doi.org/10.1093/bioinformatics/btg287
52
+}
53
+\seealso{
54
+Other data reduction methods: 
55
+\code{\link{poplin_reduce_plsda}()},
56
+\code{\link{poplin_reduce_tsne}()},
57
+\code{\link{poplin_reduce}()}
58
+}
59
+\concept{data reduction methods}
0 60
new file mode 100644
... ...
@@ -0,0 +1,61 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/reduction-methods.R
3
+\name{poplin_reduce_plsda}
4
+\alias{poplin_reduce_plsda}
5
+\alias{poplin_reduce_plsda,poplin-method}
6
+\title{Partial least squares-discriminant analysis (PLS-DA)}
7
+\usage{
8
+\S4method{poplin_reduce_plsda}{matrix}(x, y, ncomp = 2, center = TRUE, scale = FALSE, ...)
9
+
10
+\S4method{poplin_reduce_plsda}{poplin}(
11
+  x,
12
+  poplin_in,
13
+  poplin_out,
14
+  y,
15
+  ncomp = 2,
16
+  center = TRUE,
17
+  scale = FALSE,
18
+  ...
19
+)
20
+}
21
+\arguments{
22
+\item{x}{A matrix or \linkS4class{poplin} object.}
23
+
24
+\item{y}{A factor vector for discrete outcome.}
25
+
26
+\item{ncomp}{Output dimensionality.}
27
+
28
+\item{center}{A logical indicating mean-centering prior to PLS-DA.}
29
+
30
+\item{scale}{A logical indicating unit variance scaling prior to PLS-DA.}
31
+
32
+\item{...}{Additional argument passed to \link[pls:mvr]{pls::plsr}.}
33
+
34
+\item{poplin_in}{Name of a data matrix to retrieve.}
35
+
36
+\item{poplin_out}{Name of a data matrix to store.}
37
+
38
+\item{method}{A dimension reduction method. Default is 'pca'.}
39
+}
40
+\value{
41
+A poplin.plsda matrix or \linkS4class{poplin} object with the same
42
+number of rows as \code{ncol(x)} containing the dimension reduction
43
+result.
44
+}
45
+\description{
46
+Apply PLS-DA to a matrix or \linkS4class{poplin} object. It performs
47
+standard PLS for classification using \link[pls:mvr]{pls::plsr}. If the \pkg{pls} is not
48
+installed, this function will stop with a note about installing the package.
49
+}
50
+\references{
51
+Kristian Hovde Liland, Bjørn-Helge Mevik and Ron Wehrens (2021). pls:
52
+Partial Least Squares and Principal Component Regression. R package version
53
+2.8-0. https://CRAN.R-project.org/package=pls
54
+}
55
+\seealso{
56
+Other data reduction methods: 
57
+\code{\link{poplin_reduce_pca}()},
58
+\code{\link{poplin_reduce_tsne}()},
59
+\code{\link{poplin_reduce}()}
60
+}
61
+\concept{data reduction methods}
0 62
new file mode 100644
... ...
@@ -0,0 +1,54 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/reduction-methods.R
3
+\name{poplin_reduce_tsne}
4
+\alias{poplin_reduce_tsne}
5
+\alias{poplin_reduce_tsne,poplin-method}
6
+\title{t-distributed stochastic neighbor embedding (t-SNE)}
7
+\usage{
8
+\S4method{poplin_reduce_tsne}{matrix}(x, ncomp = 2, normalize = TRUE, ...)
9
+
10
+\S4method{poplin_reduce_tsne}{poplin}(x, poplin_in, poplin_out, ncomp = 2, normalize = TRUE, ...)
11
+}
12
+\arguments{
13
+\item{x}{A matrix or \linkS4class{poplin} object.}
14
+
15
+\item{ncomp}{Number of components to calculate.}
16
+
17
+\item{normalize}{if \code{TRUE}, an input matrix is mean-centered and scaled
18
+so that the largest absolute value of the centered matrix is equal to unity. See
19
+\link[Rtsne:normalize_input]{Rtsne::normalize_input} for details.}
20
+
21
+\item{...}{Additional argument passed to \link[Rtsne:Rtsne]{Rtsne::Rtsne}.}
22
+
23
+\item{poplin_in}{Name of a data matrix to retrieve.}
24
+
25
+\item{poplin_out}{Name of a data matrix to store.}
26
+}
27
+\value{
28
+A poplin.tsne matrix or \linkS4class{poplin} object with the same
29
+number of rows as \code{ncol(x)} containing the dimension reduction
30
+result.
31
+}
32
+\description{
33
+Apply t-SNE to a matrix or \linkS4class{poplin} object. This is an interface
34
+to the \link[Rtsne:Rtsne]{Rtsne::Rtsne} from the \pkg{Rtsne} package. t-SNE is well-suited for
35
+visualizing high-dimensional data by giving each data point a location in a
36
+two or three-dimensional map.
37
+}
38
+\references{
39
+L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data
40
+Using t-SNE. Journal of Machine Learning Research 9(Nov):2579-2605, 2008.
41
+
42
+L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.
43
+Journal of Machine Learning Research 15(Oct):3221-3245, 2014.
44
+
45
+Jesse H. Krijthe (2015). Rtsne: T-Distributed Stochastic Neighbor Embedding
46
+using a Barnes-Hut Implementation, URL: https://github.com/jkrijthe/Rtsne
47
+}
48
+\seealso{
49
+Other data reduction methods: 
50
+\code{\link{poplin_reduce_pca}()},
51
+\code{\link{poplin_reduce_plsda}()},
52
+\code{\link{poplin_reduce}()}
53
+}
54
+\concept{data reduction methods}