... | ... |
@@ -250,7 +250,7 @@ reduceTSNE <- function(x, ncomp = 2, normalize = TRUE, ...) { |
250 | 250 |
##' to PLS-DA. |
251 | 251 |
##' * `scaled`: A logical indicating whether the data was scaled prior to |
252 | 252 |
##' PLS-DA. |
253 |
-##' |
|
253 |
+##' * `validation`: Results of the validation if requested. |
|
254 | 254 |
##' |
255 | 255 |
##' @param x A matrix-like object. |
256 | 256 |
##' @param y A factor vector for the information about each sample's group. |
... | ... |
@@ -259,6 +259,11 @@ reduceTSNE <- function(x, ncomp = 2, normalize = TRUE, ...) { |
259 | 259 |
##' mean-centered prior to PLS-DA. |
260 | 260 |
##' @param scale A logical specifying whether the unit variance scaling needs to |
261 | 261 |
##' be performed on `x` prior to PLS-DA. |
262 |
+##' @param validation A string specifying a validation method to use. See |
|
263 |
+##' [pls::plsr] for the details. |
|
264 |
+##' @param return_mvr A logical indicating whether `mvr` object is returned. |
|
265 |
+##' It is recommended for a user to use utilities functions from the `pls` |
|
266 |
+##' package for the model fit. |
|
262 | 267 |
##' @param ... Additional arguments passed to [pls::plsr]. |
263 | 268 |
##' @return A reduced.plsda object with the same number of rows as |
264 | 269 |
##' \code{ncol(x)} containing the dimension reduction result. |
... | ... |
@@ -287,6 +292,7 @@ reduceTSNE <- function(x, ncomp = 2, normalize = TRUE, ...) { |
287 | 292 |
## |
288 | 293 |
##' @export |
289 | 294 |
reducePLSDA <- function(x, y, ncomp = 2, center = TRUE, scale = FALSE, |
295 |
+ validation = c("none", "CV", "LOO"), return_mvr = FALSE, |
|
290 | 296 |
...) { |
291 | 297 |
.verify_package("pls") |
292 | 298 |
if (!is.matrix(x)) { |
... | ... |
@@ -307,8 +313,11 @@ reducePLSDA <- function(x, y, ncomp = 2, center = TRUE, scale = FALSE, |
307 | 313 |
colnames(y_dummy) <- gsub("^y", "", colnames(y_dummy)) |
308 | 314 |
|
309 | 315 |
d <- data.frame(y = I(y_dummy), x = I(xt), row.names = colnames(x)) |
310 |
- fit <- pls::plsr(y ~ x, data = d, ncomp = ncomp, |
|
311 |
- center = center, scale = scale, ...) |
|
316 |
+ fit <- pls::plsr(y ~ x, data = d, ncomp = ncomp, center = center, |
|
317 |
+ scale = scale, validation = validation, ...) |
|
318 |
+ if (return_mvr) { |
|
319 |
+ return(fit) |
|
320 |
+ } |
|
312 | 321 |
out <- pls::scores(fit) |
313 | 322 |
pred_vals <- predict(fit, ncomp = fit$ncomp) |
314 | 323 |
y_predicted <- colnames(pred_vals)[apply(pred_vals, 1, which.max)] |
... | ... |
@@ -244,6 +244,8 @@ reduceTSNE <- function(x, ncomp = 2, normalize = TRUE, ...) { |
244 | 244 |
##' * `projection`: The projection matrix. |
245 | 245 |
##' * `fitted.values`: An array of fitted values. |
246 | 246 |
##' * `residuals`: An array of regression residuals. |
247 |
+##' * `vip`: An array of VIP (Variable Importance in the Projection) |
|
248 |
+##' coefficients. |
|
247 | 249 |
##' * `centered`: A logical indicating whether the data was mean-centered prior |
248 | 250 |
##' to PLS-DA. |
249 | 251 |
##' * `scaled`: A logical indicating whether the data was scaled prior to |
... | ... |
@@ -316,7 +318,7 @@ reducePLSDA <- function(x, y, ncomp = 2, center = TRUE, scale = FALSE, |
316 | 318 |
attr(out, "predictors") <- pls::prednames(fit) |
317 | 319 |
attr(out, "coefficients") <- fit$coefficients |
318 | 320 |
attr(out, "loadings") <- fit$loadings |
319 |
- attr(out, "loadings.weights") <- fit$loadings.weights |
|
321 |
+ attr(out, "loading.weights") <- fit$loading.weights |
|
320 | 322 |
attr(out, "Y.observed") <- y |
321 | 323 |
attr(out, "Y.predicted") <- y_predicted |
322 | 324 |
attr(out, "Y.scores") <- fit$Yscores |
... | ... |
@@ -325,9 +327,24 @@ reducePLSDA <- function(x, y, ncomp = 2, center = TRUE, scale = FALSE, |
325 | 327 |
attr(out, "fitted.values") <- fitted(fit) |
326 | 328 |
attr(out, "residuals") <- residuals(fit) |
327 | 329 |
attr(out, "ncomp") <- fit$ncomp |
330 |
+ attr(out, "vip") <- .vip_mat(fit) |
|
328 | 331 |
attr(out, "centered") <- center |
329 | 332 |
attr(out, "scaled") <- scale |
330 | 333 |
attr(out, "validation") <- fit$validation |
331 | 334 |
class(out) <- c("reduced.plsda", "matrix", class(out)) |
332 | 335 |
out |
333 | 336 |
} |
337 |
+ |
|
338 |
+.vip_mat <- function(fit) { |
|
339 |
+ ncomp <- fit$ncomp |
|
340 |
+ w <- fit$loading.weights |
|
341 |
+ m <- cor(fit$model$y, fit$scores, use = "pairwise")**2 |
|
342 |
+ vip <- do.call(cbind, lapply(1:ncomp, function(x) .vip_vec(w, m, x))) |
|
343 |
+ dimnames(vip) <- dimnames(w) |
|
344 |
+ vip |
|
345 |
+} |
|
346 |
+ |
|
347 |
+.vip_vec <- function(w, m, comp) { |
|
348 |
+ rd <- colSums(m[, 1:comp, drop = FALSE]) |
|
349 |
+ as.vector(sqrt(nrow(w) * rd %*% t(w[, 1:comp]^2) / sum(rd))) |
|
350 |
+} |
... | ... |
@@ -78,15 +78,15 @@ reducePCA <- function(x, ncomp = 2, center = TRUE, scale = FALSE, ...) { |
78 | 78 |
out <- .pca_svd(xt, ncomp = ncomp) |
79 | 79 |
} else { |
80 | 80 |
miss_pct <- 100 * sum(is.na(x)) / prod(dim(x)) |
81 |
- cat("Missing value(s) in 'x'.\n") |
|
82 |
- cat(format(miss_pct, digits = 2), "% of values are missing. ") |
|
83 |
- cat("Please consider missing value imputation.\n") |
|
81 |
+ message("Missing value(s) in 'x'.\n") |
|
82 |
+ message(format(miss_pct, digits = 2), "% of values are missing. ") |
|
83 |
+ message("Please consider missing value imputation.\n") |
|
84 | 84 |
if (!requireNamespace("pcaMethods", quietly = TRUE)) { |
85 | 85 |
stop("Package 'pcaMethods' is required to perform ", |
86 | 86 |
"PCA with missing values. ", |
87 | 87 |
"Please install and try again or impute missing values.") |
88 | 88 |
} else { |
89 |
- cat("Performing NIPALS PCA...\n") |
|
89 |
+ message("Performing NIPALS PCA...\n") |
|
90 | 90 |
out <- .pca_nipals(xt, ncomp = ncomp) |
91 | 91 |
} |
92 | 92 |
} |
... | ... |
@@ -53,6 +53,8 @@ |
53 | 53 |
##' |
54 | 54 |
##' @examples |
55 | 55 |
##' |
56 |
+##' data(faahko_se) |
|
57 |
+##' |
|
56 | 58 |
##' m <- assay(faahko_se, "knn_vsn") |
57 | 59 |
##' res <- reducePCA(m, ncomp = 3) |
58 | 60 |
##' summary(res) |
... | ... |
@@ -174,6 +176,8 @@ reducePCA <- function(x, ncomp = 2, center = TRUE, scale = FALSE, ...) { |
174 | 176 |
##' |
175 | 177 |
##' @examples |
176 | 178 |
##' |
179 |
+##' data(faahko_se) |
|
180 |
+##' |
|
177 | 181 |
##' m <- assay(faahko_se, "knn_vsn") |
178 | 182 |
##' res <- reduceTSNE(m, perplexity = 3) |
179 | 183 |
##' summary(res) |
... | ... |
@@ -273,6 +277,8 @@ reduceTSNE <- function(x, ncomp = 2, normalize = TRUE, ...) { |
273 | 277 |
##' |
274 | 278 |
##' @examples |
275 | 279 |
##' |
280 |
+##' data(faahko_se) |
|
281 |
+##' |
|
276 | 282 |
##' m <- assay(faahko_se, "knn_vsn") |
277 | 283 |
##' y <- factor(colData(faahko_se)$sample_group) |
278 | 284 |
##' res <- reducePLSDA(m, y = y) |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,328 @@ |
1 |
+##' Principal component analysis (PCA) |
|
2 |
+##' |
|
3 |
+##' Performs PCA on a matrix-like object where rows represent features and |
|
4 |
+##' columns represents samples. |
|
5 |
+##' |
|
6 |
+##' For the data without missing values, PCA is performed with the transpose of |
|
7 |
+##' `x` via singular value decomposition. Otherwise, PCA is performed with the |
|
8 |
+##' transpose of `x` using the non-linear iterative partial least squares |
|
9 |
+##' (NIPALS) algorithm via the [pcaMethods::nipalsPca]. The function returns a |
|
10 |
+##' `reduced.pca` object that is a matrix with custom attributes to summarize |
|
11 |
+##' (via [summary]) and visualize (via [plotReduced]) the PCA result. The custom |
|
12 |
+##' attributes include the following: |
|
13 |
+##' |
|
14 |
+##' * `method`: The method used to reduce the dimension of data. |
|
15 |
+##' * `ncomp`: The number of components extracted. |
|
16 |
+##' * `R2`: A vector indicating the amount of variance explained by each |
|
17 |
+##' principal component. |
|
18 |
+##' * `R2cum`: A vector of cumulative R2. |
|
19 |
+##' * `loadings`: A matrix of variable loadings. |
|
20 |
+##' * `sdev`: A vector indicating the standard deviations of the principal |
|
21 |
+##' components. |
|
22 |
+##' * `centered`: A logical indicating whether the data was mean-centered |
|
23 |
+##' prior to PCA. |
|
24 |
+##' * `scaled`: A logical indicating whether the data was scaled prior to PCA. |
|
25 |
+##' |
|
26 |
+##' @param x A matrix-like object. |
|
27 |
+##' @param ncomp An integer specifying the number of components to extract. |
|
28 |
+##' @param center A logical specifying whether `x` needs to be mean-centered |
|
29 |
+##' prior to PCA. |
|
30 |
+##' @param scale A logical specifying whether the unit variance scaling needs to |
|
31 |
+##' be performed on `x` prior to PCA. |
|
32 |
+##' @param ... Additional arguments passed to [pcaMethods::nipalsPca]. Ignored |
|
33 |
+##' if `x` has no missing values. |
|
34 |
+##' @return A reduced.pca object with the same number of rows as \code{ncol(x)} |
|
35 |
+##' containing the dimension reduction result. |
|
36 |
+##' |
|
37 |
+##' @references |
|
38 |
+##' |
|
39 |
+##' Wold, H. (1966). Estimation of principal components and related models by |
|
40 |
+##' iterative least squares. In P. R. Krishnajah (Ed.), Multivariate analysis |
|
41 |
+##' (pp. 391-420). NewYork: Academic Press. |
|
42 |
+##' |
|
43 |
+##' Stacklies, W., Redestig, H., Scholz, M., Walther, D. and Selbig, J. |
|
44 |
+##' pcaMethods -- a Bioconductor package providing PCA methods for incomplete |
|
45 |
+##' data. Bioinformatics, 2007, 23, 1164-1167 |
|
46 |
+##' |
|
47 |
+##' @seealso See [reduceFeatures] that provides a |
|
48 |
+##' \linkS4class{SummarizedExperiment}-friendly wrapper for this function. |
|
49 |
+##' |
|
50 |
+##' See [plotReduced] for visualization. |
|
51 |
+##' |
|
52 |
+##' See [pcaMethods::nipalsPca] for the underlying function that does the work. |
|
53 |
+##' |
|
54 |
+##' @examples |
|
55 |
+##' |
|
56 |
+##' m <- assay(faahko_se, "knn_vsn") |
|
57 |
+##' res <- reducePCA(m, ncomp = 3) |
|
58 |
+##' summary(res) |
|
59 |
+##' |
|
60 |
+##' @export |
|
61 |
+reducePCA <- function(x, ncomp = 2, center = TRUE, scale = FALSE, ...) { |
|
62 |
+ if (!is.matrix(x)) { |
|
63 |
+ x <- as.matrix(x) |
|
64 |
+ } |
|
65 |
+ if (ncomp > min(dim(x))) { |
|
66 |
+ stop("'ncomp' must be <= min(dim(x))") |
|
67 |
+ } |
|
68 |
+ xt <- t(x) # transpose matrix; |
|
69 |
+ if (center || scale) { |
|
70 |
+ xt <- scale(xt, center = center, scale = scale) |
|
71 |
+ } |
|
72 |
+ if (any(is.infinite(x))) { |
|
73 |
+ stop("Infinite value(s) in 'x'.") |
|
74 |
+ } |
|
75 |
+ if (!anyNA(x)) { |
|
76 |
+ out <- .pca_svd(xt, ncomp = ncomp) |
|
77 |
+ } else { |
|
78 |
+ miss_pct <- 100 * sum(is.na(x)) / prod(dim(x)) |
|
79 |
+ cat("Missing value(s) in 'x'.\n") |
|
80 |
+ cat(format(miss_pct, digits = 2), "% of values are missing. ") |
|
81 |
+ cat("Please consider missing value imputation.\n") |
|
82 |
+ if (!requireNamespace("pcaMethods", quietly = TRUE)) { |
|
83 |
+ stop("Package 'pcaMethods' is required to perform ", |
|
84 |
+ "PCA with missing values. ", |
|
85 |
+ "Please install and try again or impute missing values.") |
|
86 |
+ } else { |
|
87 |
+ cat("Performing NIPALS PCA...\n") |
|
88 |
+ out <- .pca_nipals(xt, ncomp = ncomp) |
|
89 |
+ } |
|
90 |
+ } |
|
91 |
+ attr(out, "centered") <- center |
|
92 |
+ attr(out, "scaled") <- scale |
|
93 |
+ class(out) <- c("reduced.pca", class(out)) |
|
94 |
+ ## class(out) <- "reduced.pca" |
|
95 |
+ out |
|
96 |
+} |
|
97 |
+ |
|
98 |
+.pca_svd <- function(x, ncomp) { |
|
99 |
+ pc <- prcomp(x, center = FALSE, scale. = FALSE) |
|
100 |
+ imp <- summary(pc)$importance |
|
101 |
+ out <- pc$x[, seq_len(ncomp)] |
|
102 |
+ attr(out, "method") <- "PCA (SVD)" |
|
103 |
+ attr(out, "ncomp") <- ncomp |
|
104 |
+ attr(out, "R2") <- imp[2, seq_len(ncomp)] |
|
105 |
+ attr(out, "R2cum") <- imp[3, seq_len(ncomp)] |
|
106 |
+ attr(out, "loadings") <- pc$rotation[, seq_len(ncomp)] |
|
107 |
+ attr(out, "sdev") <- pc$sdev[seq_len(ncomp)] |
|
108 |
+ out |
|
109 |
+} |
|
110 |
+ |
|
111 |
+.pca_nipals <- function(x, ncomp, ...) { |
|
112 |
+ res <- pcaMethods::nipalsPca(Matrix = x, nPcs = ncomp, ...) |
|
113 |
+ out <- res@scores |
|
114 |
+ colnames(out) <- paste0("PC", seq_len(ncol(out))) |
|
115 |
+ rownames(out) <- rownames(x) |
|
116 |
+ colnames(res@loadings) <- paste0("PC", seq_len(ncol(out))) |
|
117 |
+ rownames(res@loadings) <- colnames(x) |
|
118 |
+ attr(out, "method") <- "PCA (NIPALS)" |
|
119 |
+ attr(out, "ncomp") <- ncomp |
|
120 |
+ attr(out, "R2") <- c(res@R2cum[1], diff(res@R2cum)) |
|
121 |
+ attr(out, "R2cum") <- res@R2cum |
|
122 |
+ attr(out, "loadings") <- res@loadings |
|
123 |
+ attr(out, "sdev") <- apply(res@scores, 2, sd) |
|
124 |
+ out |
|
125 |
+} |
|
126 |
+ |
|
127 |
+ |
|
128 |
+##' t-distributed stochastic neighbor embedding (t-SNE) |
|
129 |
+##' |
|
130 |
+##' Performs t-SNE on a matrix-like object where rows represent features and |
|
131 |
+##' columns represent samples. |
|
132 |
+##' |
|
133 |
+##' t-SNE is well-suited for visualizing high-dimensional data by giving each |
|
134 |
+##' data point a location in a two or three-dimensional map. This function |
|
135 |
+##' performs t-SNE with the transpose of `x` using [Rtsne::Rtsne] and returns a |
|
136 |
+##' `reduced.tsne` object that is a matrix with custom attributes to summarize |
|
137 |
+##' (via [summary]) and visualize (via [plotReduced]) the t-SNE result. The |
|
138 |
+##' custom attributes include the following: |
|
139 |
+##' |
|
140 |
+##' * `method`: The method used to reduce the dimension of data. |
|
141 |
+##' * `ncomp`: The number of components extracted. |
|
142 |
+##' * `perplexity`: The perplexity parameter used. |
|
143 |
+##' * `theta`: The speed/accuracy trade-off parameter used. |
|
144 |
+##' * `normalized`: A logical indicating whether the data was normalized prior |
|
145 |
+##' to t-SNE. |
|
146 |
+##' |
|
147 |
+##' @param x A matrix-like object. |
|
148 |
+##' @param ncomp A integer specifying the number of components to extract. Must |
|
149 |
+##' be either 1, 2, or 3. |
|
150 |
+##' @param normalize A logical specifying whether the input matrix is |
|
151 |
+##' mean-centered and scaled so that the largest absolute of the centered |
|
152 |
+##' matrix is equal to unity. See [Rtsne::normalize_input] for details. |
|
153 |
+##' @param ... Additional arguments passed to [Rtsne::Rtsne]. |
|
154 |
+##' @return A reduced.tsne object with the same number of rows as \code{ncol(x)} |
|
155 |
+##' containing the dimension reduction result. |
|
156 |
+##' |
|
157 |
+##' @references |
|
158 |
+##' |
|
159 |
+##' L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data |
|
160 |
+##' Using t-SNE. Journal of Machine Learning Research 9(Nov):2579-2605, 2008. |
|
161 |
+##' |
|
162 |
+##' L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms. |
|
163 |
+##' Journal of Machine Learning Research 15(Oct):3221-3245, 2014. |
|
164 |
+##' |
|
165 |
+##' Jesse H. Krijthe (2015). Rtsne: T-Distributed Stochastic Neighbor Embedding |
|
166 |
+##' using a Barnes-Hut Implementation, URL: https://github.com/jkrijthe/Rtsne |
|
167 |
+##' |
|
168 |
+##' @seealso See [reduceFeatures] that provides a |
|
169 |
+##' \linkS4class{SummarizedExperiment}-friendly wrapper for this function. |
|
170 |
+##' |
|
171 |
+##' See [plotReduced] for visualization. |
|
172 |
+##' |
|
173 |
+##' See [Rtsne::Rtsne] for the underlying function that does the work. |
|
174 |
+##' |
|
175 |
+##' @examples |
|
176 |
+##' |
|
177 |
+##' m <- assay(faahko_se, "knn_vsn") |
|
178 |
+##' res <- reduceTSNE(m, perplexity = 3) |
|
179 |
+##' summary(res) |
|
180 |
+##' |
|
181 |
+##' @export |
|
182 |
+reduceTSNE <- function(x, ncomp = 2, normalize = TRUE, ...) { |
|
183 |
+ .verify_package("Rtsne") |
|
184 |
+ if (!is.matrix(x)) { |
|
185 |
+ x <- as.matrix(x) |
|
186 |
+ } |
|
187 |
+ if (any(!is.finite(x))) { |
|
188 |
+ stop("infinite or missing values in 'x'.") |
|
189 |
+ } |
|
190 |
+ xt <- t(x) # transpose matrix; |
|
191 |
+ if (normalize) { |
|
192 |
+ xt <- Rtsne::normalize_input(xt) |
|
193 |
+ } |
|
194 |
+ res <- Rtsne::Rtsne(xt, dims = ncomp, ...) |
|
195 |
+ out <- res$Y |
|
196 |
+ colnames(out) <- paste0("tSNE", seq_len(ncol(out))) |
|
197 |
+ rownames(out) <- colnames(x) |
|
198 |
+ attr(out, "method") <- "t-SNE" |
|
199 |
+ attr(out, "ncomp") <- ncomp |
|
200 |
+ attr(out, "perplexity") <- res$perplexity |
|
201 |
+ attr(out, "theta") <- res$theta |
|
202 |
+ attr(out, "costs") <- res$cost |
|
203 |
+ attr(out, "itercosts") <- res$itercost |
|
204 |
+ attr(out, "stop_lying_iter") <- res$stop_lying_iter |
|
205 |
+ attr(out, "mom_switch_iter") <- res$mom_switch_iter |
|
206 |
+ attr(out, "momentum") <- res$momentum |
|
207 |
+ attr(out, "final_momentum") <- res$final_momentum |
|
208 |
+ attr(out, "eta") <- res$eta |
|
209 |
+ attr(out, "exaggeration_factor") <- res$exaggeration_factor |
|
210 |
+ attr(out, "normalized") <- normalize |
|
211 |
+ class(out) <- c("reduced.tsne", class(out)) |
|
212 |
+ out |
|
213 |
+} |
|
214 |
+ |
|
215 |
+##' Partial least squares-discriminant analysis (PLS-DA) |
|
216 |
+##' |
|
217 |
+##' Performs PLS-DA on a matrix-like object where rows represent features and |
|
218 |
+##' columns represent samples. |
|
219 |
+##' |
|
220 |
+##' This function performs standard PLS for classification with the transpose of |
|
221 |
+##' `x` using the [pls::plsr]. Since PLS-DA is a supervised method, users must |
|
222 |
+##' supply the information about each sample's group. Here, `y` must be a factor |
|
223 |
+##' so that it can be internally converted to an indicator matrix. The function |
|
224 |
+##' returns a `reduced.plsda` object that is a matrix with custom attributes to |
|
225 |
+##' summarize (via [summary]) and visualize (via [plotReduced]) the PLS-DA |
|
226 |
+##' result. The custom attributes include the following: |
|
227 |
+##' |
|
228 |
+##' * `method`: The method used to reduce the dimension of data. |
|
229 |
+##' * `ncomp`: The number of components extracted. |
|
230 |
+##' * `explvar`: A vector indicating the amount of X variance explained by |
|
231 |
+##' each component. |
|
232 |
+##' * `responses`: A vector indicating the levels of factor `y`. |
|
233 |
+##' * `predictors`: A vector of predictor variables. |
|
234 |
+##' * `coefficient`: An array of regression coefficients. |
|
235 |
+##' * `loadings`: A matrix of loadings. |
|
236 |
+##' * `loadings.weights`: A matrix of loading weights. |
|
237 |
+##' * `Y.observed`: A vector of observed responses. |
|
238 |
+##' * `Y.predicted`: A vector of predicted responses. |
|
239 |
+##' * `Y.scores`: A matrix of Y-scores. |
|
240 |
+##' * `Y.loadings`: A matrix of Y-loadings. |
|
241 |
+##' * `projection`: The projection matrix. |
|
242 |
+##' * `fitted.values`: An array of fitted values. |
|
243 |
+##' * `residuals`: An array of regression residuals. |
|
244 |
+##' * `centered`: A logical indicating whether the data was mean-centered prior |
|
245 |
+##' to PLS-DA. |
|
246 |
+##' * `scaled`: A logical indicating whether the data was scaled prior to |
|
247 |
+##' PLS-DA. |
|
248 |
+##' |
|
249 |
+##' |
|
250 |
+##' @param x A matrix-like object. |
|
251 |
+##' @param y A factor vector for the information about each sample's group. |
|
252 |
+##' @param ncomp A integer specifying the number of components to extract. |
|
253 |
+##' @param center A logical specifying whether `x` and `y` matrices need to be |
|
254 |
+##' mean-centered prior to PLS-DA. |
|
255 |
+##' @param scale A logical specifying whether the unit variance scaling needs to |
|
256 |
+##' be performed on `x` prior to PLS-DA. |
|
257 |
+##' @param ... Additional arguments passed to [pls::plsr]. |
|
258 |
+##' @return A reduced.plsda object with the same number of rows as |
|
259 |
+##' \code{ncol(x)} containing the dimension reduction result. |
|
260 |
+##' |
|
261 |
+##' @references |
|
262 |
+##' |
|
263 |
+##' Kristian Hovde Liland, Bjørn-Helge Mevik and Ron Wehrens (2021). pls: |
|
264 |
+##' Partial Least Squares and Principal Component Regression. R package version |
|
265 |
+##' 2.8-0. https://CRAN.R-project.org/package=pls |
|
266 |
+##' |
|
267 |
+##' @seealso See [reduceFeatures] that provides a |
|
268 |
+##' \linkS4class{SummarizedExperiment}-friendly wrapper for this function. |
|
269 |
+##' |
|
270 |
+##' See [plotReduced] for visualization. |
|
271 |
+##' |
|
272 |
+##' See [pls::plsr] for the underlying function that does the work. |
|
273 |
+##' |
|
274 |
+##' @examples |
|
275 |
+##' |
|
276 |
+##' m <- assay(faahko_se, "knn_vsn") |
|
277 |
+##' y <- factor(colData(faahko_se)$sample_group) |
|
278 |
+##' res <- reducePLSDA(m, y = y) |
|
279 |
+##' summary(res) |
|
280 |
+## |
|
281 |
+##' @export |
|
282 |
+reducePLSDA <- function(x, y, ncomp = 2, center = TRUE, scale = FALSE, |
|
283 |
+ ...) { |
|
284 |
+ .verify_package("pls") |
|
285 |
+ if (!is.matrix(x)) { |
|
286 |
+ x <- as.matrix(x) |
|
287 |
+ } |
|
288 |
+ if (!is.factor(y)) { |
|
289 |
+ stop("'y' must be a factor.") |
|
290 |
+ } |
|
291 |
+ if (any(!is.finite(y))) { |
|
292 |
+ stop("infinite or missing values in 'y'.") |
|
293 |
+ } |
|
294 |
+ if (any(!is.finite(x))) { |
|
295 |
+ stop("infinite or missing values in 'x'.") |
|
296 |
+ } |
|
297 |
+ xt <- t(x) |
|
298 |
+ y_levels <- levels(y) |
|
299 |
+ y_dummy <- model.matrix(~ y - 1) |
|
300 |
+ colnames(y_dummy) <- gsub("^y", "", colnames(y_dummy)) |
|
301 |
+ |
|
302 |
+ d <- data.frame(y = I(y_dummy), x = I(xt), row.names = colnames(x)) |
|
303 |
+ fit <- pls::plsr(y ~ x, data = d, ncomp = ncomp, |
|
304 |
+ center = center, scale = scale, ...) |
|
305 |
+ out <- pls::scores(fit) |
|
306 |
+ pred_vals <- predict(fit, ncomp = fit$ncomp) |
|
307 |
+ y_predicted <- colnames(pred_vals)[apply(pred_vals, 1, which.max)] |
|
308 |
+ |
|
309 |
+ attr(out, "method") <- paste0("PLS-DA (", fit$method, ")") |
|
310 |
+ attr(out, "responses") <- pls::respnames(fit) |
|
311 |
+ attr(out, "predictors") <- pls::prednames(fit) |
|
312 |
+ attr(out, "coefficients") <- fit$coefficients |
|
313 |
+ attr(out, "loadings") <- fit$loadings |
|
314 |
+ attr(out, "loadings.weights") <- fit$loadings.weights |
|
315 |
+ attr(out, "Y.observed") <- y |
|
316 |
+ attr(out, "Y.predicted") <- y_predicted |
|
317 |
+ attr(out, "Y.scores") <- fit$Yscores |
|
318 |
+ attr(out, "Y.loadings") <- fit$Yloadings |
|
319 |
+ attr(out, "projection") <- fit$projection |
|
320 |
+ attr(out, "fitted.values") <- fitted(fit) |
|
321 |
+ attr(out, "residuals") <- residuals(fit) |
|
322 |
+ attr(out, "ncomp") <- fit$ncomp |
|
323 |
+ attr(out, "centered") <- center |
|
324 |
+ attr(out, "scaled") <- scale |
|
325 |
+ attr(out, "validation") <- fit$validation |
|
326 |
+ class(out) <- c("reduced.plsda", "matrix", class(out)) |
|
327 |
+ out |
|
328 |
+} |