... | ... |
@@ -231,6 +231,12 @@ setGeneric( |
231 | 231 |
function(x, ...) standardGeneric("poplin_impute_mean") |
232 | 232 |
) |
233 | 233 |
|
234 |
+##' @export |
|
235 |
+setGeneric( |
|
236 |
+ "poplin_impute_simple", |
|
237 |
+ function(x, ...) standardGeneric("poplin_impute_simple") |
|
238 |
+) |
|
239 |
+ |
|
234 | 240 |
##' @export |
235 | 241 |
setGeneric( |
236 | 242 |
"poplin_impute_pca", |
... | ... |
@@ -1,19 +1,64 @@ |
1 | 1 |
.poplin_impute <- function(x, |
2 |
- method = c("knn", "halfmin", "median", |
|
3 |
- "mean", "pca", "randomforest"), |
|
2 |
+ method = c("knn", "randomforest", "pca", "simple"), |
|
4 | 3 |
...) { |
5 | 4 |
method <- match.arg(method) |
6 | 5 |
switch( |
7 | 6 |
method, |
8 | 7 |
knn = .poplin_impute_knn(x, ...), |
9 |
- halfmin = .poplin_impute_halfmin(x, ...), |
|
10 |
- median = .poplin_impute_median(x, ...), |
|
11 |
- mean = .poplin_impute_mean(x, ...), |
|
12 |
- pca = .poplin_impute_pca(x, ...), |
|
13 | 8 |
randomforest = .poplin_impute_randomforest(x, ...), |
9 |
+ pca = .poplin_impute_pca(x, ...), |
|
10 |
+ ## halfmin = .poplin_impute_halfmin(x, ...), |
|
11 |
+ ## median = .poplin_impute_median(x, ...), |
|
12 |
+ ## mean = .poplin_impute_mean(x, ...), |
|
13 |
+ simple = .poplin_impute_simple(x, ...) |
|
14 | 14 |
) |
15 | 15 |
} |
16 | 16 |
|
17 |
+## Knn imputation |
|
18 |
+.poplin_impute_knn <- function(x, by = c("feature", "sample"), ...) { |
|
19 |
+ if (!requireNamespace("VIM", quietly = TRUE)) { |
|
20 |
+ stop("Package 'VIM' is required. Please install and try again.") |
|
21 |
+ } |
|
22 |
+ by <- match.arg(by) |
|
23 |
+ if (by == "feature") { |
|
24 |
+ out <- VIM::kNN(x, ...)[, 1:ncol(x)] |
|
25 |
+ ## VIM package internally converts x as data.table, which drops rownames |
|
26 |
+ rownames(out) <- rownames(x) |
|
27 |
+ } else { |
|
28 |
+ out <- t(VIM::kNN(t(x), ...))[1:nrow(x), ] |
|
29 |
+ colnames(out) <- colnames(x) |
|
30 |
+ } |
|
31 |
+ as.matrix(out) |
|
32 |
+} |
|
33 |
+ |
|
34 |
+## Random forest imputation |
|
35 |
+.poplin_impute_randomforest <- function(x, ...) { |
|
36 |
+ if (!requireNamespace("missForest", quietly = TRUE)) { |
|
37 |
+ stop("Package 'missForest' is required. Please install and try again.") |
|
38 |
+ } |
|
39 |
+ t(missForest::missForest(t(x), ...)$ximp) |
|
40 |
+} |
|
41 |
+ |
|
42 |
+## Bayesian PCA imputation |
|
43 |
+.poplin_impute_pca <- function(x, type = c("bpca", "ppca", "nipals", "svdImpute"), ...) { |
|
44 |
+ if (!requireNamespace("pcaMethods", quietly = TRUE)) { |
|
45 |
+ stop("Package 'pcaMethods' is required. Please install and try again.") |
|
46 |
+ } |
|
47 |
+ type <- match.arg(type) |
|
48 |
+ t(pcaMethods::pca(t(x), method = type, ...)@completeObs) |
|
49 |
+} |
|
50 |
+ |
|
51 |
+## Simple univariate imputation |
|
52 |
+.poplin_impute_simple <- function(x, type = c("halfmin", "median", "mean")) { |
|
53 |
+ type <- match.arg(type) |
|
54 |
+ switch( |
|
55 |
+ type, |
|
56 |
+ halfmin = .poplin_impute_halfmin(x), |
|
57 |
+ median = .poplin_impute_median(x), |
|
58 |
+ mean = .poplin_impute_mean(x) |
|
59 |
+ ) |
|
60 |
+} |
|
61 |
+ |
|
17 | 62 |
.poplin_impute_halfmin <- function(x) { |
18 | 63 |
out <- apply(x, 1, function(x) { |
19 | 64 |
if (anyNA(x)) { |
... | ... |
@@ -46,33 +91,3 @@ |
46 | 91 |
}) |
47 | 92 |
t(out) |
48 | 93 |
} |
49 |
- |
|
50 |
-.poplin_impute_knn <- function(x, by = c("feature", "sample"), ...) { |
|
51 |
- if (!requireNamespace("VIM", quietly = TRUE)) { |
|
52 |
- stop("Package 'VIM' is required. Please install and try again.") |
|
53 |
- } |
|
54 |
- by <- match.arg(by) |
|
55 |
- if (by == "feature") { |
|
56 |
- out <- VIM::kNN(x, ...)[, 1:ncol(x)] |
|
57 |
- ## VIM package internally converts x as data.table, which drops rownames |
|
58 |
- rownames(out) <- rownames(x) |
|
59 |
- } else { |
|
60 |
- out <- t(VIM::kNN(t(x), ...))[1:nrow(x), ] |
|
61 |
- colnames(out) <- colnames(x) |
|
62 |
- } |
|
63 |
- as.matrix(out) |
|
64 |
-} |
|
65 |
- |
|
66 |
-.poplin_impute_pca <- function(x, ...) { |
|
67 |
- if (!requireNamespace("pcaMethods", quietly = TRUE)) { |
|
68 |
- stop("Package 'pcaMethods' is required. Please install and try again.") |
|
69 |
- } |
|
70 |
- t(pcaMethods::pca(t(x), method = "bpca", ...)@completeObs) |
|
71 |
-} |
|
72 |
- |
|
73 |
-.poplin_impute_randomforest <- function(x, ...) { |
|
74 |
- if (!requireNamespace("missForest", quietly = TRUE)) { |
|
75 |
- stop("Package 'missForest' is required. Please install and try again.") |
|
76 |
- } |
|
77 |
- t(missForest::missForest(t(x), ...)$ximp) |
|
78 |
-} |
... | ... |
@@ -2,12 +2,34 @@ |
2 | 2 |
##' |
3 | 3 |
##' Missing values are frequently found in metabolomics data. The \pkg{poplin} |
4 | 4 |
##' package provides a few options to handle them. |
5 |
-##' |
|
5 |
+##' [poplin_impute] is a wrapper for the following set of functions: |
|
6 |
+##' \describe{ |
|
7 |
+##' \item{\code{\link{poplin_impute_knn}}:}{ |
|
8 |
+##' k-nearest neighbor (KNN) imputation |
|
9 |
+##' } |
|
10 |
+##' \item{\code{\link{poplin_impute_pca}}:}{ |
|
11 |
+##' principal component analysis (PCA) imputation |
|
12 |
+##' } |
|
13 |
+##' \item{\code{\link{poplin_impute_randomforest}}:}{ |
|
14 |
+##' random forest imputation |
|
15 |
+##' } |
|
16 |
+##' \item{\code{\link{poplin_impute_simple}}:}{ |
|
17 |
+##' simple univariate imputation (e.g., half-minimum, mean, median) |
|
18 |
+##' } |
|
19 |
+##' } |
|
20 |
+##' @param x A matrix or \linkS4class{poplin} object. |
|
21 |
+##' @param method A imputation method. Default is 'pqn'. |
|
22 |
+##' @param poplin_in Name of a data matrix to retrieve. |
|
23 |
+##' @param poplin_out Name of a data matrix to store. |
|
24 |
+##' @param ... Argument passed to a specific imputation method. |
|
25 |
+##' @return A matrix or \linkS4class{poplin} object of the same dimension as |
|
26 |
+##' \code{x} containing the imputed intensities. |
|
6 | 27 |
##' @name poplin_impute |
28 |
+##' @family imputation methods |
|
7 | 29 |
setMethod( |
8 | 30 |
"poplin_impute", |
9 | 31 |
"matrix", |
10 |
- function(x, method, ...) { |
|
32 |
+ function(x, method = c("knn", "pca", "randomforest", "simple"), ...) { |
|
11 | 33 |
.poplin_impute(x, method = method, ...) |
12 | 34 |
} |
13 | 35 |
) |
... | ... |
@@ -16,123 +38,233 @@ setMethod( |
16 | 38 |
setMethod( |
17 | 39 |
"poplin_impute", |
18 | 40 |
"poplin", |
19 |
- function(x, method, poplin_in, poplin_out, ...) { |
|
41 |
+ function(x, method = c("knn", "pca", "randomforest", "simple"), |
|
42 |
+ poplin_in, poplin_out, ...) { |
|
20 | 43 |
m <- .verify_and_extract_input(x, poplin_in) |
21 | 44 |
poplin_data(x, poplin_out) <- .poplin_impute(m, method = method, ...) |
22 | 45 |
x |
23 | 46 |
} |
24 | 47 |
) |
25 | 48 |
|
26 |
-##' @rdname poplin_impute |
|
49 |
+##' K-nearest neighbor (KNN) imputation |
|
50 |
+##' |
|
51 |
+##' Apply k-nearest neighbor (KNN) imputation to a matrix or |
|
52 |
+##' \linkS4class{poplin} object. This is an interface to the [VIM::kNN] from the |
|
53 |
+##' \pkg{VIM} package. Since it is based on Gower's distance, standardization of |
|
54 |
+##' input data prior to KNN imputation would not affect the result. |
|
55 |
+##' |
|
56 |
+##' @references |
|
57 |
+##' Alexander Kowarik, Matthias Templ (2016). Imputation with the R Package VIM. |
|
58 |
+##' Journal of Statistical Software, 74(7), 1-16. doi:10.18637/jss.v074.i07 |
|
59 |
+##' |
|
60 |
+##' Gower, J. C. (1971). A General Coefficient of Similarity and Some of Its |
|
61 |
+##' Properties. Biometrics, 27(4), 857–871. https://doi.org/10.2307/2528823 |
|
62 |
+##' |
|
63 |
+##' @param x A matrix or \linkS4class{poplin} object. |
|
64 |
+##' @param poplin_in Name of a data matrix to retrieve. |
|
65 |
+##' @param poplin_out Name of a data matrix to store. |
|
66 |
+##' @param by Imputation by k-nearest features or by k-nearest samples. |
|
67 |
+##' @param ... Additional argument passed to [VIM::kNN]. |
|
68 |
+##' @return A matrix or \linkS4class{poplin} object of the same dimension as |
|
69 |
+##' \code{x} containing the imputed intensities. |
|
70 |
+##' @name poplin_impute_knn |
|
71 |
+##' @family imputation methods |
|
27 | 72 |
setMethod( |
28 | 73 |
"poplin_impute_knn", |
29 | 74 |
"matrix", |
30 |
- function(x, ...) { |
|
31 |
- .poplin_impute_knn(x, ...) |
|
75 |
+ function(x, by = c("feature", "sample"), ...) { |
|
76 |
+ .poplin_impute_knn(x, by = by, ...) |
|
32 | 77 |
} |
33 | 78 |
) |
34 | 79 |
|
35 |
-##' @rdname poplin_impute |
|
80 |
+##' @rdname poplin_impute_knn |
|
36 | 81 |
setMethod( |
37 | 82 |
"poplin_impute_knn", |
38 | 83 |
"poplin", |
39 |
- function(x, poplin_in, poplin_out, ...) { |
|
84 |
+ function(x, poplin_in, poplin_out, by = c("feature", "sample"), ...) { |
|
40 | 85 |
.poplin_extract_and_assign(x, .poplin_impute_knn, |
41 |
- poplin_in, poplin_out, ...) |
|
86 |
+ poplin_in, poplin_out, |
|
87 |
+ by = by, ...) |
|
42 | 88 |
} |
43 | 89 |
) |
44 | 90 |
|
45 |
-##' @rdname poplin_impute |
|
91 |
+##' Random forest imputation |
|
92 |
+##' |
|
93 |
+##' Apply random forest imputation to a matrix or \linkS4class{poplin} object. |
|
94 |
+##' This is an interface to the [missForest::missForest] from the |
|
95 |
+##' \pkg{missForest} package. Since random forest is a tree-based method, it can |
|
96 |
+##' be performed with raw intensities - invariant to monotonic transformations |
|
97 |
+##' (However, statistical analysis could be affected because, for example, |
|
98 |
+##' log(mean(predicted values) != mean(log(predicted values))). |
|
99 |
+##' |
|
100 |
+##' @references |
|
101 |
+##' |
|
102 |
+##' Daniel J. Stekhoven (2013). missForest: Nonparametric Missing Value |
|
103 |
+##' Imputation using Random Forest. R package version 1.4. |
|
104 |
+##' |
|
105 |
+##' Stekhoven D. J., & Buehlmann, P. (2012). MissForest - non-parametric missing |
|
106 |
+##' value imputation for mixed-type data. Bioinformatics, 28(1), 112-118. |
|
107 |
+##' |
|
108 |
+##' @param x A matrix or \linkS4class{poplin} object. |
|
109 |
+##' @param poplin_in Name of a data matrix to retrieve. |
|
110 |
+##' @param poplin_out Name of a data matrix to store. |
|
111 |
+##' @param ... Additional argument passed to [missForest::missForest]. |
|
112 |
+##' @return A matrix or \linkS4class{poplin} object of the same dimension as |
|
113 |
+##' \code{x} containing the imputed intensities. |
|
114 |
+##' @name poplin_impute_randomforest |
|
115 |
+##' @family imputation methods |
|
46 | 116 |
setMethod( |
47 |
- "poplin_impute_halfmin", |
|
117 |
+ "poplin_impute_randomforest", |
|
48 | 118 |
"matrix", |
49 | 119 |
function(x, ...) { |
50 |
- .poplin_impute_halfmin(x, ...) |
|
120 |
+ .poplin_impute_randomforest(x, ...) |
|
51 | 121 |
} |
52 | 122 |
) |
53 | 123 |
|
54 |
-##' @rdname poplin_impute |
|
124 |
+##' @rdname poplin_impute_randomforest |
|
55 | 125 |
setMethod( |
56 |
- "poplin_impute_halfmin", |
|
126 |
+ "poplin_impute_randomforest", |
|
57 | 127 |
"poplin", |
58 | 128 |
function(x, poplin_in, poplin_out, ...) { |
59 |
- .poplin_extract_and_assign(x, .poplin_impute_halfmin, |
|
129 |
+ .poplin_extract_and_assign(x, .poplin_impute_randomforest, |
|
60 | 130 |
poplin_in, poplin_out, ...) |
61 | 131 |
} |
62 | 132 |
) |
63 | 133 |
|
64 |
-##' @rdname poplin_impute |
|
134 |
+##' Principal component analysis (PCA) imputation |
|
135 |
+##' |
|
136 |
+##' Apply PCA imputation to a matrix or \linkS4class{poplin} object. This is a |
|
137 |
+##' interface to the [pcaMethods::pca] from the \pkg{pcaMethods} package. Here, |
|
138 |
+##' features are interpreted as variables and samples as observations. |
|
139 |
+##' Pre-processing of input (centering, scaling) may be necessary. See the |
|
140 |
+##' documentation of [pcaMethods:pca] and [pcaMethods:prep]. Note that the PCA |
|
141 |
+##' imputation could yield negative feature values that need to be |
|
142 |
+##' post-processed. |
|
143 |
+##' |
|
144 |
+##' @references |
|
145 |
+##' Stacklies, W., Redestig, H., Scholz, M., Walther, D. and Selbig, J. |
|
146 |
+##' pcaMethods -- a Bioconductor package providing PCA methods for incomplete |
|
147 |
+##' data. Bioinformatics, 2007, 23, 1164-1167 |
|
148 |
+##' |
|
149 |
+##' @param x A matrix or \linkS4class{poplin} object. |
|
150 |
+##' @param poplin_in Name of a data matrix to retrieve. |
|
151 |
+##' @param poplin_out Name of a data matrix to store. |
|
152 |
+##' @param type A method for performing PCA. |
|
153 |
+##' @param ... Additional argument passed to [pcaMethods::pca]. |
|
154 |
+##' @return A matrix or \linkS4class{poplin} object of the same dimension as |
|
155 |
+##' \code{x} containing the imputed intensities. |
|
156 |
+##' @name poplin_impute_pca |
|
157 |
+##' @family imputation methods |
|
65 | 158 |
setMethod( |
66 |
- "poplin_impute_median", |
|
159 |
+ "poplin_impute_pca", |
|
67 | 160 |
"matrix", |
68 |
- function(x, ...) { |
|
69 |
- .poplin_impute_median(x, ...) |
|
161 |
+ function(x, type = c("bpca", "ppca", "nipals", "svdImpute"), ...) { |
|
162 |
+ .poplin_impute_pca(x, type = type, ...) |
|
70 | 163 |
} |
71 | 164 |
) |
72 | 165 |
|
73 |
-##' @rdname poplin_impute |
|
166 |
+##' @rdname poplin_impute_pca |
|
74 | 167 |
setMethod( |
75 |
- "poplin_impute_median", |
|
168 |
+ "poplin_impute_pca", |
|
76 | 169 |
"poplin", |
77 |
- function(x, poplin_in, poplin_out, ...) { |
|
78 |
- .poplin_extract_and_assign(x, .poplin_impute_median, |
|
79 |
- poplin_in, poplin_out, ...) |
|
170 |
+ function(x, poplin_in, poplin_out, |
|
171 |
+ type = c("bpca", "ppca", "nipals", "svdImpute"), ...) { |
|
172 |
+ .poplin_extract_and_assign(x, .poplin_impute_pca, |
|
173 |
+ poplin_in, poplin_out, |
|
174 |
+ type = type, ...) |
|
80 | 175 |
} |
81 | 176 |
) |
82 | 177 |
|
83 |
-##' @rdname poplin_impute |
|
178 |
+##' Simple univariate imputation |
|
179 |
+##' |
|
180 |
+##' Apply univariate imputation to a matrix or \linkS4class{poplin} object. The |
|
181 |
+##' supported methods include |
|
182 |
+##' \itemize{ |
|
183 |
+##' \item Half-minimum imputation: for each feature, missing values are replaced |
|
184 |
+##' with half the observed minimum. |
|
185 |
+##' \item Median imputation: for each feature, missing values are replaced with |
|
186 |
+##' the median of non-missing values. |
|
187 |
+##' \item Mean imputation: for each feature, missing values are replaced with |
|
188 |
+##' the mean of non-missing values. |
|
189 |
+##' } |
|
190 |
+##' @references |
|
191 |
+##' Wei, R., Wang, J., Su, M. et al. Missing Value Imputation Approach for Mass |
|
192 |
+##' Spectrometry-based Metabolomics Data. Sci Rep 8, 663 (2018). |
|
193 |
+##' https://doi.org/10.1038/s41598-017-19120-0 |
|
194 |
+##' |
|
195 |
+##' @param x A matrix or \linkS4class{poplin} object. |
|
196 |
+##' @param poplin_in Name of a data matrix to retrieve. |
|
197 |
+##' @param poplin_out Name of a data matrix to store. |
|
198 |
+##' @param type A method for doing univariate imputation. |
|
199 |
+##' @return A matrix or \linkS4class{poplin} object of the same dimension as |
|
200 |
+##' \code{x} containing the imputed intensities. |
|
201 |
+##' @name poplin_impute_simple |
|
202 |
+##' @family imputation methods |
|
84 | 203 |
setMethod( |
85 |
- "poplin_impute_mean", |
|
204 |
+ "poplin_impute_simple", |
|
86 | 205 |
"matrix", |
87 |
- function(x, ...) { |
|
88 |
- .poplin_impute_mean(x, ...) |
|
206 |
+ function(x, type = c("halfmin", "median", "mean")) { |
|
207 |
+ .poplin_impute_simple(x, type = type) |
|
89 | 208 |
} |
90 | 209 |
) |
91 | 210 |
|
92 |
-##' @rdname poplin_impute |
|
211 |
+##' @rdname poplin_impute_simple |
|
93 | 212 |
setMethod( |
94 |
- "poplin_impute_mean", |
|
213 |
+ "poplin_impute_simple", |
|
95 | 214 |
"poplin", |
96 |
- function(x, poplin_in, poplin_out, ...) { |
|
97 |
- .poplin_extract_and_assign(x, .poplin_impute_mean, |
|
98 |
- poplin_in, poplin_out, ...) |
|
215 |
+ function(x, poplin_in, poplin_out, type = c("halfmin", "median", "mean")) { |
|
216 |
+ .poplin_extract_and_assign(x, .poplin_impute_halfmin, |
|
217 |
+ poplin_in, poplin_out, type = type) |
|
99 | 218 |
} |
100 | 219 |
) |
101 | 220 |
|
102 |
-##' @rdname poplin_impute |
|
103 | 221 |
setMethod( |
104 |
- "poplin_impute_pca", |
|
222 |
+ "poplin_impute_halfmin", |
|
223 |
+ "matrix", |
|
224 |
+ function(x) { |
|
225 |
+ .poplin_impute_halfmin(x) |
|
226 |
+ } |
|
227 |
+) |
|
228 |
+ |
|
229 |
+setMethod( |
|
230 |
+ "poplin_impute_halfmin", |
|
231 |
+ "poplin", |
|
232 |
+ function(x, poplin_in, poplin_out) { |
|
233 |
+ .poplin_extract_and_assign(x, .poplin_impute_halfmin, |
|
234 |
+ poplin_in, poplin_out) |
|
235 |
+ } |
|
236 |
+) |
|
237 |
+ |
|
238 |
+setMethod( |
|
239 |
+ "poplin_impute_median", |
|
105 | 240 |
"matrix", |
106 | 241 |
function(x, ...) { |
107 |
- .poplin_impute_pca(x, ...) |
|
242 |
+ .poplin_impute_median(x, ...) |
|
108 | 243 |
} |
109 | 244 |
) |
110 | 245 |
|
111 |
-##' @rdname poplin_impute |
|
112 | 246 |
setMethod( |
113 |
- "poplin_impute_pca", |
|
247 |
+ "poplin_impute_median", |
|
114 | 248 |
"poplin", |
115 | 249 |
function(x, poplin_in, poplin_out, ...) { |
116 |
- .poplin_extract_and_assign(x, .poplin_impute_pca, |
|
250 |
+ .poplin_extract_and_assign(x, .poplin_impute_median, |
|
117 | 251 |
poplin_in, poplin_out, ...) |
118 | 252 |
} |
119 | 253 |
) |
120 | 254 |
|
121 |
-##' @rdname poplin_impute |
|
122 | 255 |
setMethod( |
123 |
- "poplin_impute_randomforest", |
|
256 |
+ "poplin_impute_mean", |
|
124 | 257 |
"matrix", |
125 | 258 |
function(x, ...) { |
126 |
- .poplin_impute_randomforest(x, ...) |
|
259 |
+ .poplin_impute_mean(x, ...) |
|
127 | 260 |
} |
128 | 261 |
) |
129 | 262 |
|
130 |
-##' @rdname poplin_impute |
|
131 | 263 |
setMethod( |
132 |
- "poplin_impute_randomforest", |
|
264 |
+ "poplin_impute_mean", |
|
133 | 265 |
"poplin", |
134 | 266 |
function(x, poplin_in, poplin_out, ...) { |
135 |
- .poplin_extract_and_assign(x, .poplin_impute_randomforest, |
|
267 |
+ .poplin_extract_and_assign(x, .poplin_impute_mean, |
|
136 | 268 |
poplin_in, poplin_out, ...) |
137 | 269 |
} |
138 | 270 |
) |
... | ... |
@@ -63,13 +63,13 @@ setMethod( |
63 | 63 |
|
64 | 64 |
##' Probabilistic quotient normalization (PQN) |
65 | 65 |
##' |
66 |
-##' Apply probabilistic quotient normalization to a matrix or |
|
66 |
+##' Apply probabilistic quotient normalization (PQN) to a matrix or |
|
67 | 67 |
##' \linkS4class{poplin} object. For the calculation of quotients, a reference |
68 | 68 |
##' spectrum needs to be obtained from a mean or median spectrum based on all |
69 | 69 |
##' spectra of the study or a subset of the study. Feature intensities are |
70 | 70 |
##' normalized by the median of quotients. See Dieterle et al. (2006) for |
71 | 71 |
##' details. |
72 |
-##' |
|
72 |
+##' |
|
73 | 73 |
##' @param x A matrix or \linkS4class{poplin} object. |
74 | 74 |
##' @param poplin_in Name of a data matrix to retrieve. |
75 | 75 |
##' @param poplin_out Name of a data matrix to store. |
... | ... |
@@ -116,7 +116,7 @@ setMethod( |
116 | 116 |
##' Apply sum normalization to a matrix or \linkS4class{poplin} object. For each |
117 | 117 |
##' sample, feature intensities are divided by its Total Ion Current (TIC), |
118 | 118 |
##' i.e., every feature is divided by the sum of all intensity values. |
119 |
-##' |
|
119 |
+##' |
|
120 | 120 |
##' @param x A matrix or \linkS4class{poplin} object. |
121 | 121 |
##' @param poplin_in Name of a data matrix to retrieve. |
122 | 122 |
##' @param poplin_out Name of a data matrix to store. |
... | ... |
@@ -153,7 +153,7 @@ setMethod( |
153 | 153 |
##' Apply mean normalization to a matrix or \linkS4class{poplin} object. For |
154 | 154 |
##' each sample, feature intensities are divided by its mean. The mean of |
155 | 155 |
##' intensity values for individual samples will be one as a result. |
156 |
-##' |
|
156 |
+##' |
|
157 | 157 |
##' @param x A matrix or \linkS4class{poplin} object. |
158 | 158 |
##' @param poplin_in Name of a data matrix to retrieve. |
159 | 159 |
##' @param poplin_out Name of a data matrix to store. |
... | ... |
@@ -190,7 +190,7 @@ setMethod( |
190 | 190 |
##' Apply median normalization to a matrix or \linkS4class{poplin} object. For |
191 | 191 |
##' each sample, feature intensities are divided by its median. The median of |
192 | 192 |
##' intensity values for individual samples will be one as a result. |
193 |
-##' |
|
193 |
+##' |
|
194 | 194 |
##' @param x A matrix or \linkS4class{poplin} object. |
195 | 195 |
##' @param poplin_in Name of a data matrix to retrieve. |
196 | 196 |
##' @param poplin_out Name of a data matrix to store. |
... | ... |
@@ -228,7 +228,7 @@ setMethod( |
228 | 228 |
##' \linkS4class{poplin} object. For each sample, feature intensities are scaled |
229 | 229 |
##' by its MAD. The MAD of intensity values for individual samples will be one |
230 | 230 |
##' as a result. |
231 |
-##' |
|
231 |
+##' |
|
232 | 232 |
##' @param x A matrix or \linkS4class{poplin} object. |
233 | 233 |
##' @param poplin_in Name of a data matrix to retrieve. |
234 | 234 |
##' @param poplin_out Name of a data matrix to store. |
... | ... |
@@ -347,7 +347,7 @@ setMethod( |
347 | 347 |
##' from the \pkg{vsn} package (see [vsn::vsn2] for help). The vsn produces |
348 | 348 |
##' normalized intensities based on a glog (generalized logarithm) scale to base |
349 | 349 |
##' 2. See Huber et al. (2002) for details. |
350 |
-##' |
|
350 |
+##' |
|
351 | 351 |
##' @references |
352 | 352 |
##' Huber W, von Heydebreck A, Sültmann H, Poustka A, Vingron M. Variance |
353 | 353 |
##' stabilization applied to microarray data calibration and to the |
... | ... |
@@ -384,7 +384,7 @@ setMethod( |
384 | 384 |
) |
385 | 385 |
|
386 | 386 |
##' Feature-based scaling |
387 |
-##' |
|
387 |
+##' |
|
388 | 388 |
##' Apply feature-based scaling to a matrix or \linkS4class{poplin} object. The |
389 | 389 |
##' supported methods include |
390 | 390 |
##' \itemize{ |
... | ... |
@@ -3,49 +3,57 @@ |
3 | 3 |
\name{poplin_impute} |
4 | 4 |
\alias{poplin_impute} |
5 | 5 |
\alias{poplin_impute,poplin-method} |
6 |
-\alias{poplin_impute_knn,matrix-method} |
|
7 |
-\alias{poplin_impute_knn,poplin-method} |
|
8 |
-\alias{poplin_impute_halfmin,matrix-method} |
|
9 |
-\alias{poplin_impute_halfmin,poplin-method} |
|
10 |
-\alias{poplin_impute_median,matrix-method} |
|
11 |
-\alias{poplin_impute_median,poplin-method} |
|
12 |
-\alias{poplin_impute_mean,matrix-method} |
|
13 |
-\alias{poplin_impute_mean,poplin-method} |
|
14 |
-\alias{poplin_impute_pca,matrix-method} |
|
15 |
-\alias{poplin_impute_pca,poplin-method} |
|
16 |
-\alias{poplin_impute_randomforest,matrix-method} |
|
17 |
-\alias{poplin_impute_randomforest,poplin-method} |
|
18 | 6 |
\title{Imputation methods} |
19 | 7 |
\usage{ |
20 |
-\S4method{poplin_impute}{matrix}(x, method, ...) |
|
21 |
- |
|
22 |
-\S4method{poplin_impute}{poplin}(x, method, poplin_in, poplin_out, ...) |
|
23 |
- |
|
24 |
-\S4method{poplin_impute_knn}{matrix}(x, ...) |
|
25 |
- |
|
26 |
-\S4method{poplin_impute_knn}{poplin}(x, poplin_in, poplin_out, ...) |
|
27 |
- |
|
28 |
-\S4method{poplin_impute_halfmin}{matrix}(x, ...) |
|
29 |
- |
|
30 |
-\S4method{poplin_impute_halfmin}{poplin}(x, poplin_in, poplin_out, ...) |
|
31 |
- |
|
32 |
-\S4method{poplin_impute_median}{matrix}(x, ...) |
|
33 |
- |
|
34 |
-\S4method{poplin_impute_median}{poplin}(x, poplin_in, poplin_out, ...) |
|
35 |
- |
|
36 |
-\S4method{poplin_impute_mean}{matrix}(x, ...) |
|
37 |
- |
|
38 |
-\S4method{poplin_impute_mean}{poplin}(x, poplin_in, poplin_out, ...) |
|
8 |
+\S4method{poplin_impute}{matrix}(x, method = c("knn", "pca", "randomforest", "simple"), ...) |
|
9 |
+ |
|
10 |
+\S4method{poplin_impute}{poplin}( |
|
11 |
+ x, |
|
12 |
+ method = c("knn", "pca", "randomforest", "simple"), |
|
13 |
+ poplin_in, |
|
14 |
+ poplin_out, |
|
15 |
+ ... |
|
16 |
+) |
|
17 |
+} |
|
18 |
+\arguments{ |
|
19 |
+\item{x}{A matrix or \linkS4class{poplin} object.} |
|
39 | 20 |
|
40 |
-\S4method{poplin_impute_pca}{matrix}(x, ...) |
|
21 |
+\item{method}{A imputation method. Default is 'pqn'.} |
|
41 | 22 |
|
42 |
-\S4method{poplin_impute_pca}{poplin}(x, poplin_in, poplin_out, ...) |
|
23 |
+\item{...}{Argument passed to a specific imputation method.} |
|
43 | 24 |
|
44 |
-\S4method{poplin_impute_randomforest}{matrix}(x, ...) |
|
25 |
+\item{poplin_in}{Name of a data matrix to retrieve.} |
|
45 | 26 |
|
46 |
-\S4method{poplin_impute_randomforest}{poplin}(x, poplin_in, poplin_out, ...) |
|
27 |
+\item{poplin_out}{Name of a data matrix to store.} |
|
28 |
+} |
|
29 |
+\value{ |
|
30 |
+A matrix or \linkS4class{poplin} object of the same dimension as |
|
31 |
+\code{x} containing the imputed intensities. |
|
47 | 32 |
} |
48 | 33 |
\description{ |
49 | 34 |
Missing values are frequently found in metabolomics data. The \pkg{poplin} |
50 | 35 |
package provides a few options to handle them. |
36 |
+\link{poplin_impute} is a wrapper for the following set of functions: |
|
37 |
+\describe{ |
|
38 |
+\item{\code{\link{poplin_impute_knn}}:}{ |
|
39 |
+k-nearest neighbor (KNN) imputation |
|
40 |
+} |
|
41 |
+\item{\code{\link{poplin_impute_pca}}:}{ |
|
42 |
+principal component analysis (PCA) imputation |
|
43 |
+} |
|
44 |
+\item{\code{\link{poplin_impute_randomforest}}:}{ |
|
45 |
+random forest imputation |
|
46 |
+} |
|
47 |
+\item{\code{\link{poplin_impute_simple}}:}{ |
|
48 |
+simple univariate imputation (e.g., half-minimum, mean, median) |
|
49 |
+} |
|
50 |
+} |
|
51 |
+} |
|
52 |
+\seealso{ |
|
53 |
+Other imputation methods: |
|
54 |
+\code{\link{poplin_impute_knn}()}, |
|
55 |
+\code{\link{poplin_impute_pca}()}, |
|
56 |
+\code{\link{poplin_impute_randomforest}()}, |
|
57 |
+\code{\link{poplin_impute_simple}()} |
|
51 | 58 |
} |
59 |
+\concept{imputation methods} |
52 | 60 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,47 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/imputation-methods.R |
|
3 |
+\name{poplin_impute_knn} |
|
4 |
+\alias{poplin_impute_knn} |
|
5 |
+\alias{poplin_impute_knn,poplin-method} |
|
6 |
+\title{K-nearest neighbor (KNN) imputation} |
|
7 |
+\usage{ |
|
8 |
+\S4method{poplin_impute_knn}{matrix}(x, by = c("feature", "sample"), ...) |
|
9 |
+ |
|
10 |
+\S4method{poplin_impute_knn}{poplin}(x, poplin_in, poplin_out, by = c("feature", "sample"), ...) |
|
11 |
+} |
|
12 |
+\arguments{ |
|
13 |
+\item{x}{A matrix or \linkS4class{poplin} object.} |
|
14 |
+ |
|
15 |
+\item{by}{Imputation by k-nearest features or by k-nearest samples.} |
|
16 |
+ |
|
17 |
+\item{...}{Additional argument passed to \link[VIM:kNN]{VIM::kNN}.} |
|
18 |
+ |
|
19 |
+\item{poplin_in}{Name of a data matrix to retrieve.} |
|
20 |
+ |
|
21 |
+\item{poplin_out}{Name of a data matrix to store.} |
|
22 |
+} |
|
23 |
+\value{ |
|
24 |
+A matrix or \linkS4class{poplin} object of the same dimension as |
|
25 |
+\code{x} containing the imputed intensities. |
|
26 |
+} |
|
27 |
+\description{ |
|
28 |
+Apply k-nearest neighbor (KNN) imputation to a matrix or |
|
29 |
+\linkS4class{poplin} object. This is an interface to the \link[VIM:kNN]{VIM::kNN} from the |
|
30 |
+\pkg{VIM} package. Since it is based on Gower's distance, standardization of |
|
31 |
+input data prior to KNN imputation would not affect the result. |
|
32 |
+} |
|
33 |
+\references{ |
|
34 |
+Alexander Kowarik, Matthias Templ (2016). Imputation with the R Package VIM. |
|
35 |
+Journal of Statistical Software, 74(7), 1-16. doi:10.18637/jss.v074.i07 |
|
36 |
+ |
|
37 |
+Gower, J. C. (1971). A General Coefficient of Similarity and Some of Its |
|
38 |
+Properties. Biometrics, 27(4), 857–871. https://doi.org/10.2307/2528823 |
|
39 |
+} |
|
40 |
+\seealso{ |
|
41 |
+Other imputation methods: |
|
42 |
+\code{\link{poplin_impute_pca}()}, |
|
43 |
+\code{\link{poplin_impute_randomforest}()}, |
|
44 |
+\code{\link{poplin_impute_simple}()}, |
|
45 |
+\code{\link{poplin_impute}()} |
|
46 |
+} |
|
47 |
+\concept{imputation methods} |
0 | 48 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,54 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/imputation-methods.R |
|
3 |
+\name{poplin_impute_pca} |
|
4 |
+\alias{poplin_impute_pca} |
|
5 |
+\alias{poplin_impute_pca,poplin-method} |
|
6 |
+\title{Principal component analysis (PCA) imputation} |
|
7 |
+\usage{ |
|
8 |
+\S4method{poplin_impute_pca}{matrix}(x, type = c("bpca", "ppca", "nipals", "svdImpute"), ...) |
|
9 |
+ |
|
10 |
+\S4method{poplin_impute_pca}{poplin}( |
|
11 |
+ x, |
|
12 |
+ poplin_in, |
|
13 |
+ poplin_out, |
|
14 |
+ type = c("bpca", "ppca", "nipals", "svdImpute"), |
|
15 |
+ ... |
|
16 |
+) |
|
17 |
+} |
|
18 |
+\arguments{ |
|
19 |
+\item{x}{A matrix or \linkS4class{poplin} object.} |
|
20 |
+ |
|
21 |
+\item{type}{A method for performing PCA.} |
|
22 |
+ |
|
23 |
+\item{...}{Additional argument passed to \link[pcaMethods:pca]{pcaMethods::pca}.} |
|
24 |
+ |
|
25 |
+\item{poplin_in}{Name of a data matrix to retrieve.} |
|
26 |
+ |
|
27 |
+\item{poplin_out}{Name of a data matrix to store.} |
|
28 |
+} |
|
29 |
+\value{ |
|
30 |
+A matrix or \linkS4class{poplin} object of the same dimension as |
|
31 |
+\code{x} containing the imputed intensities. |
|
32 |
+} |
|
33 |
+\description{ |
|
34 |
+Apply PCA imputation to a matrix or \linkS4class{poplin} object. This is a |
|
35 |
+interface to the \link[pcaMethods:pca]{pcaMethods::pca} from the \pkg{pcaMethods} package. Here, |
|
36 |
+features are interpreted as variables and samples as observations. |
|
37 |
+Pre-processing of input (centering, scaling) may be necessary. See the |
|
38 |
+documentation of \link{pcaMethods:pca} and \link{pcaMethods:prep}. Note that the PCA |
|
39 |
+imputation could yield negative feature values that need to be |
|
40 |
+post-processed. |
|
41 |
+} |
|
42 |
+\references{ |
|
43 |
+Stacklies, W., Redestig, H., Scholz, M., Walther, D. and Selbig, J. |
|
44 |
+pcaMethods -- a Bioconductor package providing PCA methods for incomplete |
|
45 |
+data. Bioinformatics, 2007, 23, 1164-1167 |
|
46 |
+} |
|
47 |
+\seealso{ |
|
48 |
+Other imputation methods: |
|
49 |
+\code{\link{poplin_impute_knn}()}, |
|
50 |
+\code{\link{poplin_impute_randomforest}()}, |
|
51 |
+\code{\link{poplin_impute_simple}()}, |
|
52 |
+\code{\link{poplin_impute}()} |
|
53 |
+} |
|
54 |
+\concept{imputation methods} |
0 | 55 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,47 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/imputation-methods.R |
|
3 |
+\name{poplin_impute_randomforest} |
|
4 |
+\alias{poplin_impute_randomforest} |
|
5 |
+\alias{poplin_impute_randomforest,poplin-method} |
|
6 |
+\title{Random forest imputation} |
|
7 |
+\usage{ |
|
8 |
+\S4method{poplin_impute_randomforest}{matrix}(x, ...) |
|
9 |
+ |
|
10 |
+\S4method{poplin_impute_randomforest}{poplin}(x, poplin_in, poplin_out, ...) |
|
11 |
+} |
|
12 |
+\arguments{ |
|
13 |
+\item{x}{A matrix or \linkS4class{poplin} object.} |
|
14 |
+ |
|
15 |
+\item{...}{Additional argument passed to \link[missForest:missForest]{missForest::missForest}.} |
|
16 |
+ |
|
17 |
+\item{poplin_in}{Name of a data matrix to retrieve.} |
|
18 |
+ |
|
19 |
+\item{poplin_out}{Name of a data matrix to store.} |
|
20 |
+} |
|
21 |
+\value{ |
|
22 |
+A matrix or \linkS4class{poplin} object of the same dimension as |
|
23 |
+\code{x} containing the imputed intensities. |
|
24 |
+} |
|
25 |
+\description{ |
|
26 |
+Apply random forest imputation to a matrix or \linkS4class{poplin} object. |
|
27 |
+This is an interface to the \link[missForest:missForest]{missForest::missForest} from the |
|
28 |
+\pkg{missForest} package. Since random forest is a tree-based method, it can |
|
29 |
+be performed with raw intensities - invariant to monotonic transformations |
|
30 |
+(However, statistical analysis could be affected because, for example, |
|
31 |
+log(mean(predicted values) != mean(log(predicted values))). |
|
32 |
+} |
|
33 |
+\references{ |
|
34 |
+Daniel J. Stekhoven (2013). missForest: Nonparametric Missing Value |
|
35 |
+Imputation using Random Forest. R package version 1.4. |
|
36 |
+ |
|
37 |
+Stekhoven D. J., & Buehlmann, P. (2012). MissForest - non-parametric missing |
|
38 |
+value imputation for mixed-type data. Bioinformatics, 28(1), 112-118. |
|
39 |
+} |
|
40 |
+\seealso{ |
|
41 |
+Other imputation methods: |
|
42 |
+\code{\link{poplin_impute_knn}()}, |
|
43 |
+\code{\link{poplin_impute_pca}()}, |
|
44 |
+\code{\link{poplin_impute_simple}()}, |
|
45 |
+\code{\link{poplin_impute}()} |
|
46 |
+} |
|
47 |
+\concept{imputation methods} |
0 | 48 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,54 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/imputation-methods.R |
|
3 |
+\name{poplin_impute_simple} |
|
4 |
+\alias{poplin_impute_simple} |
|
5 |
+\alias{poplin_impute_simple,poplin-method} |
|
6 |
+\title{Simple univariate imputation} |
|
7 |
+\usage{ |
|
8 |
+\S4method{poplin_impute_simple}{matrix}(x, type = c("halfmin", "median", "mean")) |
|
9 |
+ |
|
10 |
+\S4method{poplin_impute_simple}{poplin}( |
|
11 |
+ x, |
|
12 |
+ poplin_in, |
|
13 |
+ poplin_out, |
|
14 |
+ type = c("halfmin", "median", "mean") |
|
15 |
+) |
|
16 |
+} |
|
17 |
+\arguments{ |
|
18 |
+\item{x}{A matrix or \linkS4class{poplin} object.} |
|
19 |
+ |
|
20 |
+\item{type}{A method for doing univariate imputation.} |
|
21 |
+ |
|
22 |
+\item{poplin_in}{Name of a data matrix to retrieve.} |
|
23 |
+ |
|
24 |
+\item{poplin_out}{Name of a data matrix to store.} |
|
25 |
+} |
|
26 |
+\value{ |
|
27 |
+A matrix or \linkS4class{poplin} object of the same dimension as |
|
28 |
+\code{x} containing the imputed intensities. |
|
29 |
+} |
|
30 |
+\description{ |
|
31 |
+Apply univariate imputation to a matrix or \linkS4class{poplin} object. The |
|
32 |
+supported methods include |
|
33 |
+\itemize{ |
|
34 |
+\item Half-minimum imputation: for each feature, missing values are replaced |
|
35 |
+with half the observed minimum. |
|
36 |
+\item Median imputation: for each feature, missing values are replaced with |
|
37 |
+the median of non-missing values. |
|
38 |
+\item Mean imputation: for each feature, missing values are replaced with |
|
39 |
+the mean of non-missing values. |
|
40 |
+} |
|
41 |
+} |
|
42 |
+\references{ |
|
43 |
+Wei, R., Wang, J., Su, M. et al. Missing Value Imputation Approach for Mass |
|
44 |
+Spectrometry-based Metabolomics Data. Sci Rep 8, 663 (2018). |
|
45 |
+https://doi.org/10.1038/s41598-017-19120-0 |
|
46 |
+} |
|
47 |
+\seealso{ |
|
48 |
+Other imputation methods: |
|
49 |
+\code{\link{poplin_impute_knn}()}, |
|
50 |
+\code{\link{poplin_impute_pca}()}, |
|
51 |
+\code{\link{poplin_impute_randomforest}()}, |
|
52 |
+\code{\link{poplin_impute}()} |
|
53 |
+} |
|
54 |
+\concept{imputation methods} |
... | ... |
@@ -43,7 +43,7 @@ A matrix or \linkS4class{poplin} object of the same dimension as |
43 | 43 |
\code{x} containing the normalized intensities. |
44 | 44 |
} |
45 | 45 |
\description{ |
46 |
-Apply probabilistic quotient normalization to a matrix or |
|
46 |
+Apply probabilistic quotient normalization (PQN) to a matrix or |
|
47 | 47 |
\linkS4class{poplin} object. For the calculation of quotients, a reference |
48 | 48 |
spectrum needs to be obtained from a mean or median spectrum based on all |
49 | 49 |
spectra of the study or a subset of the study. Feature intensities are |