Browse code

Added manual for poplin_impute.

Jaehyun Joo authored on 29/11/2021 05:59:07
Showing 12 changed files

... ...
@@ -64,6 +64,7 @@ export(poplin_impute_mean)
64 64
 export(poplin_impute_median)
65 65
 export(poplin_impute_pca)
66 66
 export(poplin_impute_randomforest)
67
+export(poplin_impute_simple)
67 68
 export(poplin_naplot)
68 69
 export(poplin_normalize)
69 70
 export(poplin_normalize_auto)
... ...
@@ -231,6 +231,12 @@ setGeneric(
231 231
   function(x, ...) standardGeneric("poplin_impute_mean")
232 232
 )
233 233
 
234
+##' @export
235
+setGeneric(
236
+  "poplin_impute_simple",
237
+  function(x, ...) standardGeneric("poplin_impute_simple")
238
+)
239
+
234 240
 ##' @export
235 241
 setGeneric(
236 242
   "poplin_impute_pca",
... ...
@@ -1,19 +1,64 @@
1 1
 .poplin_impute <- function(x,
2
-                           method = c("knn", "halfmin", "median",
3
-                                      "mean", "pca", "randomforest"),
2
+                           method = c("knn", "randomforest", "pca", "simple"),
4 3
                            ...) {
5 4
   method <- match.arg(method)
6 5
   switch(
7 6
     method,
8 7
     knn = .poplin_impute_knn(x, ...),
9
-    halfmin = .poplin_impute_halfmin(x, ...),
10
-    median = .poplin_impute_median(x, ...),
11
-    mean = .poplin_impute_mean(x, ...),
12
-    pca = .poplin_impute_pca(x, ...),
13 8
     randomforest = .poplin_impute_randomforest(x, ...),
9
+    pca = .poplin_impute_pca(x, ...),
10
+    ## halfmin = .poplin_impute_halfmin(x, ...),
11
+    ## median = .poplin_impute_median(x, ...),
12
+    ## mean = .poplin_impute_mean(x, ...),
13
+    simple = .poplin_impute_simple(x, ...)
14 14
     )
15 15
 }
16 16
 
17
+## Knn imputation
18
+.poplin_impute_knn <- function(x, by = c("feature", "sample"), ...) {
19
+  if (!requireNamespace("VIM", quietly = TRUE)) {
20
+    stop("Package 'VIM' is required. Please install and try again.")
21
+  }
22
+  by <- match.arg(by)
23
+  if (by == "feature") {
24
+    out <- VIM::kNN(x, ...)[, 1:ncol(x)]
25
+    ## VIM package internally converts x as data.table, which drops rownames
26
+    rownames(out) <- rownames(x)
27
+  } else {
28
+    out <- t(VIM::kNN(t(x), ...))[1:nrow(x), ]
29
+    colnames(out) <- colnames(x)
30
+  }
31
+  as.matrix(out)
32
+}
33
+
34
+## Random forest imputation
35
+.poplin_impute_randomforest <- function(x, ...) {
36
+  if (!requireNamespace("missForest", quietly = TRUE)) {
37
+    stop("Package 'missForest' is required. Please install and try again.")
38
+  } 
39
+  t(missForest::missForest(t(x), ...)$ximp)
40
+}
41
+
42
+## Bayesian PCA imputation
43
+.poplin_impute_pca <- function(x, type = c("bpca", "ppca", "nipals", "svdImpute"), ...) {
44
+  if (!requireNamespace("pcaMethods", quietly = TRUE)) {
45
+    stop("Package 'pcaMethods' is required. Please install and try again.")
46
+  }
47
+  type <- match.arg(type)
48
+  t(pcaMethods::pca(t(x), method = type, ...)@completeObs)
49
+}
50
+
51
+## Simple univariate imputation
52
+.poplin_impute_simple <- function(x, type = c("halfmin", "median", "mean")) {
53
+  type <- match.arg(type)
54
+  switch(
55
+    type,
56
+    halfmin = .poplin_impute_halfmin(x),
57
+    median = .poplin_impute_median(x),
58
+    mean = .poplin_impute_mean(x)
59
+  )
60
+}
61
+
17 62
 .poplin_impute_halfmin <- function(x) {
18 63
   out <- apply(x, 1, function(x) {
19 64
     if (anyNA(x)) {
... ...
@@ -46,33 +91,3 @@
46 91
   })
47 92
   t(out)
48 93
 }
49
-
50
-.poplin_impute_knn <- function(x, by = c("feature", "sample"), ...) {
51
-  if (!requireNamespace("VIM", quietly = TRUE)) {
52
-    stop("Package 'VIM' is required. Please install and try again.")
53
-  }
54
-  by <- match.arg(by)
55
-  if (by == "feature") {
56
-    out <- VIM::kNN(x, ...)[, 1:ncol(x)]
57
-    ## VIM package internally converts x as data.table, which drops rownames
58
-    rownames(out) <- rownames(x)
59
-  } else {
60
-    out <- t(VIM::kNN(t(x), ...))[1:nrow(x), ]
61
-    colnames(out) <- colnames(x)
62
-  }
63
-  as.matrix(out)
64
-}
65
-
66
-.poplin_impute_pca <- function(x, ...) {
67
-  if (!requireNamespace("pcaMethods", quietly = TRUE)) {
68
-    stop("Package 'pcaMethods' is required. Please install and try again.")
69
-  } 
70
-  t(pcaMethods::pca(t(x), method = "bpca", ...)@completeObs)
71
-}
72
-
73
-.poplin_impute_randomforest <- function(x, ...) {
74
-  if (!requireNamespace("missForest", quietly = TRUE)) {
75
-    stop("Package 'missForest' is required. Please install and try again.")
76
-  } 
77
-  t(missForest::missForest(t(x), ...)$ximp)
78
-}
... ...
@@ -2,12 +2,34 @@
2 2
 ##'
3 3
 ##' Missing values are frequently found in metabolomics data. The \pkg{poplin}
4 4
 ##' package provides a few options to handle them.
5
-##'
5
+##' [poplin_impute] is a wrapper for the following set of functions:
6
+##' \describe{
7
+##' \item{\code{\link{poplin_impute_knn}}:}{
8
+##' k-nearest neighbor (KNN) imputation
9
+##' }
10
+##' \item{\code{\link{poplin_impute_pca}}:}{
11
+##' principal component analysis (PCA) imputation
12
+##' }
13
+##' \item{\code{\link{poplin_impute_randomforest}}:}{
14
+##' random forest imputation
15
+##' }
16
+##' \item{\code{\link{poplin_impute_simple}}:}{
17
+##' simple univariate imputation (e.g., half-minimum, mean, median)
18
+##' }
19
+##' }
20
+##' @param x A matrix or \linkS4class{poplin} object.
21
+##' @param method A imputation method. Default is 'pqn'.
22
+##' @param poplin_in Name of a data matrix to retrieve.
23
+##' @param poplin_out Name of a data matrix to store.
24
+##' @param ... Argument passed to a specific imputation method.
25
+##' @return A matrix or \linkS4class{poplin} object of the same dimension as
26
+##'   \code{x} containing the imputed intensities.
6 27
 ##' @name poplin_impute
28
+##' @family imputation methods
7 29
 setMethod(
8 30
   "poplin_impute",
9 31
   "matrix",
10
-  function(x, method, ...) {
32
+  function(x, method = c("knn", "pca", "randomforest", "simple"), ...) {
11 33
     .poplin_impute(x, method = method, ...)
12 34
   }
13 35
 )
... ...
@@ -16,123 +38,233 @@ setMethod(
16 38
 setMethod(
17 39
   "poplin_impute",
18 40
   "poplin",
19
-  function(x, method, poplin_in, poplin_out, ...) {
41
+  function(x, method = c("knn", "pca", "randomforest", "simple"),
42
+           poplin_in, poplin_out, ...) {
20 43
     m <- .verify_and_extract_input(x, poplin_in)
21 44
     poplin_data(x, poplin_out) <- .poplin_impute(m, method = method, ...)
22 45
     x
23 46
   }
24 47
 )
25 48
 
26
-##' @rdname poplin_impute
49
+##' K-nearest neighbor (KNN) imputation
50
+##'
51
+##' Apply k-nearest neighbor (KNN) imputation to a matrix or
52
+##' \linkS4class{poplin} object. This is an interface to the [VIM::kNN] from the
53
+##' \pkg{VIM} package. Since it is based on Gower's distance, standardization of
54
+##' input data prior to KNN imputation would not affect the result.
55
+##'
56
+##' @references
57
+##' Alexander Kowarik, Matthias Templ (2016). Imputation with the R Package VIM.
58
+##' Journal of Statistical Software, 74(7), 1-16. doi:10.18637/jss.v074.i07
59
+##'
60
+##' Gower, J. C. (1971). A General Coefficient of Similarity and Some of Its
61
+##' Properties. Biometrics, 27(4), 857–871. https://doi.org/10.2307/2528823
62
+##' 
63
+##' @param x A matrix or \linkS4class{poplin} object.
64
+##' @param poplin_in Name of a data matrix to retrieve.
65
+##' @param poplin_out Name of a data matrix to store.
66
+##' @param by Imputation by k-nearest features or by k-nearest samples.
67
+##' @param ... Additional argument passed to [VIM::kNN].
68
+##' @return A matrix or \linkS4class{poplin} object of the same dimension as
69
+##'   \code{x} containing the imputed intensities.
70
+##' @name poplin_impute_knn
71
+##' @family imputation methods
27 72
 setMethod(
28 73
   "poplin_impute_knn",
29 74
   "matrix",
30
-  function(x, ...) {
31
-    .poplin_impute_knn(x, ...)
75
+  function(x, by = c("feature", "sample"),  ...) {
76
+    .poplin_impute_knn(x, by = by, ...)
32 77
   }
33 78
 )
34 79
 
35
-##' @rdname poplin_impute
80
+##' @rdname poplin_impute_knn
36 81
 setMethod(
37 82
   "poplin_impute_knn",
38 83
   "poplin",
39
-  function(x, poplin_in, poplin_out, ...) {
84
+  function(x, poplin_in, poplin_out, by = c("feature", "sample"), ...) {
40 85
     .poplin_extract_and_assign(x, .poplin_impute_knn,
41
-                               poplin_in, poplin_out, ...)
86
+                               poplin_in, poplin_out,
87
+                               by = by, ...)
42 88
   }
43 89
 )
44 90
 
45
-##' @rdname poplin_impute
91
+##' Random forest imputation
92
+##'
93
+##' Apply random forest imputation to a matrix or \linkS4class{poplin} object.
94
+##' This is an interface to the [missForest::missForest] from the
95
+##' \pkg{missForest} package. Since random forest is a tree-based method, it can
96
+##' be performed with raw intensities - invariant to monotonic transformations
97
+##' (However, statistical analysis could be affected because, for example,
98
+##' log(mean(predicted values) != mean(log(predicted values))).
99
+##'
100
+##' @references
101
+##'
102
+##' Daniel J. Stekhoven (2013). missForest: Nonparametric Missing Value
103
+##' Imputation using Random Forest. R package version 1.4.
104
+##' 
105
+##' Stekhoven D. J., & Buehlmann, P. (2012). MissForest - non-parametric missing
106
+##' value imputation for mixed-type data. Bioinformatics, 28(1), 112-118.
107
+##' 
108
+##' @param x A matrix or \linkS4class{poplin} object.
109
+##' @param poplin_in Name of a data matrix to retrieve.
110
+##' @param poplin_out Name of a data matrix to store.
111
+##' @param ... Additional argument passed to [missForest::missForest].
112
+##' @return A matrix or \linkS4class{poplin} object of the same dimension as
113
+##'   \code{x} containing the imputed intensities.
114
+##' @name poplin_impute_randomforest
115
+##' @family imputation methods
46 116
 setMethod(
47
-  "poplin_impute_halfmin",
117
+  "poplin_impute_randomforest",
48 118
   "matrix",
49 119
   function(x, ...) {
50
-    .poplin_impute_halfmin(x, ...)
120
+    .poplin_impute_randomforest(x, ...)
51 121
   }
52 122
 )
53 123
 
54
-##' @rdname poplin_impute
124
+##' @rdname poplin_impute_randomforest
55 125
 setMethod(
56
-  "poplin_impute_halfmin",
126
+  "poplin_impute_randomforest",
57 127
   "poplin",
58 128
   function(x, poplin_in, poplin_out, ...) {
59
-    .poplin_extract_and_assign(x, .poplin_impute_halfmin,
129
+    .poplin_extract_and_assign(x, .poplin_impute_randomforest,
60 130
                                poplin_in, poplin_out, ...)
61 131
   }
62 132
 )
63 133
 
64
-##' @rdname poplin_impute
134
+##' Principal component analysis (PCA) imputation
135
+##'
136
+##' Apply PCA imputation to a matrix or \linkS4class{poplin} object. This is a
137
+##' interface to the [pcaMethods::pca] from the \pkg{pcaMethods} package. Here,
138
+##' features are interpreted as variables and samples as observations.
139
+##' Pre-processing of input (centering, scaling) may be necessary. See the
140
+##' documentation of [pcaMethods:pca] and [pcaMethods:prep]. Note that the PCA
141
+##' imputation could yield negative feature values that need to be
142
+##' post-processed.
143
+##'
144
+##' @references
145
+##' Stacklies, W., Redestig, H., Scholz, M., Walther, D. and Selbig, J.
146
+##' pcaMethods -- a Bioconductor package providing PCA methods for incomplete
147
+##' data. Bioinformatics, 2007, 23, 1164-1167
148
+##'
149
+##' @param x A matrix or \linkS4class{poplin} object.
150
+##' @param poplin_in Name of a data matrix to retrieve.
151
+##' @param poplin_out Name of a data matrix to store.
152
+##' @param type A method for performing PCA.
153
+##' @param ... Additional argument passed to [pcaMethods::pca].
154
+##' @return A matrix or \linkS4class{poplin} object of the same dimension as
155
+##'   \code{x} containing the imputed intensities.
156
+##' @name poplin_impute_pca
157
+##' @family imputation methods
65 158
 setMethod(
66
-  "poplin_impute_median",
159
+  "poplin_impute_pca",
67 160
   "matrix",
68
-  function(x, ...) {
69
-    .poplin_impute_median(x, ...)
161
+  function(x, type = c("bpca", "ppca", "nipals", "svdImpute"), ...) {
162
+    .poplin_impute_pca(x, type = type, ...)
70 163
   }
71 164
 )
72 165
 
73
-##' @rdname poplin_impute
166
+##' @rdname poplin_impute_pca
74 167
 setMethod(
75
-  "poplin_impute_median",
168
+  "poplin_impute_pca",
76 169
   "poplin",
77
-  function(x, poplin_in, poplin_out, ...) {
78
-    .poplin_extract_and_assign(x, .poplin_impute_median,
79
-                               poplin_in, poplin_out, ...)
170
+  function(x, poplin_in, poplin_out,
171
+           type = c("bpca", "ppca", "nipals", "svdImpute"), ...) {
172
+    .poplin_extract_and_assign(x, .poplin_impute_pca,
173
+                               poplin_in, poplin_out,
174
+                               type = type, ...)
80 175
   }
81 176
 )
82 177
 
83
-##' @rdname poplin_impute
178
+##' Simple univariate imputation
179
+##' 
180
+##' Apply univariate imputation to a matrix or \linkS4class{poplin} object. The
181
+##' supported methods include
182
+##' \itemize{
183
+##' \item Half-minimum imputation: for each feature, missing values are replaced
184
+##' with half the observed minimum.
185
+##' \item Median imputation: for each feature, missing values are replaced with
186
+##' the median of non-missing values.
187
+##' \item Mean imputation: for each feature, missing values are replaced with
188
+##' the mean of non-missing values.
189
+##' }
190
+##' @references
191
+##' Wei, R., Wang, J., Su, M. et al. Missing Value Imputation Approach for Mass
192
+##' Spectrometry-based Metabolomics Data. Sci Rep 8, 663 (2018).
193
+##' https://doi.org/10.1038/s41598-017-19120-0
194
+##' 
195
+##' @param x A matrix or \linkS4class{poplin} object.
196
+##' @param poplin_in Name of a data matrix to retrieve.
197
+##' @param poplin_out Name of a data matrix to store.
198
+##' @param type A method for doing univariate imputation.
199
+##' @return A matrix or \linkS4class{poplin} object of the same dimension as
200
+##'   \code{x} containing the imputed intensities.
201
+##' @name poplin_impute_simple
202
+##' @family imputation methods
84 203
 setMethod(
85
-  "poplin_impute_mean",
204
+  "poplin_impute_simple",
86 205
   "matrix",
87
-  function(x, ...) {
88
-    .poplin_impute_mean(x, ...)
206
+  function(x, type = c("halfmin", "median", "mean")) {
207
+    .poplin_impute_simple(x, type = type)
89 208
   }
90 209
 )
91 210
 
92
-##' @rdname poplin_impute
211
+##' @rdname poplin_impute_simple
93 212
 setMethod(
94
-  "poplin_impute_mean",
213
+  "poplin_impute_simple",
95 214
   "poplin",
96
-  function(x, poplin_in, poplin_out, ...) {
97
-    .poplin_extract_and_assign(x, .poplin_impute_mean,
98
-                               poplin_in, poplin_out, ...)
215
+  function(x, poplin_in, poplin_out, type = c("halfmin", "median", "mean")) {
216
+    .poplin_extract_and_assign(x, .poplin_impute_halfmin,
217
+                               poplin_in, poplin_out, type = type)
99 218
   }
100 219
 )
101 220
 
102
-##' @rdname poplin_impute
103 221
 setMethod(
104
-  "poplin_impute_pca",
222
+  "poplin_impute_halfmin",
223
+  "matrix",
224
+  function(x) {
225
+    .poplin_impute_halfmin(x)
226
+  }
227
+)
228
+
229
+setMethod(
230
+  "poplin_impute_halfmin",
231
+  "poplin",
232
+  function(x, poplin_in, poplin_out) {
233
+    .poplin_extract_and_assign(x, .poplin_impute_halfmin,
234
+                               poplin_in, poplin_out)
235
+  }
236
+)
237
+
238
+setMethod(
239
+  "poplin_impute_median",
105 240
   "matrix",
106 241
   function(x, ...) {
107
-    .poplin_impute_pca(x, ...)
242
+    .poplin_impute_median(x, ...)
108 243
   }
109 244
 )
110 245
 
111
-##' @rdname poplin_impute
112 246
 setMethod(
113
-  "poplin_impute_pca",
247
+  "poplin_impute_median",
114 248
   "poplin",
115 249
   function(x, poplin_in, poplin_out, ...) {
116
-    .poplin_extract_and_assign(x, .poplin_impute_pca,
250
+    .poplin_extract_and_assign(x, .poplin_impute_median,
117 251
                                poplin_in, poplin_out, ...)
118 252
   }
119 253
 )
120 254
 
121
-##' @rdname poplin_impute
122 255
 setMethod(
123
-  "poplin_impute_randomforest",
256
+  "poplin_impute_mean",
124 257
   "matrix",
125 258
   function(x, ...) {
126
-    .poplin_impute_randomforest(x, ...)
259
+    .poplin_impute_mean(x, ...)
127 260
   }
128 261
 )
129 262
 
130
-##' @rdname poplin_impute
131 263
 setMethod(
132
-  "poplin_impute_randomforest",
264
+  "poplin_impute_mean",
133 265
   "poplin",
134 266
   function(x, poplin_in, poplin_out, ...) {
135
-    .poplin_extract_and_assign(x, .poplin_impute_randomforest,
267
+    .poplin_extract_and_assign(x, .poplin_impute_mean,
136 268
                                poplin_in, poplin_out, ...)
137 269
   }
138 270
 )
... ...
@@ -394,7 +394,7 @@
394 394
     range = .poplin_normalize_range(x),
395 395
     pareto = .poplin_normalize_pareto(x),
396 396
     vast = .poplin_normalize_vast(x),
397
-    level = .poplin_normalize_level(x),
397
+    level = .poplin_normalize_level(x)
398 398
   )
399 399
 }
400 400
 
... ...
@@ -63,13 +63,13 @@ setMethod(
63 63
 
64 64
 ##' Probabilistic quotient normalization (PQN)
65 65
 ##'
66
-##' Apply probabilistic quotient normalization to a matrix or
66
+##' Apply probabilistic quotient normalization (PQN) to a matrix or
67 67
 ##' \linkS4class{poplin} object. For the calculation of quotients, a reference
68 68
 ##' spectrum needs to be obtained from a mean or median spectrum based on all
69 69
 ##' spectra of the study or a subset of the study. Feature intensities are
70 70
 ##' normalized by the median of quotients. See Dieterle et al. (2006) for
71 71
 ##' details.
72
-##' 
72
+##'
73 73
 ##' @param x A matrix or \linkS4class{poplin} object.
74 74
 ##' @param poplin_in Name of a data matrix to retrieve.
75 75
 ##' @param poplin_out Name of a data matrix to store.
... ...
@@ -116,7 +116,7 @@ setMethod(
116 116
 ##' Apply sum normalization to a matrix or \linkS4class{poplin} object. For each
117 117
 ##' sample, feature intensities are divided by its Total Ion Current (TIC),
118 118
 ##' i.e., every feature is divided by the sum of all intensity values.
119
-##' 
119
+##'
120 120
 ##' @param x A matrix or \linkS4class{poplin} object.
121 121
 ##' @param poplin_in Name of a data matrix to retrieve.
122 122
 ##' @param poplin_out Name of a data matrix to store.
... ...
@@ -153,7 +153,7 @@ setMethod(
153 153
 ##' Apply mean normalization to a matrix or \linkS4class{poplin} object. For
154 154
 ##' each sample, feature intensities are divided by its mean. The mean of
155 155
 ##' intensity values for individual samples will be one as a result.
156
-##' 
156
+##'
157 157
 ##' @param x A matrix or \linkS4class{poplin} object.
158 158
 ##' @param poplin_in Name of a data matrix to retrieve.
159 159
 ##' @param poplin_out Name of a data matrix to store.
... ...
@@ -190,7 +190,7 @@ setMethod(
190 190
 ##' Apply median normalization to a matrix or \linkS4class{poplin} object. For
191 191
 ##' each sample, feature intensities are divided by its median. The median of
192 192
 ##' intensity values for individual samples will be one as a result.
193
-##' 
193
+##'
194 194
 ##' @param x A matrix or \linkS4class{poplin} object.
195 195
 ##' @param poplin_in Name of a data matrix to retrieve.
196 196
 ##' @param poplin_out Name of a data matrix to store.
... ...
@@ -228,7 +228,7 @@ setMethod(
228 228
 ##' \linkS4class{poplin} object. For each sample, feature intensities are scaled
229 229
 ##' by its MAD. The MAD of intensity values for individual samples will be one
230 230
 ##' as a result.
231
-##' 
231
+##'
232 232
 ##' @param x A matrix or \linkS4class{poplin} object.
233 233
 ##' @param poplin_in Name of a data matrix to retrieve.
234 234
 ##' @param poplin_out Name of a data matrix to store.
... ...
@@ -347,7 +347,7 @@ setMethod(
347 347
 ##' from the \pkg{vsn} package (see [vsn::vsn2] for help). The vsn produces
348 348
 ##' normalized intensities based on a glog (generalized logarithm) scale to base
349 349
 ##' 2. See Huber et al. (2002) for details.
350
-##' 
350
+##'
351 351
 ##' @references
352 352
 ##' Huber W, von Heydebreck A, Sültmann H, Poustka A, Vingron M. Variance
353 353
 ##' stabilization applied to microarray data calibration and to the
... ...
@@ -384,7 +384,7 @@ setMethod(
384 384
 )
385 385
 
386 386
 ##' Feature-based scaling
387
-##' 
387
+##'
388 388
 ##' Apply feature-based scaling to a matrix or \linkS4class{poplin} object. The
389 389
 ##' supported methods include
390 390
 ##' \itemize{
... ...
@@ -3,49 +3,57 @@
3 3
 \name{poplin_impute}
4 4
 \alias{poplin_impute}
5 5
 \alias{poplin_impute,poplin-method}
6
-\alias{poplin_impute_knn,matrix-method}
7
-\alias{poplin_impute_knn,poplin-method}
8
-\alias{poplin_impute_halfmin,matrix-method}
9
-\alias{poplin_impute_halfmin,poplin-method}
10
-\alias{poplin_impute_median,matrix-method}
11
-\alias{poplin_impute_median,poplin-method}
12
-\alias{poplin_impute_mean,matrix-method}
13
-\alias{poplin_impute_mean,poplin-method}
14
-\alias{poplin_impute_pca,matrix-method}
15
-\alias{poplin_impute_pca,poplin-method}
16
-\alias{poplin_impute_randomforest,matrix-method}
17
-\alias{poplin_impute_randomforest,poplin-method}
18 6
 \title{Imputation methods}
19 7
 \usage{
20
-\S4method{poplin_impute}{matrix}(x, method, ...)
21
-
22
-\S4method{poplin_impute}{poplin}(x, method, poplin_in, poplin_out, ...)
23
-
24
-\S4method{poplin_impute_knn}{matrix}(x, ...)
25
-
26
-\S4method{poplin_impute_knn}{poplin}(x, poplin_in, poplin_out, ...)
27
-
28
-\S4method{poplin_impute_halfmin}{matrix}(x, ...)
29
-
30
-\S4method{poplin_impute_halfmin}{poplin}(x, poplin_in, poplin_out, ...)
31
-
32
-\S4method{poplin_impute_median}{matrix}(x, ...)
33
-
34
-\S4method{poplin_impute_median}{poplin}(x, poplin_in, poplin_out, ...)
35
-
36
-\S4method{poplin_impute_mean}{matrix}(x, ...)
37
-
38
-\S4method{poplin_impute_mean}{poplin}(x, poplin_in, poplin_out, ...)
8
+\S4method{poplin_impute}{matrix}(x, method = c("knn", "pca", "randomforest", "simple"), ...)
9
+
10
+\S4method{poplin_impute}{poplin}(
11
+  x,
12
+  method = c("knn", "pca", "randomforest", "simple"),
13
+  poplin_in,
14
+  poplin_out,
15
+  ...
16
+)
17
+}
18
+\arguments{
19
+\item{x}{A matrix or \linkS4class{poplin} object.}
39 20
 
40
-\S4method{poplin_impute_pca}{matrix}(x, ...)
21
+\item{method}{A imputation method. Default is 'pqn'.}
41 22
 
42
-\S4method{poplin_impute_pca}{poplin}(x, poplin_in, poplin_out, ...)
23
+\item{...}{Argument passed to a specific imputation method.}
43 24
 
44
-\S4method{poplin_impute_randomforest}{matrix}(x, ...)
25
+\item{poplin_in}{Name of a data matrix to retrieve.}
45 26
 
46
-\S4method{poplin_impute_randomforest}{poplin}(x, poplin_in, poplin_out, ...)
27
+\item{poplin_out}{Name of a data matrix to store.}
28
+}
29
+\value{
30
+A matrix or \linkS4class{poplin} object of the same dimension as
31
+\code{x} containing the imputed intensities.
47 32
 }
48 33
 \description{
49 34
 Missing values are frequently found in metabolomics data. The \pkg{poplin}
50 35
 package provides a few options to handle them.
36
+\link{poplin_impute} is a wrapper for the following set of functions:
37
+\describe{
38
+\item{\code{\link{poplin_impute_knn}}:}{
39
+k-nearest neighbor (KNN) imputation
40
+}
41
+\item{\code{\link{poplin_impute_pca}}:}{
42
+principal component analysis (PCA) imputation
43
+}
44
+\item{\code{\link{poplin_impute_randomforest}}:}{
45
+random forest imputation
46
+}
47
+\item{\code{\link{poplin_impute_simple}}:}{
48
+simple univariate imputation (e.g., half-minimum, mean, median)
49
+}
50
+}
51
+}
52
+\seealso{
53
+Other imputation methods: 
54
+\code{\link{poplin_impute_knn}()},
55
+\code{\link{poplin_impute_pca}()},
56
+\code{\link{poplin_impute_randomforest}()},
57
+\code{\link{poplin_impute_simple}()}
51 58
 }
59
+\concept{imputation methods}
52 60
new file mode 100644
... ...
@@ -0,0 +1,47 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/imputation-methods.R
3
+\name{poplin_impute_knn}
4
+\alias{poplin_impute_knn}
5
+\alias{poplin_impute_knn,poplin-method}
6
+\title{K-nearest neighbor (KNN) imputation}
7
+\usage{
8
+\S4method{poplin_impute_knn}{matrix}(x, by = c("feature", "sample"), ...)
9
+
10
+\S4method{poplin_impute_knn}{poplin}(x, poplin_in, poplin_out, by = c("feature", "sample"), ...)
11
+}
12
+\arguments{
13
+\item{x}{A matrix or \linkS4class{poplin} object.}
14
+
15
+\item{by}{Imputation by k-nearest features or by k-nearest samples.}
16
+
17
+\item{...}{Additional argument passed to \link[VIM:kNN]{VIM::kNN}.}
18
+
19
+\item{poplin_in}{Name of a data matrix to retrieve.}
20
+
21
+\item{poplin_out}{Name of a data matrix to store.}
22
+}
23
+\value{
24
+A matrix or \linkS4class{poplin} object of the same dimension as
25
+\code{x} containing the imputed intensities.
26
+}
27
+\description{
28
+Apply k-nearest neighbor (KNN) imputation to a matrix or
29
+\linkS4class{poplin} object. This is an interface to the \link[VIM:kNN]{VIM::kNN} from the
30
+\pkg{VIM} package. Since it is based on Gower's distance, standardization of
31
+input data prior to KNN imputation would not affect the result.
32
+}
33
+\references{
34
+Alexander Kowarik, Matthias Templ (2016). Imputation with the R Package VIM.
35
+Journal of Statistical Software, 74(7), 1-16. doi:10.18637/jss.v074.i07
36
+
37
+Gower, J. C. (1971). A General Coefficient of Similarity and Some of Its
38
+Properties. Biometrics, 27(4), 857–871. https://doi.org/10.2307/2528823
39
+}
40
+\seealso{
41
+Other imputation methods: 
42
+\code{\link{poplin_impute_pca}()},
43
+\code{\link{poplin_impute_randomforest}()},
44
+\code{\link{poplin_impute_simple}()},
45
+\code{\link{poplin_impute}()}
46
+}
47
+\concept{imputation methods}
0 48
new file mode 100644
... ...
@@ -0,0 +1,54 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/imputation-methods.R
3
+\name{poplin_impute_pca}
4
+\alias{poplin_impute_pca}
5
+\alias{poplin_impute_pca,poplin-method}
6
+\title{Principal component analysis (PCA) imputation}
7
+\usage{
8
+\S4method{poplin_impute_pca}{matrix}(x, type = c("bpca", "ppca", "nipals", "svdImpute"), ...)
9
+
10
+\S4method{poplin_impute_pca}{poplin}(
11
+  x,
12
+  poplin_in,
13
+  poplin_out,
14
+  type = c("bpca", "ppca", "nipals", "svdImpute"),
15
+  ...
16
+)
17
+}
18
+\arguments{
19
+\item{x}{A matrix or \linkS4class{poplin} object.}
20
+
21
+\item{type}{A method for performing PCA.}
22
+
23
+\item{...}{Additional argument passed to \link[pcaMethods:pca]{pcaMethods::pca}.}
24
+
25
+\item{poplin_in}{Name of a data matrix to retrieve.}
26
+
27
+\item{poplin_out}{Name of a data matrix to store.}
28
+}
29
+\value{
30
+A matrix or \linkS4class{poplin} object of the same dimension as
31
+\code{x} containing the imputed intensities.
32
+}
33
+\description{
34
+Apply PCA imputation to a matrix or \linkS4class{poplin} object. This is a
35
+interface to the \link[pcaMethods:pca]{pcaMethods::pca} from the \pkg{pcaMethods} package. Here,
36
+features are interpreted as variables and samples as observations.
37
+Pre-processing of input (centering, scaling) may be necessary. See the
38
+documentation of \link{pcaMethods:pca} and \link{pcaMethods:prep}. Note that the PCA
39
+imputation could yield negative feature values that need to be
40
+post-processed.
41
+}
42
+\references{
43
+Stacklies, W., Redestig, H., Scholz, M., Walther, D. and Selbig, J.
44
+pcaMethods -- a Bioconductor package providing PCA methods for incomplete
45
+data. Bioinformatics, 2007, 23, 1164-1167
46
+}
47
+\seealso{
48
+Other imputation methods: 
49
+\code{\link{poplin_impute_knn}()},
50
+\code{\link{poplin_impute_randomforest}()},
51
+\code{\link{poplin_impute_simple}()},
52
+\code{\link{poplin_impute}()}
53
+}
54
+\concept{imputation methods}
0 55
new file mode 100644
... ...
@@ -0,0 +1,47 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/imputation-methods.R
3
+\name{poplin_impute_randomforest}
4
+\alias{poplin_impute_randomforest}
5
+\alias{poplin_impute_randomforest,poplin-method}
6
+\title{Random forest imputation}
7
+\usage{
8
+\S4method{poplin_impute_randomforest}{matrix}(x, ...)
9
+
10
+\S4method{poplin_impute_randomforest}{poplin}(x, poplin_in, poplin_out, ...)
11
+}
12
+\arguments{
13
+\item{x}{A matrix or \linkS4class{poplin} object.}
14
+
15
+\item{...}{Additional argument passed to \link[missForest:missForest]{missForest::missForest}.}
16
+
17
+\item{poplin_in}{Name of a data matrix to retrieve.}
18
+
19
+\item{poplin_out}{Name of a data matrix to store.}
20
+}
21
+\value{
22
+A matrix or \linkS4class{poplin} object of the same dimension as
23
+\code{x} containing the imputed intensities.
24
+}
25
+\description{
26
+Apply random forest imputation to a matrix or \linkS4class{poplin} object.
27
+This is an interface to the \link[missForest:missForest]{missForest::missForest} from the
28
+\pkg{missForest} package. Since random forest is a tree-based method, it can
29
+be performed with raw intensities - invariant to monotonic transformations
30
+(However, statistical analysis could be affected because, for example,
31
+log(mean(predicted values) != mean(log(predicted values))).
32
+}
33
+\references{
34
+Daniel J. Stekhoven (2013). missForest: Nonparametric Missing Value
35
+Imputation using Random Forest. R package version 1.4.
36
+
37
+Stekhoven D. J., & Buehlmann, P. (2012). MissForest - non-parametric missing
38
+value imputation for mixed-type data. Bioinformatics, 28(1), 112-118.
39
+}
40
+\seealso{
41
+Other imputation methods: 
42
+\code{\link{poplin_impute_knn}()},
43
+\code{\link{poplin_impute_pca}()},
44
+\code{\link{poplin_impute_simple}()},
45
+\code{\link{poplin_impute}()}
46
+}
47
+\concept{imputation methods}
0 48
new file mode 100644
... ...
@@ -0,0 +1,54 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/imputation-methods.R
3
+\name{poplin_impute_simple}
4
+\alias{poplin_impute_simple}
5
+\alias{poplin_impute_simple,poplin-method}
6
+\title{Simple univariate imputation}
7
+\usage{
8
+\S4method{poplin_impute_simple}{matrix}(x, type = c("halfmin", "median", "mean"))
9
+
10
+\S4method{poplin_impute_simple}{poplin}(
11
+  x,
12
+  poplin_in,
13
+  poplin_out,
14
+  type = c("halfmin", "median", "mean")
15
+)
16
+}
17
+\arguments{
18
+\item{x}{A matrix or \linkS4class{poplin} object.}
19
+
20
+\item{type}{A method for doing univariate imputation.}
21
+
22
+\item{poplin_in}{Name of a data matrix to retrieve.}
23
+
24
+\item{poplin_out}{Name of a data matrix to store.}
25
+}
26
+\value{
27
+A matrix or \linkS4class{poplin} object of the same dimension as
28
+\code{x} containing the imputed intensities.
29
+}
30
+\description{
31
+Apply univariate imputation to a matrix or \linkS4class{poplin} object. The
32
+supported methods include
33
+\itemize{
34
+\item Half-minimum imputation: for each feature, missing values are replaced
35
+with half the observed minimum.
36
+\item Median imputation: for each feature, missing values are replaced with
37
+the median of non-missing values.
38
+\item Mean imputation: for each feature, missing values are replaced with
39
+the mean of non-missing values.
40
+}
41
+}
42
+\references{
43
+Wei, R., Wang, J., Su, M. et al. Missing Value Imputation Approach for Mass
44
+Spectrometry-based Metabolomics Data. Sci Rep 8, 663 (2018).
45
+https://doi.org/10.1038/s41598-017-19120-0
46
+}
47
+\seealso{
48
+Other imputation methods: 
49
+\code{\link{poplin_impute_knn}()},
50
+\code{\link{poplin_impute_pca}()},
51
+\code{\link{poplin_impute_randomforest}()},
52
+\code{\link{poplin_impute}()}
53
+}
54
+\concept{imputation methods}
... ...
@@ -43,7 +43,7 @@ A matrix or \linkS4class{poplin} object of the same dimension as
43 43
 \code{x} containing the normalized intensities.
44 44
 }
45 45
 \description{
46
-Apply probabilistic quotient normalization to a matrix or
46
+Apply probabilistic quotient normalization (PQN) to a matrix or
47 47
 \linkS4class{poplin} object. For the calculation of quotients, a reference
48 48
 spectrum needs to be obtained from a mean or median spectrum based on all
49 49
 spectra of the study or a subset of the study. Feature intensities are