Browse code

Improve data loading in sumstats2qtle - When there are multiple p-values for the same test and state, take the mean value

dunstone-a authored on 02/10/2024 12:13:27
Showing 8 changed files

... ...
@@ -46,16 +46,19 @@
46 46
 #' betas <- matrix(rnorm(nStates * nQTL), ncol=nStates)
47 47
 #' error <- matrix(abs(rnorm(nStates * nQTL)), ncol=nStates)
48 48
 #'
49
-#' qtle <- QTLExperiment(assays=list(betas=betas, errors=error),
50
-#'                       feature_id=sample(1:10, nQTL, replace=TRUE),
51
-#'                       variant_id=sample(seq(1e3:1e5), nQTL),
52
-#'                       state_id=LETTERS[1:nStates])
49
+#' qtle <- QTLExperiment(
50
+#'     assays=list(betas=betas, errors=error),
51
+#'     feature_id=sample(1:10, nQTL, replace=TRUE),
52
+#'     variant_id=sample(seq(1e3:1e5), nQTL),
53
+#'     state_id=LETTERS[1:nStates])
53 54
 #' qtle
54 55
 #'
55 56
 #' ## coercion from SummarizedExperiment
56 57
 #' mock_sumstats <- mockSummaryStats(nStates=10, nQTL=100)
57
-#' se <- SummarizedExperiment(assays=list(betas=mock_sumstats$betas,
58
-#'                                        errors=mock_sumstats$errors))
58
+#' se <- SummarizedExperiment(
59
+#'     assays=list(
60
+#'         betas=mock_sumstats$betas,
61
+#'         errors=mock_sumstats$errors))
59 62
 #' as(se, "QTLExperiment")
60 63
 #'
61 64
 #' @docType class
... ...
@@ -21,8 +21,9 @@
21 21
 #'
22 22
 #' qtle2 <- mash2qtle(
23 23
 #'     mashr_sim,
24
-#'     rowData=DataFrame(feature_id=row.names(mashr_sim$Bhat),
25
-#'                       variant_id=sample(seq_len(nQTL))))
24
+#'     rowData=DataFrame(
25
+#'         feature_id=row.names(mashr_sim$Bhat),
26
+#'         variant_id=sample(seq_len(nQTL))))
26 27
 #' dim(qtle2)
27 28
 #'
28 29
 #'
... ...
@@ -76,11 +76,11 @@ sumstats2qtle <- function(
76 76
         fmutate(id=paste0(feature_id, "|", variant_id))
77 77
 
78 78
     betas <- data %>% 
79
-        pivot_wider(names_from=state, values_from=betas, id_cols=id) %>%
79
+        pivot_wider(names_from=state, values_from=betas, values_fn=mean, id_cols=id) %>%
80 80
         tibble::column_to_rownames(var="id") %>% qDF()
81 81
 
82 82
     errors <- data %>% 
83
-        pivot_wider(names_from=state, values_from=errors, id_cols=id) %>%
83
+        pivot_wider(names_from=state, values_from=errors, values_fn=mean, id_cols=id) %>%
84 84
         tibble::column_to_rownames(var="id") %>% qDF()
85 85
 
86 86
     object <- QTLExperiment(
... ...
@@ -94,7 +94,7 @@ sumstats2qtle <- function(
94 94
 
95 95
     if(!is.null(pvalues)){
96 96
         pvalues <- data %>% 
97
-            pivot_wider(names_from=state, values_from=pvalues, id_cols=id) %>%
97
+            pivot_wider(names_from=state, values_from=pvalues, values_fn=mean, id_cols=id) %>%
98 98
             tibble::column_to_rownames(var="id") %>% qDF()
99 99
 
100 100
         assay(object, "pvalues") <- pvalues
... ...
@@ -24,7 +24,7 @@
24 24
 #'
25 25
 #' \describe{
26 26
 #' \item{\code{x[i, j, ...] <- value}:}{Replaces all data for rows \code{i} and
27
-#' columns {j} with the corresponding fields in a QTLExperiment
27
+#' columns \code{j} with the corresponding fields in a QTLExperiment
28 28
 #' \code{value}, where \code{i} and \code{j} can be a logical, integer, or
29 29
 #' character vector of subscripts, indicating the rows and columns,
30 30
 #' respectively, to retain. If either \code{i} or \code{j} is missing, than
... ...
@@ -64,16 +64,19 @@ nQTL <- 100
64 64
 betas <- matrix(rnorm(nStates * nQTL), ncol=nStates)
65 65
 error <- matrix(abs(rnorm(nStates * nQTL)), ncol=nStates)
66 66
 
67
-qtle <- QTLExperiment(assays=list(betas=betas, errors=error),
68
-                      feature_id=sample(1:10, nQTL, replace=TRUE),
69
-                      variant_id=sample(seq(1e3:1e5), nQTL),
70
-                      state_id=LETTERS[1:nStates])
67
+qtle <- QTLExperiment(
68
+    assays=list(betas=betas, errors=error),
69
+    feature_id=sample(1:10, nQTL, replace=TRUE),
70
+    variant_id=sample(seq(1e3:1e5), nQTL),
71
+    state_id=LETTERS[1:nStates])
71 72
 qtle
72 73
 
73 74
 ## coercion from SummarizedExperiment
74 75
 mock_sumstats <- mockSummaryStats(nStates=10, nQTL=100)
75
-se <- SummarizedExperiment(assays=list(betas=mock_sumstats$betas,
76
-                                       errors=mock_sumstats$errors))
76
+se <- SummarizedExperiment(
77
+    assays=list(
78
+        betas=mock_sumstats$betas,
79
+        errors=mock_sumstats$errors))
77 80
 as(se, "QTLExperiment")
78 81
 
79 82
 }
... ...
@@ -36,8 +36,9 @@ mashr_sim <- mockMASHR(nStates, nQTL)
36 36
 
37 37
 qtle2 <- mash2qtle(
38 38
     mashr_sim,
39
-    rowData=DataFrame(feature_id=row.names(mashr_sim$Bhat),
40
-                      variant_id=sample(seq_len(nQTL))))
39
+    rowData=DataFrame(
40
+        feature_id=row.names(mashr_sim$Bhat),
41
+        variant_id=sample(seq_len(nQTL))))
41 42
 dim(qtle2)
42 43
 
43 44
 
... ...
@@ -38,7 +38,7 @@ In the following, \code{x} is a \linkS4class{QTLExperiment} object.
38 38
 
39 39
 \describe{
40 40
 \item{\code{x[i, j, ...] <- value}:}{Replaces all data for rows \code{i} and
41
-columns {j} with the corresponding fields in a QTLExperiment
41
+columns \code{j} with the corresponding fields in a QTLExperiment
42 42
 \code{value}, where \code{i} and \code{j} can be a logical, integer, or
43 43
 character vector of subscripts, indicating the rows and columns,
44 44
 respectively, to retain. If either \code{i} or \code{j} is missing, than
... ...
@@ -1,4 +1,4 @@
1
-# Setting up the options for a mock MultiStateQTLExperiment.
1
+# Setting up the options for a mock QTLExperiment
2 2
 
3 3
 set.seed(42)
4 4
 nQTL <- 100