... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: gcatest |
2 | 2 |
Title: Genotype Conditional Association TEST |
3 |
-Version: 2.0.2.9000 |
|
3 |
+Version: 2.0.3.9000 |
|
4 | 4 |
Author: Wei Hao, Minsun Song, Alejandro Ochoa, John D. Storey |
5 | 5 |
Maintainer: Alejandro Ochoa <[email protected]>, Wei Hao <[email protected]>, John D. Storey <[email protected]> |
6 | 6 |
Encoding: UTF-8 |
... | ... |
@@ -33,4 +33,11 @@ Internally there was major code restructuring, and added unit tests for all func |
33 | 33 |
|
34 | 34 |
- Added internal tests for deviance calculations against `stats::glm`. |
35 | 35 |
- Deviance code (internal `delta_deviance_snp`) now returns `NA` instead of stopping when an "impossible" case is encountered (when the genotype `x` is non-zero but the fitted probabilities under either null or alternative model are zero, or the alternative allele dosage (`x-2`) has the same problem). |
36 |
- These cases are clearly model fitting failures, and can arise for common ill-defined problems, particularly under binary `adjustment` variables passed to `gcat` together with rare variants; these individual cases are not handled any better by `stats:glm`, so it seemed most sensible to return `NA` at such loci and not stop. |
|
36 |
+ These cases are clearly model fitting failures, and can arise for common ill-defined problems, particularly under binary `adjustment` variables passed to `gcat` together with rare variants; these individual cases are not handled any better by `stats::glm`, so it seemed most sensible to return `NA` at such loci and not stop. |
|
37 |
+ |
|
38 |
+# 2021-05-11 - gcatest 2.0.3.9000 |
|
39 |
+ |
|
40 |
+- Added function `delta_deviance_lf`, which calculates the delta deviance from two logistic models and the genotype matrix data. |
|
41 |
+ This function is a more general version of `gcat.stat` (which uses the new function internally), to essentially consider models that differ by more than one degree of freedom. |
|
42 |
+ It was written in particular for an external application in mind, namely the `jackstraw` package. |
|
43 |
+- Internal function `assoc_snp` was renamed to `delta_deviance_snp_lf` and its last argument changed to match that of `delta_deviance_lf` (alternative logistic factors instead of trait). |
37 | 44 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,68 @@ |
1 |
+#' Calculate delta deviance of logistic fits of a null and alternative model |
|
2 |
+#' |
|
3 |
+#' This function fits, at each locus of a given genotype matrix, two logistic models, and under the assumption that the models are nested, calculates the delta deviance between the two. |
|
4 |
+#' This general function is intended for testing models in a broad setting; for the specific problem of genetic association, the interface in [gcat()] and [gcat.stat()] are more user-friendly. |
|
5 |
+#' |
|
6 |
+#' @inheritParams lfa::lfa |
|
7 |
+#' @param LF0 Logistic factors for null model. |
|
8 |
+#' @param LF1 Logistic factors for alternative model. |
|
9 |
+#' |
|
10 |
+#' @return The vector of delta deviance values, one per locus of `X`. |
|
11 |
+#' |
|
12 |
+#' @examples |
|
13 |
+#' \dontrun{ |
|
14 |
+#' devdiff <- delta_deviance_lf( X, LF0, LF1 ) |
|
15 |
+#' } |
|
16 |
+#' |
|
17 |
+#' @export |
|
18 |
+delta_deviance_lf <- function( X, LF0, LF1 ){ |
|
19 |
+ if ( missing( X ) ) |
|
20 |
+ stop( 'Genotype matrix `X` is required!' ) |
|
21 |
+ if ( missing( LF0 ) ) |
|
22 |
+ stop( "`LF0` matrix is required!" ) |
|
23 |
+ if ( missing( LF1 ) ) |
|
24 |
+ stop( "`LF1` matrix is required!" ) |
|
25 |
+ |
|
26 |
+ # check class |
|
27 |
+ is_BEDMatrix <- FALSE |
|
28 |
+ if ( "BEDMatrix" %in% class(X) ) { |
|
29 |
+ is_BEDMatrix <- TRUE |
|
30 |
+ } else if ( !is.matrix( X ) ) |
|
31 |
+ stop( '`X` must be a matrix!' ) |
|
32 |
+ |
|
33 |
+ # get dimensions |
|
34 |
+ if ( is_BEDMatrix ) { |
|
35 |
+ m <- ncol(X) |
|
36 |
+ n <- nrow(X) |
|
37 |
+ } else { |
|
38 |
+ n <- ncol(X) |
|
39 |
+ # m not used in this case |
|
40 |
+ } |
|
41 |
+ |
|
42 |
+ # check dimensions |
|
43 |
+ if ( nrow(LF0) != n ) |
|
44 |
+ stop( 'Number of individuals in `X` must equal number of individuals (rows) in `LF0`' ) |
|
45 |
+ if ( nrow(LF1) != n ) |
|
46 |
+ stop( 'Number of individuals in `X` must equal number of individuals (rows) in `LF1`' ) |
|
47 |
+ |
|
48 |
+ # check LFs for missing values |
|
49 |
+ if ( anyNA( LF0 ) ) |
|
50 |
+ stop( '`LF0` must not have missing values!' ) |
|
51 |
+ if ( anyNA( LF1 ) ) |
|
52 |
+ stop( '`LF1` must not have missing values!' ) |
|
53 |
+ |
|
54 |
+ # start processing |
|
55 |
+ if ( is_BEDMatrix ) { |
|
56 |
+ # explicit loop for BEDMatrix |
|
57 |
+ devdiff <- vector('numeric', m) |
|
58 |
+ for ( i in 1 : m ) { |
|
59 |
+ # get locus i genotype vector |
|
60 |
+ xi <- X[ , i ] |
|
61 |
+ # calculate and store result |
|
62 |
+ devdiff[ i ] <- delta_deviance_snp_lf( xi, LF0, LF1 ) |
|
63 |
+ } |
|
64 |
+ } else |
|
65 |
+ devdiff <- apply( X, 1, delta_deviance_snp_lf, LF0, LF1 ) |
|
66 |
+ return( devdiff ) |
|
67 |
+} |
|
68 |
+ |
0 | 69 |
similarity index 55% |
1 | 70 |
rename from R/assoc_snp.R |
2 | 71 |
rename to R/delta_deviance_snp_lf.R |
... | ... |
@@ -1,35 +1,35 @@ |
1 |
-assoc_snp <- function(xi, LF, trait){ |
|
1 |
+delta_deviance_snp_lf <- function( xi, LF0, LF1 ){ |
|
2 | 2 |
if ( missing( xi ) ) |
3 | 3 |
stop( 'Genotype vector `xi` is required!' ) |
4 |
- if ( missing( LF ) ) |
|
5 |
- stop( "`LF` matrix is required!" ) |
|
6 |
- if ( missing( trait ) ) |
|
7 |
- stop( "`trait` is required!" ) |
|
4 |
+ if ( missing( LF0 ) ) |
|
5 |
+ stop( "`LF0` matrix is required!" ) |
|
6 |
+ if ( missing( LF1 ) ) |
|
7 |
+ stop( "`LF1` matrix is required!" ) |
|
8 | 8 |
|
9 | 9 |
# check dimensions |
10 | 10 |
n <- length( xi ) |
11 |
- if ( nrow(LF) != n ) |
|
12 |
- stop( 'Number of individuals in `xi` must equal number of individuals (rows) in `LF`' ) |
|
13 |
- if ( length(trait) != n ) |
|
14 |
- stop( 'Number of individuals in `xi` must equal number of individuals in `trait`' ) |
|
11 |
+ if ( nrow(LF0) != n ) |
|
12 |
+ stop( 'Number of individuals in `xi` must equal number of individuals (rows) in `LF0`' ) |
|
13 |
+ if ( nrow(LF1) != n ) |
|
14 |
+ stop( 'Number of individuals in `xi` must equal number of individuals (rows) in `LF1`' ) |
|
15 | 15 |
|
16 | 16 |
# check trait and LF for missing values |
17 |
- if ( anyNA( trait ) ) |
|
18 |
- stop( '`trait` must not have missing values!' ) |
|
19 |
- if ( anyNA( LF ) ) |
|
20 |
- stop( '`LF` must not have missing values!' ) |
|
17 |
+ if ( anyNA( LF0 ) ) |
|
18 |
+ stop( '`LF0` must not have missing values!' ) |
|
19 |
+ if ( anyNA( LF1 ) ) |
|
20 |
+ stop( '`LF1` must not have missing values!' ) |
|
21 | 21 |
|
22 | 22 |
# remove individuals whose genotypes are NA |
23 | 23 |
# though `lfa::af_snp` allows NAs, the resulting imputed allele frequencies do not contribute to the deviance (because the xi's are factors of the sum)! So best to exclude them from the beginning. |
24 | 24 |
indexes_keep <- !is.na(xi) |
25 | 25 |
xi <- xi[indexes_keep] |
26 |
- LF <- LF[indexes_keep,] |
|
27 |
- trait <- trait[indexes_keep] |
|
28 |
- |
|
29 |
- # perform two logistic regressions, under the null and alternative (add trait to LF/covariates), resulting in estimated allele frequencies under each model |
|
30 |
- p0 <- lfa::af_snp( xi, LF ) |
|
31 |
- p1 <- lfa::af_snp( xi, cbind(LF, trait) ) |
|
32 |
- |
|
26 |
+ LF0 <- LF0[indexes_keep,] |
|
27 |
+ LF1 <- LF1[indexes_keep,] |
|
28 |
+ |
|
29 |
+ # perform two logistic regressions, under the null and alternative, resulting in estimated allele frequencies under each model |
|
30 |
+ p0 <- lfa::af_snp( xi, LF0 ) |
|
31 |
+ p1 <- lfa::af_snp( xi, LF1 ) |
|
32 |
+ |
|
33 | 33 |
# now compute delta deviance |
34 | 34 |
# uses a special numerically stable algorithm |
35 | 35 |
devdiff <- delta_deviance_snp( xi, p0, p1 ) |
... | ... |
@@ -11,21 +11,13 @@ gcat.stat <- function( X, LF, trait, adjustment = NULL ){ |
11 | 11 |
if ( missing( trait ) ) |
12 | 12 |
stop( "`trait` is required!" ) |
13 | 13 |
|
14 |
- # check class |
|
15 |
- is_BEDMatrix <- FALSE |
|
14 |
+ # check class, get dimensions |
|
16 | 15 |
if ( "BEDMatrix" %in% class(X) ) { |
17 |
- is_BEDMatrix <- TRUE |
|
18 |
- } else if ( !is.matrix( X ) ) |
|
19 |
- stop( '`X` must be a matrix!' ) |
|
20 |
- |
|
21 |
- # get dimensions |
|
22 |
- if ( is_BEDMatrix ) { |
|
23 |
- m <- ncol(X) |
|
24 | 16 |
n <- nrow(X) |
25 |
- } else { |
|
17 |
+ } else if ( is.matrix( X ) ) { |
|
26 | 18 |
n <- ncol(X) |
27 |
- # m not used in this case |
|
28 |
- } |
|
19 |
+ } else |
|
20 |
+ stop( '`X` must be a matrix!' ) |
|
29 | 21 |
|
30 | 22 |
# check dimensions |
31 | 23 |
if ( nrow(LF) != n ) |
... | ... |
@@ -48,18 +40,14 @@ gcat.stat <- function( X, LF, trait, adjustment = NULL ){ |
48 | 40 |
# if all good, add adjustments to LFs |
49 | 41 |
LF <- cbind( LF, adjustment ) |
50 | 42 |
} |
43 |
+ |
|
44 |
+ # bind matrices |
|
45 |
+ LF1 <- cbind(LF, trait) |
|
46 |
+ |
|
47 |
+ # this performs the calculations in a broader setting |
|
48 |
+ # (shared with `jackstraw` package) |
|
49 |
+ devdiff <- delta_deviance_lf( X, LF, LF1 ) |
|
51 | 50 |
|
52 |
- if ( is_BEDMatrix ) { |
|
53 |
- # explicit loop for BEDMatrix |
|
54 |
- devdiff <- vector('numeric', m) |
|
55 |
- for ( i in 1 : m ) { |
|
56 |
- # get locus i genotype vector |
|
57 |
- xi <- X[ , i ] |
|
58 |
- # calculate and store result |
|
59 |
- devdiff[ i ] <- assoc_snp( xi, LF, trait ) |
|
60 |
- } |
|
61 |
- } else |
|
62 |
- devdiff <- apply( X, 1, assoc_snp, LF, trait ) |
|
63 | 51 |
return( devdiff ) |
64 | 52 |
} |
65 | 53 |
|
66 | 54 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,31 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/delta_deviance_lf.R |
|
3 |
+\name{delta_deviance_lf} |
|
4 |
+\alias{delta_deviance_lf} |
|
5 |
+\title{Calculate delta deviance of logistic fits of a null and alternative model} |
|
6 |
+\usage{ |
|
7 |
+delta_deviance_lf(X, LF0, LF1) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{X}{A matrix of SNP genotypes, i.e. an integer matrix of 0's, |
|
11 |
+1's, 2's and \code{NA}s. |
|
12 |
+BEDMatrix is supported. |
|
13 |
+Sparse matrices of class Matrix are not supported (yet).} |
|
14 |
+ |
|
15 |
+\item{LF0}{Logistic factors for null model.} |
|
16 |
+ |
|
17 |
+\item{LF1}{Logistic factors for alternative model.} |
|
18 |
+} |
|
19 |
+\value{ |
|
20 |
+The vector of delta deviance values, one per locus of \code{X}. |
|
21 |
+} |
|
22 |
+\description{ |
|
23 |
+This function fits, at each locus of a given genotype matrix, two logistic models, and under the assumption that the models are nested, calculates the delta deviance between the two. |
|
24 |
+This general function is intended for testing models in a broad setting; for the specific problem of genetic association, the interface in \code{\link[=gcat]{gcat()}} and \code{\link[=gcat.stat]{gcat.stat()}} are more user-friendly. |
|
25 |
+} |
|
26 |
+\examples{ |
|
27 |
+\dontrun{ |
|
28 |
+devdiff <- delta_deviance_lf( X, LF0, LF1 ) |
|
29 |
+} |
|
30 |
+ |
|
31 |
+} |
... | ... |
@@ -169,11 +169,12 @@ test_that( "delta_deviance_snp works", { |
169 | 169 |
expect_true( is.na( delta_deviance_snp( xi, pi, pi_bad2 ) ) ) |
170 | 170 |
}) |
171 | 171 |
|
172 |
-test_that("assoc_snp works", { |
|
172 |
+test_that("delta_deviance_snp_lf works", { |
|
173 | 173 |
# test a single SNP |
174 | 174 |
i <- 1 |
175 |
+ LF1 <- cbind(LFs, trait) |
|
175 | 176 |
expect_silent( |
176 |
- devdiff <- assoc_snp( X[ i, ] , LFs, trait ) |
|
177 |
+ devdiff <- delta_deviance_snp_lf( X[ i, ] , LFs, LF1 ) |
|
177 | 178 |
) |
178 | 179 |
expect_equal( length( devdiff ), 1 ) |
179 | 180 |
expect_true( is.numeric( devdiff ) ) |
... | ... |
@@ -188,6 +189,28 @@ test_that("assoc_snp works", { |
188 | 189 |
expect_equal( devdiff, devdiff_glm ) |
189 | 190 |
}) |
190 | 191 |
|
192 |
+test_that("delta_deviance_lf works", { |
|
193 |
+ LF1 <- cbind(LFs, trait) |
|
194 |
+ expect_silent( |
|
195 |
+ devdiff <- delta_deviance_lf( X , LFs, LF1 ) |
|
196 |
+ ) |
|
197 |
+ expect_equal( length( devdiff ), m_loci ) |
|
198 |
+ expect_true( is.numeric( devdiff ) ) |
|
199 |
+ # delta deviances can be NA if LFA/glm.fit fail to converge |
|
200 |
+ expect_true( !any( is.na( devdiff ) ) ) |
|
201 |
+ # theoretically this is true, but in practice it depends on LFA fitting these models well, so testing this is not appropriate here (fails sometimes) |
|
202 |
+ ## expect_true( all( devdiff >= 0 ) ) |
|
203 |
+ |
|
204 |
+ # loop through `glm` version |
|
205 |
+ devdiff_glm <- vector( 'numeric', m_loci ) |
|
206 |
+ for ( i in 1 : m_loci ) { |
|
207 |
+ suppressWarnings( |
|
208 |
+ devdiff_glm[i] <- delta_deviance_snp_glm( X[ i, ], LFs, trait ) |
|
209 |
+ ) |
|
210 |
+ } |
|
211 |
+ expect_equal( devdiff, devdiff_glm ) |
|
212 |
+}) |
|
213 |
+ |
|
191 | 214 |
test_that("gcat.stat works", { |
192 | 215 |
expect_silent( |
193 | 216 |
devdiff <- gcat.stat( X, LFs, trait ) |
... | ... |
@@ -253,6 +276,15 @@ if ( |
253 | 276 |
# load as a BEDMatrix object |
254 | 277 |
X_BEDMatrix <- suppressMessages(suppressWarnings( BEDMatrix( file_bed ) )) |
255 | 278 |
|
279 |
+ test_that("delta_deviance_lf works with BEDMatrix", { |
|
280 |
+ LF1 <- cbind( LFs, trait ) |
|
281 |
+ devdiff_basic <- delta_deviance_lf( X, LFs, LF1 ) |
|
282 |
+ expect_silent( |
|
283 |
+ devdiff_BM <- delta_deviance_lf( X_BEDMatrix, LFs, LF1 ) |
|
284 |
+ ) |
|
285 |
+ expect_equal( devdiff_basic, devdiff_BM ) |
|
286 |
+ }) |
|
287 |
+ |
|
256 | 288 |
test_that("gcat.stat works with BEDMatrix", { |
257 | 289 |
devdiff_basic <- gcat.stat( X, LFs, trait ) |
258 | 290 |
expect_silent( |