... | ... |
@@ -15,6 +15,11 @@ |
15 | 15 |
#' @param geno.only A logical value that flags the model to use either just the \code{matrix} `Kin` or the supplied random effects. |
16 | 16 |
#' @param solver a character value that determines which optimisation algorithm is used for the variance components. Must be either |
17 | 17 |
#' HE (Haseman-Elston regression) or Fisher (Fisher scoring). |
18 |
+#' @param intercept.type A character scalar, either \emph{fixed} or \emph{random} that sets the type of the global |
|
19 |
+#' intercept variable in the model. This only applies to the GLMM case where additional random effects variables are |
|
20 |
+#' already included. Setting \code{intercept.type="fixed"} or \code{intercept.type="random"} will require the user to |
|
21 |
+#' test their model for failures with each. In the case of using a kinship matrix, \code{intercept.type="fixed"} is |
|
22 |
+#' set automatically. |
|
18 | 23 |
#' |
19 | 24 |
#' @details |
20 | 25 |
#' This function runs a negative binomial generalised linear mixed effects model. If mixed effects are detected in testNhoods, |
... | ... |
@@ -81,6 +86,7 @@ fitGLMM <- function(X, Z, y, offsets, init.theta=NULL, Kin=NULL, |
81 | 86 |
init.sigma=NULL, init.beta=NULL, |
82 | 87 |
init.u=NULL, solver=NULL), |
83 | 88 |
dispersion = 1, geno.only=FALSE, |
89 |
+ intercept.type="fixed", |
|
84 | 90 |
solver=NULL){ |
85 | 91 |
|
86 | 92 |
if(!is.null(solver)){ |
... | ... |
@@ -273,8 +279,12 @@ fitGLMM <- function(X, Z, y, offsets, init.theta=NULL, Kin=NULL, |
273 | 279 |
diag(eyeN) <- 1 |
274 | 280 |
errMat <- (e0 %*% t(e0))/sig0 - eyeN |
275 | 281 |
|
276 |
- exp.levels <- random.levels |
|
277 |
- exp.levels <- exp.levels[!grepl(exp.levels, pattern="residual")] |
|
282 |
+ if(intercept.type == "random"){ |
|
283 |
+ exp.levels <- random.levels |
|
284 |
+ exp.levels <- exp.levels[!grepl(exp.levels, pattern="residual")] |
|
285 |
+ } else{ |
|
286 |
+ exp.levels <- random.levels |
|
287 |
+ } |
|
278 | 288 |
|
279 | 289 |
curr_sigma.vec <- sapply(exp.levels, FUN=function(lvls, bigZ, errVec){ |
280 | 290 |
ijZ <- bigZ[, lvls] |
... | ... |
@@ -291,8 +301,11 @@ fitGLMM <- function(X, Z, y, offsets, init.theta=NULL, Kin=NULL, |
291 | 301 |
}, bigZ=full.Z, errVec=errMat) |
292 | 302 |
|
293 | 303 |
# add the residual variance |
294 |
- res.var <- abs(sig0 - sum(curr_sigma.vec)) |
|
295 |
- curr_sigma.vec <- c(curr_sigma.vec, res.var) |
|
304 |
+ if(intercept.type == "random"){ |
|
305 |
+ res.var <- abs(sig0 - sum(curr_sigma.vec)) |
|
306 |
+ curr_sigma.vec <- c(curr_sigma.vec, res.var) |
|
307 |
+ } |
|
308 |
+ |
|
296 | 309 |
curr_sigma <- Matrix(abs(curr_sigma.vec), ncol=1, sparse=TRUE) |
297 | 310 |
# curr_sigma = Matrix(rep(var(y)/(ncol(Z) + 2), ncol(Z)), ncol=1, sparse=TRUE) # split evenly |
298 | 311 |
# curr_sigma <- Matrix(runif(ncol(Z), 0, 1), ncol=1, sparse = TRUE) |
... | ... |
@@ -55,6 +55,11 @@ |
55 | 55 |
#' these should have the same \code{length} as \code{nrow} of \code{nhoodCounts}. If numeric, then these are assumed |
56 | 56 |
#' to correspond to indices of \code{nhoodCounts} - if the maximal index is greater than \code{nrow(nhoodCounts(x))} |
57 | 57 |
#' an error will be produced. |
58 |
+#' @param intercept.type A character scalar, either \emph{fixed} or \emph{random} that sets the type of the global |
|
59 |
+#' intercept variable in the model. This only applies to the GLMM case where additional random effects variables are |
|
60 |
+#' already included. Setting \code{intercept.type="fixed"} or \code{intercept.type="random"} will require the user to |
|
61 |
+#' test their model for failures with each. In the case of using a kinship matrix, \code{intercept.type="fixed"} is |
|
62 |
+#' set automatically. |
|
58 | 63 |
#' @param fail.on.error A logical scalar the determines the behaviour of the error reporting. Used for debugging only. |
59 | 64 |
#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying the arguments for parallelisation. By default |
60 | 65 |
#' this will evaluate using \code{SerialParam()}. See \code{details}on how to use parallelisation in \code{testNhoods}. |
... | ... |
@@ -163,10 +168,15 @@ testNhoods <- function(x, design, design.df, kinship=NULL, |
163 | 168 |
min.mean=0, model.contrasts=NULL, robust=TRUE, reduced.dim="PCA", REML=TRUE, |
164 | 169 |
norm.method=c("TMM", "RLE", "logMS"), cell.sizes=NULL, |
165 | 170 |
max.iters = 50, max.tol = 1e-5, glmm.solver=NULL, |
166 |
- subset.nhoods=NULL, fail.on.error=FALSE, BPPARAM=SerialParam(), |
|
167 |
- force=FALSE){ |
|
171 |
+ subset.nhoods=NULL, intercept.type=c("fixed", "random"), |
|
172 |
+ fail.on.error=FALSE, BPPARAM=SerialParam(), force=FALSE){ |
|
168 | 173 |
is.lmm <- FALSE |
169 | 174 |
geno.only <- FALSE |
175 |
+ |
|
176 |
+ if(!any(intercept.type %in% c("fixed", "random"))){ |
|
177 |
+ stop("intercept.type: ", intercept.type, " not recognised, must be either 'fixed' or 'random'") |
|
178 |
+ } |
|
179 |
+ |
|
170 | 180 |
if(is(design, "formula")){ |
171 | 181 |
# parse to find random and fixed effects - need to double check the formula is valid |
172 | 182 |
parse <- unlist(strsplit(gsub(design, pattern="~", replacement=""), split= "+", fixed=TRUE)) |
... | ... |
@@ -185,14 +195,25 @@ testNhoods <- function(x, design, design.df, kinship=NULL, |
185 | 195 |
is.lmm <- TRUE |
186 | 196 |
if(find_re | is.null(kinship)){ |
187 | 197 |
# make model matrices for fixed and random effects |
188 |
- z.model <- .parse_formula(design, design.df, vtype="re") |
|
198 |
+ if(length(intercept.type) > 1){ |
|
199 |
+ intercept.type <- intercept.type[1] |
|
200 |
+ } |
|
201 |
+ |
|
202 |
+ if(intercept.type == "random"){ |
|
203 |
+ rand.int <- TRUE |
|
204 |
+ } else{ |
|
205 |
+ rand.int <- FALSE |
|
206 |
+ } |
|
207 |
+ |
|
208 |
+ z.model <- .parse_formula(design, design.df, vtype="re", add.int=rand.int) |
|
189 | 209 |
rownames(z.model) <- rownames(design.df) |
190 | 210 |
} else if(find_re & !is.null(kinship)){ |
191 | 211 |
if(!all(rownames(kinship) == rownames(design.df))){ |
192 | 212 |
stop("Genotype rownames do not match design.df rownames") |
193 | 213 |
} |
194 | 214 |
|
195 |
- z.model <- .parse_formula(design, design.df, vtype="re") |
|
215 |
+ # genetic model MUST have fixed intercept |
|
216 |
+ z.model <- .parse_formula(design, design.df, vtype="re", add.int=FALSE) |
|
196 | 217 |
rownames(z.model) <- rownames(design.df) |
197 | 218 |
} else if(!find_re & !is.null(kinship)){ |
198 | 219 |
z.model <- diag(nrow(kinship)) |
... | ... |
@@ -203,7 +224,13 @@ testNhoods <- function(x, design, design.df, kinship=NULL, |
203 | 224 |
|
204 | 225 |
# this will always implicitly include a fixed intercept term - perhaps |
205 | 226 |
# this shouldn't be the case? |
206 |
- x.model <- .parse_formula(design, design.df, vtype="fe") |
|
227 |
+ if(intercept.type == "fixed"){ |
|
228 |
+ fixed.int <- TRUE |
|
229 |
+ } else{ |
|
230 |
+ fixed.int <- FALSE |
|
231 |
+ } |
|
232 |
+ |
|
233 |
+ x.model <- .parse_formula(design, design.df, vtype="fe", add.int=fixed.int) |
|
207 | 234 |
rownames(x.model) <- rownames(design.df) |
208 | 235 |
max.iters <- max.iters |
209 | 236 |
|
... | ... |
@@ -459,22 +486,24 @@ testNhoods <- function(x, design, design.df, kinship=NULL, |
459 | 486 |
|
460 | 487 |
#wrapper function is the same for all analyses |
461 | 488 |
glmmWrapper <- function(Y, disper, Xmodel, Zmodel, off.sets, randlevels, |
462 |
- reml, glmm.contr, genonly=FALSE, kin.ship=NULL, |
|
489 |
+ reml, glmm.contr, int.type, genonly=FALSE, kin.ship=NULL, |
|
463 | 490 |
BPPARAM=BPPARAM, error.fail=FALSE){ |
464 | 491 |
#bp.list <- NULL |
465 | 492 |
# this needs to be able to run with BiocParallel |
466 | 493 |
bp.list <- bptry({bplapply(seq_len(nrow(Y)), BPOPTIONS=bpoptions(stop.on.error = error.fail), |
467 | 494 |
FUN=function(i, Xmodel, Zmodel, Y, off.sets, |
468 | 495 |
randlevels, disper, genonly, |
469 |
- kin.ship, glmm.contr, reml){ |
|
496 |
+ kin.ship, glmm.contr, reml, int.type){ |
|
470 | 497 |
fitGLMM(X=Xmodel, Z=Zmodel, y=Y[i, ], offsets=off.sets, |
471 | 498 |
random.levels=randlevels, REML = reml, |
472 | 499 |
dispersion=disper[i], geno.only=genonly, |
473 |
- Kin=kinship, glmm.control=glmm.contr) |
|
500 |
+ Kin=kinship, glmm.control=glmm.contr, |
|
501 |
+ intercept.type=int.type) |
|
474 | 502 |
}, BPPARAM=BPPARAM, |
475 | 503 |
Xmodel=Xmodel, Zmodel=Zmodel, Y=Y, off.sets=off.sets, |
476 | 504 |
randlevels=randlevels, disper=disper, genonly=genonly, |
477 |
- kin.ship=kin.ship, glmm.cont=glmm.cont, reml=reml) |
|
505 |
+ kin.ship=kin.ship, glmm.cont=glmm.cont, reml=reml, |
|
506 |
+ int.type=intercept.type) |
|
478 | 507 |
}) # need to handle this output which is a bplist_error object |
479 | 508 |
|
480 | 509 |
# parse the bplist_error object |
... | ... |
@@ -512,26 +541,14 @@ testNhoods <- function(x, design, design.df, kinship=NULL, |
512 | 541 |
} else{ |
513 | 542 |
message("Running genetic model with ", nrow(z.model), " observations") |
514 | 543 |
} |
515 |
- |
|
516 |
- if(geno.only){ |
|
517 |
- fit <- glmmWrapper(Y=dge$counts, disper = 1/dispersion, Xmodel=x.model, Zmodel=z.model, |
|
518 |
- off.sets=offsets, randlevels=rand.levels, reml=REML, glmm.contr = glmm.cont, |
|
519 |
- genonly = geno.only, kin.ship=kinship, |
|
520 |
- BPPARAM=BPPARAM, error.fail=fail.on.error) |
|
521 |
- } else{ |
|
522 |
- fit <- glmmWrapper(Y=dge$counts, disper = 1/dispersion, Xmodel=x.model, Zmodel=z.model, |
|
523 |
- off.sets=offsets, randlevels=rand.levels, reml=REML, glmm.contr = glmm.cont, |
|
524 |
- genonly = geno.only, kin.ship=kinship, |
|
525 |
- BPPARAM=BPPARAM, error.fail=fail.on.error) |
|
526 |
- } |
|
527 |
- |
|
528 |
- } else{ |
|
529 |
- fit <- glmmWrapper(Y=dge$counts, disper = 1/dispersion, Xmodel=x.model, Zmodel=z.model, |
|
530 |
- off.sets=offsets, randlevels=rand.levels, reml=REML, glmm.contr = glmm.cont, |
|
531 |
- genonly = geno.only, kin.ship=kinship, |
|
532 |
- BPPARAM=BPPARAM, error.fail=fail.on.error) |
|
533 | 544 |
} |
534 | 545 |
|
546 |
+ fit <- glmmWrapper(Y=dge$counts, disper = 1/dispersion, Xmodel=x.model, Zmodel=z.model, |
|
547 |
+ off.sets=offsets, randlevels=rand.levels, reml=REML, glmm.contr = glmm.cont, |
|
548 |
+ genonly = geno.only, kin.ship=kinship, |
|
549 |
+ BPPARAM=BPPARAM, error.fail=fail.on.error, |
|
550 |
+ int.type=intercept.type) |
|
551 |
+ |
|
535 | 552 |
# give warning about how many neighborhoods didn't converge and error if > 50% nhoods failed |
536 | 553 |
n.nhoods <- length(fit) |
537 | 554 |
half.n <- floor(n.nhoods * 0.5) |
... | ... |
@@ -66,7 +66,7 @@ |
66 | 66 |
|
67 | 67 |
# parse design formula |
68 | 68 |
#' @export |
69 |
-.parse_formula <- function(in.form, design.df, vtype=c("re", "fe")){ |
|
69 |
+.parse_formula <- function(in.form, design.df, vtype=c("re", "fe"), add.int=FALSE){ |
|
70 | 70 |
## parse the formula and return the X and Z matrices |
71 | 71 |
# need to decide on how to handle intercept terms - i.e. FE or RE |
72 | 72 |
sp.form <- unlist(strsplit(as.character(in.form), |
... | ... |
@@ -83,9 +83,13 @@ |
83 | 83 |
function(x) as.integer(factor(x)))), ncol = length(v.terms)) |
84 | 84 |
} |
85 | 85 |
|
86 |
- # add the residual variance term |
|
87 |
- d.mat <- cbind(d.mat, matrix(data=1L, nrow=nrow(d.mat), ncol=1)) |
|
88 |
- colnames(d.mat) <- c(trimws(v.terms), "residual") |
|
86 |
+ # add the residual variance term if appropriate |
|
87 |
+ if(isTRUE(add.int)){ |
|
88 |
+ d.mat <- cbind(d.mat, matrix(data=1L, nrow=nrow(d.mat), ncol=1)) |
|
89 |
+ colnames(d.mat) <- c(trimws(v.terms), "residual") |
|
90 |
+ } else{ |
|
91 |
+ colnames(d.mat) <- trimws(v.terms) |
|
92 |
+ } |
|
89 | 93 |
|
90 | 94 |
} else if(vtype %in% c("fe")){ |
91 | 95 |
v.terms <- trimws(unlist(sp.form[!grepl(trimws(sp.form), pattern="~|\\|")])) |
... | ... |
@@ -93,8 +97,15 @@ |
93 | 97 |
v.terms <- paste(v.terms, collapse=" + ") |
94 | 98 |
} |
95 | 99 |
|
100 |
+ # the intercept is a fixed effect in this model |
|
96 | 101 |
d.mat <- model.matrix(as.formula(paste("~ 1 +", v.terms)), data = design.df) |
97 |
- d.mat <- d.mat[ ,!grepl("1*\\|", colnames(d.mat))] # drop the intercept term |
|
102 |
+ d.mat <- d.mat[ ,!grepl("1*\\|", colnames(d.mat))] # drop the random terms |
|
103 |
+ |
|
104 |
+ if(isFALSE(add.int)){ |
|
105 |
+ # drop the intercept term if required. |
|
106 |
+ d.mat[, !grepl("Intercept", colnames(d.mat)), drop=FALSE] |
|
107 |
+ } |
|
108 |
+ |
|
98 | 109 |
} else{ |
99 | 110 |
stop("vtype ", vtype, " not recognised") |
100 | 111 |
} |
... | ... |
@@ -167,7 +167,7 @@ set.seed(42) |
167 | 167 |
rownames(pbmc.design) <- pbmc.design$ObsID |
168 | 168 |
|
169 | 169 |
da_results <- testNhoods(pbmc.milo, design = ~ adjmfc.time + (1|tenx_lane), design.df = pbmc.design, fdr.weighting="graph-overlap", |
170 |
- glmm.solver="Fisher", REML=TRUE, norm.method="TMM", BPPARAM = bpparam, fail.on.error=FALSE) |
|
170 |
+ glmm.solver="Fisher", REML=TRUE, norm.method="TMM", BPPARAM = bpparam, fail.on.error=FALSE, intercept.type="fixed") |
|
171 | 171 |
table(da_results$SpatialFDR < 0.1) |
172 | 172 |
``` |
173 | 173 |
|
... | ... |
@@ -219,7 +219,7 @@ the results table when using the GLM. |
219 | 219 |
We can inspect the distribution of the variance parameter estimates, and compare between the converged vs. not converged nhoods. |
220 | 220 |
|
221 | 221 |
```{r} |
222 |
-ggplot(da_results, aes(x=Converged, y=`tenx_lane variance`)) + |
|
222 |
+ggplot(da_results, aes(x=Converged, y=`tenx_lane_variance`)) + |
|
223 | 223 |
geom_boxplot() + |
224 | 224 |
scale_y_log10() + |
225 | 225 |
NULL |