Bioconductor Code: miloR

Browse code

allow user to set intercept type

MikeDMorgan authored on 11/09/2024 14:48:33
Showing 4 changed files

R/glmm.R index 81f6b3f..bb8c298 100644
R/testNhoods.R index bf685e1..e4ed6ea 100644
R/utils.R index e0e6cf7..6a02bf1 100644
vignettes/milo_glmm.Rmd index 49fe360..9fabd6a 100644

History View file @ 58d5661

@@ -15,6 +15,11 @@
                      #' @param geno.only A logical value that flags the model to use either just the \code{matrix} `Kin` or the supplied random effects.
                      #' @param solver a character value that determines which optimisation algorithm is used for the variance components. Must be either
                      #' HE (Haseman-Elston regression) or Fisher (Fisher scoring).
                     +#' @param intercept.type A character scalar, either \emph{fixed} or \emph{random} that sets the type of the global
                     +#' intercept variable in the model. This only applies to the GLMM case where additional random effects variables are
                     +#' already included. Setting \code{intercept.type="fixed"} or \code{intercept.type="random"} will require the user to
                     +#' test their model for failures with each. In the case of using a kinship matrix, \code{intercept.type="fixed"} is
                     +#' set automatically.
                      #'
                      #' @details
                      #' This function runs a negative binomial generalised linear mixed effects model. If mixed effects are detected in testNhoods,
@@ -81,6 +86,7 @@ fitGLMM <- function(X, Z, y, offsets, init.theta=NULL, Kin=NULL,
                                                            init.sigma=NULL, init.beta=NULL,
                                                            init.u=NULL, solver=NULL),
                                          dispersion = 1, geno.only=FALSE,
                     +                    intercept.type="fixed",
                                          solver=NULL){
                          if(!is.null(solver)){
@@ -273,8 +279,12 @@ fitGLMM <- function(X, Z, y, offsets, init.theta=NULL, Kin=NULL,
                                  diag(eyeN) <- 1
                                  errMat <- (e0 %*% t(e0))/sig0 - eyeN
                     -            exp.levels <- random.levels
                     -            exp.levels <- exp.levels[!grepl(exp.levels, pattern="residual")]
                     +            if(intercept.type == "random"){
                     +                exp.levels <- random.levels
                     +                exp.levels <- exp.levels[!grepl(exp.levels, pattern="residual")]
                     +            } else{
                     +                exp.levels <- random.levels
                     +            }
                                  curr_sigma.vec <- sapply(exp.levels, FUN=function(lvls, bigZ, errVec){
                                      ijZ <- bigZ[, lvls]
@@ -291,8 +301,11 @@ fitGLMM <- function(X, Z, y, offsets, init.theta=NULL, Kin=NULL,
                                  }, bigZ=full.Z, errVec=errMat)
                                  # add the residual variance
                     -            res.var <- abs(sig0 - sum(curr_sigma.vec))
                     -            curr_sigma.vec <- c(curr_sigma.vec, res.var)
                     +            if(intercept.type == "random"){
                     +                res.var <- abs(sig0 - sum(curr_sigma.vec))
                     +                curr_sigma.vec <- c(curr_sigma.vec, res.var)
                     +            }
+                    +
                                  curr_sigma <- Matrix(abs(curr_sigma.vec), ncol=1, sparse=TRUE)
                                  # curr_sigma = Matrix(rep(var(y)/(ncol(Z) + 2), ncol(Z)), ncol=1, sparse=TRUE) # split evenly
                                  # curr_sigma <- Matrix(runif(ncol(Z), 0, 1), ncol=1, sparse = TRUE)

R/testNhoods.R

History View file @ 58d5661

@@ -55,6 +55,11 @@
                      #' these should have the same \code{length} as \code{nrow} of \code{nhoodCounts}. If numeric, then these are assumed
                      #' to correspond to indices of \code{nhoodCounts} - if the maximal index is greater than \code{nrow(nhoodCounts(x))}
                      #' an error will be produced.
                     +#' @param intercept.type A character scalar, either \emph{fixed} or \emph{random} that sets the type of the global
                     +#' intercept variable in the model. This only applies to the GLMM case where additional random effects variables are
                     +#' already included. Setting \code{intercept.type="fixed"} or \code{intercept.type="random"} will require the user to
                     +#' test their model for failures with each. In the case of using a kinship matrix, \code{intercept.type="fixed"} is
                     +#' set automatically.
                      #' @param fail.on.error A logical scalar the determines the behaviour of the error reporting. Used for debugging only.
                      #' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying the arguments for parallelisation. By default
                      #' this will evaluate using \code{SerialParam()}. See \code{details}on how to use parallelisation in \code{testNhoods}.
@@ -163,10 +168,15 @@ testNhoods <- function(x, design, design.df, kinship=NULL,
                                             min.mean=0, model.contrasts=NULL, robust=TRUE, reduced.dim="PCA", REML=TRUE,
                                             norm.method=c("TMM", "RLE", "logMS"), cell.sizes=NULL,
                                             max.iters = 50, max.tol = 1e-5, glmm.solver=NULL,
                     -                       subset.nhoods=NULL, fail.on.error=FALSE, BPPARAM=SerialParam(),
                     -                       force=FALSE){
                     +                       subset.nhoods=NULL, intercept.type=c("fixed", "random"),
                     +                       fail.on.error=FALSE, BPPARAM=SerialParam(), force=FALSE){
                          is.lmm <- FALSE
                          geno.only <- FALSE
+                    +
                     +    if(!any(intercept.type %in% c("fixed", "random"))){
                     +        stop("intercept.type: ", intercept.type, " not recognised, must be either 'fixed' or 'random'")
                     +    }
+                    +
                          if(is(design, "formula")){
                              # parse to find random and fixed effects - need to double check the formula is valid
                              parse <- unlist(strsplit(gsub(design, pattern="~", replacement=""), split= "+", fixed=TRUE))
@@ -185,14 +195,25 @@ testNhoods <- function(x, design, design.df, kinship=NULL,
                                  is.lmm <- TRUE
                                  if(find_re | is.null(kinship)){
                                      # make model matrices for fixed and random effects
                     -                z.model <- .parse_formula(design, design.df, vtype="re")
                     +                if(length(intercept.type) > 1){
                     +                    intercept.type <- intercept.type[1]
                     +                }
+                    +
                     +                if(intercept.type == "random"){
                     +                    rand.int <- TRUE
                     +                } else{
                     +                    rand.int <- FALSE
                     +                }
+                    +
                     +                z.model <- .parse_formula(design, design.df, vtype="re", add.int=rand.int)
                                      rownames(z.model) <- rownames(design.df)
                                  } else if(find_re & !is.null(kinship)){
                                      if(!all(rownames(kinship) == rownames(design.df))){
                                          stop("Genotype rownames do not match design.df rownames")
+                                     }
                     -                z.model <- .parse_formula(design, design.df, vtype="re")
                     +                # genetic model MUST have fixed intercept
                     +                z.model <- .parse_formula(design, design.df, vtype="re", add.int=FALSE)
                                      rownames(z.model) <- rownames(design.df)
                                  } else if(!find_re & !is.null(kinship)){
                                      z.model <- diag(nrow(kinship))
@@ -203,7 +224,13 @@ testNhoods <- function(x, design, design.df, kinship=NULL,
                                  # this will always implicitly include a fixed intercept term - perhaps
                                  # this shouldn't be the case?
                     -            x.model <- .parse_formula(design, design.df, vtype="fe")
                     +            if(intercept.type == "fixed"){
                     +                fixed.int <- TRUE
                     +            } else{
                     +                fixed.int <- FALSE
                     +            }
+                    +
                     +            x.model <- .parse_formula(design, design.df, vtype="fe", add.int=fixed.int)
                                  rownames(x.model) <- rownames(design.df)
                                  max.iters <- max.iters
@@ -459,22 +486,24 @@ testNhoods <- function(x, design, design.df, kinship=NULL,
                              #wrapper function is the same for all analyses
                              glmmWrapper <- function(Y, disper, Xmodel, Zmodel, off.sets, randlevels,
                     -                                reml, glmm.contr, genonly=FALSE, kin.ship=NULL,
                     +                                reml, glmm.contr, int.type, genonly=FALSE, kin.ship=NULL,
                                                      BPPARAM=BPPARAM, error.fail=FALSE){
                                  #bp.list <- NULL
                                  # this needs to be able to run with BiocParallel
                                  bp.list <- bptry({bplapply(seq_len(nrow(Y)), BPOPTIONS=bpoptions(stop.on.error = error.fail),
                                                               FUN=function(i, Xmodel, Zmodel, Y, off.sets,
                                                                            randlevels, disper, genonly,
                     -                                                      kin.ship, glmm.contr, reml){
                     +                                                      kin.ship, glmm.contr, reml, int.type){
                                                                   fitGLMM(X=Xmodel, Z=Zmodel, y=Y[i, ], offsets=off.sets,
                                                                           random.levels=randlevels, REML = reml,
                                                                           dispersion=disper[i], geno.only=genonly,
                     -                                                     Kin=kinship, glmm.control=glmm.contr)
                     +                                                     Kin=kinship, glmm.control=glmm.contr,
                     +                                                     intercept.type=int.type)
                                                                   }, BPPARAM=BPPARAM,
                                                               Xmodel=Xmodel, Zmodel=Zmodel, Y=Y, off.sets=off.sets,
                                                               randlevels=randlevels, disper=disper, genonly=genonly,
                     -                                         kin.ship=kin.ship, glmm.cont=glmm.cont, reml=reml)
                     +                                         kin.ship=kin.ship, glmm.cont=glmm.cont, reml=reml,
                     +                                       int.type=intercept.type)
                                                      }) # need to handle this output which is a bplist_error object
                                  # parse the bplist_error object
@@ -512,26 +541,14 @@ testNhoods <- function(x, design, design.df, kinship=NULL,
                                  } else{
                                      message("Running genetic model with ", nrow(z.model), " observations")
+                                 }
+                    -
                     -            if(geno.only){
                     -                fit <- glmmWrapper(Y=dge$counts, disper = 1/dispersion, Xmodel=x.model, Zmodel=z.model,
                     -                                   off.sets=offsets, randlevels=rand.levels, reml=REML, glmm.contr = glmm.cont,
                     -                                   genonly = geno.only, kin.ship=kinship,
                     -                                   BPPARAM=BPPARAM, error.fail=fail.on.error)
                     -            } else{
                     -                fit <- glmmWrapper(Y=dge$counts, disper = 1/dispersion, Xmodel=x.model, Zmodel=z.model,
                     -                                   off.sets=offsets, randlevels=rand.levels, reml=REML, glmm.contr = glmm.cont,
                     -                                   genonly = geno.only, kin.ship=kinship,
                     -                                   BPPARAM=BPPARAM, error.fail=fail.on.error)
                     -            }
+                    -
                     -        } else{
                     -            fit <- glmmWrapper(Y=dge$counts, disper = 1/dispersion, Xmodel=x.model, Zmodel=z.model,
                     -                               off.sets=offsets, randlevels=rand.levels, reml=REML, glmm.contr = glmm.cont,
                     -                               genonly = geno.only, kin.ship=kinship,
                     -                               BPPARAM=BPPARAM, error.fail=fail.on.error)
+                             }
                     +        fit <- glmmWrapper(Y=dge$counts, disper = 1/dispersion, Xmodel=x.model, Zmodel=z.model,
                     +                           off.sets=offsets, randlevels=rand.levels, reml=REML, glmm.contr = glmm.cont,
                     +                           genonly = geno.only, kin.ship=kinship,
                     +                           BPPARAM=BPPARAM, error.fail=fail.on.error,
                     +                           int.type=intercept.type)
+                    +
                              # give warning about how many neighborhoods didn't converge and error if > 50% nhoods failed
                              n.nhoods <- length(fit)
                              half.n <- floor(n.nhoods * 0.5)

R/utils.R

History View file @ 58d5661

@@ -66,7 +66,7 @@
                      # parse design formula
                      #' @export
                     -.parse_formula <- function(in.form, design.df, vtype=c("re", "fe")){
                     +.parse_formula <- function(in.form, design.df, vtype=c("re", "fe"), add.int=FALSE){
                          ## parse the formula and return the X and Z matrices
                          # need to decide on how to handle intercept terms - i.e. FE or RE
                          sp.form <- unlist(strsplit(as.character(in.form),
@@ -83,9 +83,13 @@
                                                                function(x) as.integer(factor(x)))), ncol = length(v.terms))
+                             }
                     -        # add the residual variance term
                     -        d.mat <- cbind(d.mat, matrix(data=1L, nrow=nrow(d.mat), ncol=1))
                     -        colnames(d.mat) <- c(trimws(v.terms), "residual")
                     +        # add the residual variance term if appropriate
                     +        if(isTRUE(add.int)){
                     +            d.mat <- cbind(d.mat, matrix(data=1L, nrow=nrow(d.mat), ncol=1))
                     +            colnames(d.mat) <- c(trimws(v.terms), "residual")
                     +        } else{
                     +            colnames(d.mat) <- trimws(v.terms)
                     +        }
                          } else if(vtype %in% c("fe")){
                              v.terms <- trimws(unlist(sp.form[!grepl(trimws(sp.form), pattern="~|\\|")]))
@@ -93,8 +97,15 @@
                                  v.terms <- paste(v.terms, collapse=" + ")
+                             }
                     +        # the intercept is a fixed effect in this model
                              d.mat <- model.matrix(as.formula(paste("~ 1 +", v.terms)), data = design.df)
                     -        d.mat <- d.mat[ ,!grepl("1*\\|", colnames(d.mat))] # drop the intercept term
                     +        d.mat <- d.mat[ ,!grepl("1*\\|", colnames(d.mat))] # drop the random terms
+                    +
                     +        if(isFALSE(add.int)){
                     +            # drop the intercept term if required.
                     +            d.mat[, !grepl("Intercept", colnames(d.mat)), drop=FALSE]
                     +        }
+                    +
                          } else{
                              stop("vtype ", vtype, " not recognised")
+                         }

vignettes/milo_glmm.Rmd

History View file @ 58d5661

@@ -167,7 +167,7 @@ set.seed(42)
                      rownames(pbmc.design) <- pbmc.design$ObsID
                      da_results <- testNhoods(pbmc.milo, design = ~ adjmfc.time + (1|tenx_lane), design.df = pbmc.design, fdr.weighting="graph-overlap",
                     -                         glmm.solver="Fisher", REML=TRUE, norm.method="TMM", BPPARAM = bpparam, fail.on.error=FALSE)
                     +                         glmm.solver="Fisher", REML=TRUE, norm.method="TMM", BPPARAM = bpparam, fail.on.error=FALSE, intercept.type="fixed")
                      table(da_results$SpatialFDR < 0.1)
                      ```
@@ -219,7 +219,7 @@ the results table when using the GLM.
                      We can inspect the distribution of the variance parameter estimates, and compare between the converged vs. not converged nhoods.
                      ```{r}
                     -ggplot(da_results, aes(x=Converged, y=`tenx_lane variance`)) +
                     +ggplot(da_results, aes(x=Converged, y=`tenx_lane_variance`)) +
                          geom_boxplot() +
                          scale_y_log10() +
                          NULL