Bioconductor Code: miloR

Browse code

finishing vignette

EmmaDann authored on 09/11/2020 17:42:20
Showing 2 changed files

R/findNhoodMarkers.R index e014f03..35a0e82 100644
vignettes/milo_gastrulation.Rmd index e4f204d..25c7a62 100644

History View file @ 7779800

@@ -136,6 +136,10 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts",
                              }, finally={
                              })
+                         }
+                    +
                     +    if (isTRUE(aggregate.samples) & is.null(sample_col)) {
                     +        stop("if aggregate.samples is TRUE, the column storing sample information must be specified by setting 'sample_col'")
                     +    }
                          n.da <- sum(na.func(da.res$SpatialFDR < da.fdr))
                          if(!is.na(n.da) & n.da == 0){
@@ -174,7 +178,7 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts",
                          # perform DGE _within_ each group of cells using the input design matrix
                          message(paste0("Nhoods aggregated into ", length(nhood.gr), " groups"))
                     -    fake.meta <- data.frame("CellID"=colnames(x), "Nhood.Group"=rep(NA, ncol(x)), "sample_id" = colData(x)[[sample_col]])
                     +    fake.meta <- data.frame("CellID"=colnames(x), "Nhood.Group"=rep(NA, ncol(x)))
                          rownames(fake.meta) <- fake.meta$CellID
                          # do we want to allow cells to be members of multiple groups? This will create
@@ -222,6 +226,7 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts",
                          ## Aggregate expression by sample
                          # To avoid treating cells as independent replicates
                          if (isTRUE(aggregate.samples)) {
                     +        fake.meta[,"sample_id"] <- colData(x)[[sample_col]]
                              fake.meta[,'sample_group'] <- paste(fake.meta[,"sample_id"], fake.meta[,"Nhood.Group"], sep="_")
                              sample_gr_mat <- matrix(0, nrow=nrow(fake.meta), ncol=length(unique(fake.meta$sample_group)))
@@ -279,6 +284,7 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts",
                              } else if(assay == "counts"){
                                  i.res <- .perform_counts_dge(exprs, i.model, model.contrasts=i.contrast,
                                                               gene.offset=gene.offset)
                     +            colnames(i.res)[ncol(i.res)] <- "adj.P.Val"
                              } else{
                                  warning("Assay type is not counts or logcounts - assuming (log)-normal distribution. Use these results at your peril")
                                  i.res <- .perform_lognormal_dge(exprs, i.model,

vignettes/milo_gastrulation.Rmd

History View file @ 7779800

@@ -1,5 +1,5 @@
                      ---
                     -title: "Differential abundance testing with _Milo_"
                     +title: "Differential abundance testing with _Milo_ - Mouse gastrulation example"
                      author:
                        - Emma Dann
                        - Mike Morgan
@@ -9,7 +9,7 @@ output:
                        BiocStyle::pdf_document: default
                      package: miloR
                      vignette: |
                     -  %\VignetteIndexEntry{Differential abundance testing with Milo}
                     +  %\VignetteIndexEntry{Differential abundance testing with Milo - Mouse gastrulation example}
                        %\VignetteEngine{knitr::rmarkdown}
                        %\VignetteEncoding{UTF-8}
                      ---
@@ -17,7 +17,8 @@ vignette: |
                      ```{r, include = FALSE}
                      knitr::opts_chunk$set(
                        collapse = FALSE,
                     -  message=FALSE
                     +  message=FALSE,
                     +  cache=TRUE
+                     )
                      ```
@@ -192,7 +193,7 @@ umap_pl <- plotReducedDim(embryo_milo, dimred = "umap", colour_by="celltype", te
                        guides(fill="none")
                      ## Plot neighbourhood graph
                     -nh_graph_pl <- plotNhoodGraphDA(embryo_milo, da_results, alpha=0.05)
                     +nh_graph_pl <- plotNhoodGraphDA(embryo_milo, da_results, layout="umap",alpha=0.05)
                      umap_pl + nh_graph_pl +
                        plot_layout(guides="collect")
@@ -231,13 +232,30 @@ Here the analyst might get creative, depending on the specific characteristics o
                      In practice, it might be convenient to subset a selected number of neighbourhoods of interest for gene-level downstream analysis. Here, for example, we focus on identifying signatures of DA subpopulations in the epiblast cells.
                      ```{r}
                     -dge_epiblast_smp <- findNhoodMarkers(embryo_milo, da_results, assay = "counts",
                     -                                 aggregate.samples = TRUE, sample_col = "sample",
                     -                                 subset.nhoods = da_results$celltype=="Epiblast")
                     +dge_epiblast_smp <- findNhoodMarkers(embryo_milo, da_results,
                     +                                     assay = "counts", gene.offset = FALSE,
                     +                                     aggregate.samples = TRUE, sample_col = "sample",
                     +                                     subset.nhoods = da_results$celltype=="Epiblast"
                     +                                     )
+                    +
                     +head(dge_epiblast_smp)
                      ```
                     +This identifies a set of marker genes at 10% FDR that distinguish two main groups within the epiblast neighbourhoods: one significantly depleted in the early stage and one significantly enriched. We can visualize the expression of the detected marker genes using the function `plotNhoodExpressionDA`, which shows the average expression in each neighbourhood, with neighbourhoods ranked by log-fold change in the DA test. Note that the gene x nhood expression matrix can be pre-computed and stored using the `calcNhoodExpression` function, to avoid repeating the computation every time you need to plot.
+                    +
                     +In this case we mainly identified negative markers of the epiblast neighbourhoods enriched with age.
                     +```{r, fig.height=7, fig.width=9}
                     +markers <- dge_epiblast_smp[which(dge_epiblast_smp$adj.P.Val_1 < 0.1), "GeneID"]
                     +logcounts(embryo_milo) <- log1p(counts(embryo_milo))
                     +embryo_milo <- calcNhoodExpression(embryo_milo, subset.row=markers)
+                    +
                     +plotNhoodExpressionDA(embryo_milo, da_results, features = markers,
                     +                      subset.nhoods = da_results$celltype=="Epiblast",
                     +                      assay="logcounts", scale_to_1 = TRUE
                     +                      )
                     +```
                      <details>