... | ... |
@@ -136,6 +136,10 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts", |
136 | 136 |
}, finally={ |
137 | 137 |
}) |
138 | 138 |
} |
139 |
+ |
|
140 |
+ if (isTRUE(aggregate.samples) & is.null(sample_col)) { |
|
141 |
+ stop("if aggregate.samples is TRUE, the column storing sample information must be specified by setting 'sample_col'") |
|
142 |
+ } |
|
139 | 143 |
|
140 | 144 |
n.da <- sum(na.func(da.res$SpatialFDR < da.fdr)) |
141 | 145 |
if(!is.na(n.da) & n.da == 0){ |
... | ... |
@@ -174,7 +178,7 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts", |
174 | 178 |
# perform DGE _within_ each group of cells using the input design matrix |
175 | 179 |
message(paste0("Nhoods aggregated into ", length(nhood.gr), " groups")) |
176 | 180 |
|
177 |
- fake.meta <- data.frame("CellID"=colnames(x), "Nhood.Group"=rep(NA, ncol(x)), "sample_id" = colData(x)[[sample_col]]) |
|
181 |
+ fake.meta <- data.frame("CellID"=colnames(x), "Nhood.Group"=rep(NA, ncol(x))) |
|
178 | 182 |
rownames(fake.meta) <- fake.meta$CellID |
179 | 183 |
|
180 | 184 |
# do we want to allow cells to be members of multiple groups? This will create |
... | ... |
@@ -222,6 +226,7 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts", |
222 | 226 |
## Aggregate expression by sample |
223 | 227 |
# To avoid treating cells as independent replicates |
224 | 228 |
if (isTRUE(aggregate.samples)) { |
229 |
+ fake.meta[,"sample_id"] <- colData(x)[[sample_col]] |
|
225 | 230 |
fake.meta[,'sample_group'] <- paste(fake.meta[,"sample_id"], fake.meta[,"Nhood.Group"], sep="_") |
226 | 231 |
|
227 | 232 |
sample_gr_mat <- matrix(0, nrow=nrow(fake.meta), ncol=length(unique(fake.meta$sample_group))) |
... | ... |
@@ -279,6 +284,7 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts", |
279 | 284 |
} else if(assay == "counts"){ |
280 | 285 |
i.res <- .perform_counts_dge(exprs, i.model, model.contrasts=i.contrast, |
281 | 286 |
gene.offset=gene.offset) |
287 |
+ colnames(i.res)[ncol(i.res)] <- "adj.P.Val" |
|
282 | 288 |
} else{ |
283 | 289 |
warning("Assay type is not counts or logcounts - assuming (log)-normal distribution. Use these results at your peril") |
284 | 290 |
i.res <- .perform_lognormal_dge(exprs, i.model, |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
--- |
2 |
-title: "Differential abundance testing with _Milo_" |
|
2 |
+title: "Differential abundance testing with _Milo_ - Mouse gastrulation example" |
|
3 | 3 |
author: |
4 | 4 |
- Emma Dann |
5 | 5 |
- Mike Morgan |
... | ... |
@@ -9,7 +9,7 @@ output: |
9 | 9 |
BiocStyle::pdf_document: default |
10 | 10 |
package: miloR |
11 | 11 |
vignette: | |
12 |
- %\VignetteIndexEntry{Differential abundance testing with Milo} |
|
12 |
+ %\VignetteIndexEntry{Differential abundance testing with Milo - Mouse gastrulation example} |
|
13 | 13 |
%\VignetteEngine{knitr::rmarkdown} |
14 | 14 |
%\VignetteEncoding{UTF-8} |
15 | 15 |
--- |
... | ... |
@@ -17,7 +17,8 @@ vignette: | |
17 | 17 |
```{r, include = FALSE} |
18 | 18 |
knitr::opts_chunk$set( |
19 | 19 |
collapse = FALSE, |
20 |
- message=FALSE |
|
20 |
+ message=FALSE, |
|
21 |
+ cache=TRUE |
|
21 | 22 |
) |
22 | 23 |
``` |
23 | 24 |
|
... | ... |
@@ -192,7 +193,7 @@ umap_pl <- plotReducedDim(embryo_milo, dimred = "umap", colour_by="celltype", te |
192 | 193 |
guides(fill="none") |
193 | 194 |
|
194 | 195 |
## Plot neighbourhood graph |
195 |
-nh_graph_pl <- plotNhoodGraphDA(embryo_milo, da_results, alpha=0.05) |
|
196 |
+nh_graph_pl <- plotNhoodGraphDA(embryo_milo, da_results, layout="umap",alpha=0.05) |
|
196 | 197 |
|
197 | 198 |
umap_pl + nh_graph_pl + |
198 | 199 |
plot_layout(guides="collect") |
... | ... |
@@ -231,13 +232,30 @@ Here the analyst might get creative, depending on the specific characteristics o |
231 | 232 |
In practice, it might be convenient to subset a selected number of neighbourhoods of interest for gene-level downstream analysis. Here, for example, we focus on identifying signatures of DA subpopulations in the epiblast cells. |
232 | 233 |
|
233 | 234 |
```{r} |
234 |
-dge_epiblast_smp <- findNhoodMarkers(embryo_milo, da_results, assay = "counts", |
|
235 |
- aggregate.samples = TRUE, sample_col = "sample", |
|
236 |
- subset.nhoods = da_results$celltype=="Epiblast") |
|
235 |
+dge_epiblast_smp <- findNhoodMarkers(embryo_milo, da_results, |
|
236 |
+ assay = "counts", gene.offset = FALSE, |
|
237 |
+ aggregate.samples = TRUE, sample_col = "sample", |
|
238 |
+ subset.nhoods = da_results$celltype=="Epiblast" |
|
239 |
+ ) |
|
240 |
+ |
|
241 |
+head(dge_epiblast_smp) |
|
237 | 242 |
``` |
238 | 243 |
|
244 |
+This identifies n marker genes at FDR 10% that distinguish two main groups within the epiblast neighbourhoods, one significantly depleted in the early stage and one significantly enriched. We can visualize expression of the detected marker genes using the function `plotNhoodExpressionDA`. This shows the average expression in each neighbourhood, ranked by log-Fold Change in the DA test. Note that the gene x nhood expression matrix can be pre-computed and stored using the `calcNhoodExpression` function, to avoid repeating the computation every time you need to plot. |
|
245 |
+ |
|
246 |
+In this case we mainly identified negative markers of the epiblast neighbourhoods enriched with age. |
|
239 | 247 |
|
248 |
+```{r, fig.height=7, fig.width=9} |
|
249 |
+markers <- dge_epiblast_smp[which(dge_epiblast_smp$adj.P.Val_1 < 0.1), "GeneID"] |
|
240 | 250 |
|
251 |
+logcounts(embryo_milo) <- log1p(counts(embryo_milo)) |
|
252 |
+embryo_milo <- calcNhoodExpression(embryo_milo, subset.row=markers) |
|
253 |
+ |
|
254 |
+plotNhoodExpressionDA(embryo_milo, da_results, features = markers, |
|
255 |
+ subset.nhoods = da_results$celltype=="Epiblast", |
|
256 |
+ assay="logcounts", scale_to_1 = TRUE |
|
257 |
+ ) |
|
258 |
+``` |
|
241 | 259 |
|
242 | 260 |
|
243 | 261 |
<details> |