Browse code

finishing vignette

EmmaDann authored on 09/11/2020 17:42:20
Showing 2 changed files

... ...
@@ -136,6 +136,10 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts",
136 136
         }, finally={
137 137
         })
138 138
     }
139
+    
140
+    if (isTRUE(aggregate.samples) & is.null(sample_col)) {
141
+        stop("if aggregate.samples is TRUE, the column storing sample information must be specified by setting 'sample_col'")
142
+    }
139 143
 
140 144
     n.da <- sum(na.func(da.res$SpatialFDR < da.fdr))
141 145
     if(!is.na(n.da) & n.da == 0){
... ...
@@ -174,7 +178,7 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts",
174 178
     # perform DGE _within_ each group of cells using the input design matrix
175 179
     message(paste0("Nhoods aggregated into ", length(nhood.gr), " groups"))
176 180
 
177
-    fake.meta <- data.frame("CellID"=colnames(x), "Nhood.Group"=rep(NA, ncol(x)), "sample_id" = colData(x)[[sample_col]])
181
+    fake.meta <- data.frame("CellID"=colnames(x), "Nhood.Group"=rep(NA, ncol(x)))
178 182
     rownames(fake.meta) <- fake.meta$CellID
179 183
 
180 184
     # do we want to allow cells to be members of multiple groups? This will create
... ...
@@ -222,6 +226,7 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts",
222 226
     ## Aggregate expression by sample
223 227
     # To avoid treating cells as independent replicates
224 228
     if (isTRUE(aggregate.samples)) {
229
+        fake.meta[,"sample_id"] <- colData(x)[[sample_col]]
225 230
         fake.meta[,'sample_group'] <- paste(fake.meta[,"sample_id"], fake.meta[,"Nhood.Group"], sep="_")
226 231
         
227 232
         sample_gr_mat <- matrix(0, nrow=nrow(fake.meta), ncol=length(unique(fake.meta$sample_group)))
... ...
@@ -279,6 +284,7 @@ findNhoodMarkers <- function(x, da.res, da.fdr=0.1, assay="logcounts",
279 284
         } else if(assay == "counts"){
280 285
             i.res <- .perform_counts_dge(exprs, i.model, model.contrasts=i.contrast,
281 286
                                          gene.offset=gene.offset)
287
+            colnames(i.res)[ncol(i.res)] <- "adj.P.Val"
282 288
         } else{
283 289
             warning("Assay type is not counts or logcounts - assuming (log)-normal distribution. Use these results at your peril")
284 290
             i.res <- .perform_lognormal_dge(exprs, i.model,
... ...
@@ -1,5 +1,5 @@
1 1
 ---
2
-title: "Differential abundance testing with _Milo_"
2
+title: "Differential abundance testing with _Milo_ - Mouse gastrulation example"
3 3
 author:
4 4
   - Emma Dann
5 5
   - Mike Morgan
... ...
@@ -9,7 +9,7 @@ output:
9 9
   BiocStyle::pdf_document: default
10 10
 package: miloR
11 11
 vignette: |
12
-  %\VignetteIndexEntry{Differential abundance testing with Milo}
12
+  %\VignetteIndexEntry{Differential abundance testing with Milo - Mouse gastrulation example}
13 13
   %\VignetteEngine{knitr::rmarkdown}
14 14
   %\VignetteEncoding{UTF-8}
15 15
 ---
... ...
@@ -17,7 +17,8 @@ vignette: |
17 17
 ```{r, include = FALSE}
18 18
 knitr::opts_chunk$set(
19 19
   collapse = FALSE,
20
-  message=FALSE
20
+  message=FALSE,
21
+  cache=TRUE
21 22
 )
22 23
 ```
23 24
 
... ...
@@ -192,7 +193,7 @@ umap_pl <- plotReducedDim(embryo_milo, dimred = "umap", colour_by="celltype", te
192 193
   guides(fill="none")
193 194
 
194 195
 ## Plot neighbourhood graph
195
-nh_graph_pl <- plotNhoodGraphDA(embryo_milo, da_results, alpha=0.05) 
196
+nh_graph_pl <- plotNhoodGraphDA(embryo_milo, da_results, layout="umap",alpha=0.05) 
196 197
   
197 198
 umap_pl + nh_graph_pl +
198 199
   plot_layout(guides="collect")
... ...
@@ -231,13 +232,30 @@ Here the analyst might get creative, depending on the specific characteristics o
231 232
 In practice, it might be convenient to subset a selected number of neighbourhoods of interest for gene-level downstream analysis. Here, for example, we focus on identifying signatures of DA subpopulations in the epiblast cells. 
232 233
 
233 234
 ```{r}
234
-dge_epiblast_smp <- findNhoodMarkers(embryo_milo, da_results, assay = "counts", 
235
-                                 aggregate.samples = TRUE, sample_col = "sample",
236
-                                 subset.nhoods = da_results$celltype=="Epiblast")
235
+dge_epiblast_smp <- findNhoodMarkers(embryo_milo, da_results, 
236
+                                     assay = "counts", gene.offset = FALSE,
237
+                                     aggregate.samples = TRUE, sample_col = "sample",
238
+                                     subset.nhoods = da_results$celltype=="Epiblast"
239
+                                     )
240
+
241
+head(dge_epiblast_smp)
237 242
 ```
238 243
 
244
+This identifies n marker genes at FDR 10% that distinguish two main groups within the epiblast neighbourhoods, one significantly depleted in the early stage and one significantly enriched. We can visualize expression of the detected marker genes using the function `plotNhoodExpressionDA`. This shows the average expression in each neighbourhood, ranked by log-Fold Change in the DA test. Note that the gene x nhood expression matrix can be pre-computed and stored using the `calcNhoodExpression` function, to avoid repeating the computation every time you need to plot.
245
+
246
+In this case we mainly identified negative markers of the epiblast neighbourhoods enriched with age.
239 247
 
248
+```{r, fig.height=7, fig.width=9}
249
+markers <- dge_epiblast_smp[which(dge_epiblast_smp$adj.P.Val_1 < 0.1), "GeneID"]
240 250
 
251
+logcounts(embryo_milo) <- log1p(counts(embryo_milo))
252
+embryo_milo <- calcNhoodExpression(embryo_milo, subset.row=markers)
253
+
254
+plotNhoodExpressionDA(embryo_milo, da_results, features = markers,
255
+                      subset.nhoods = da_results$celltype=="Epiblast",
256
+                      assay="logcounts", scale_to_1 = TRUE
257
+                      )
258
+```
241 259
 
242 260
 
243 261
 <details>