Browse code

Reducing runtime bioconductor request

Tiago Silva authored on 15/02/2023 22:20:37
Showing 1 changed files

... ...
@@ -64,7 +64,7 @@ Other useful clinical information available are:
64 64
 ## Clinical
65 65
 In this example we will fetch clinical data from BCR Biotab files.
66 66
 
67
-```{r results='hide', echo=TRUE, message=FALSE, warning=FALSE}
67
+```{r BCR_Biotab, results='hide', echo=TRUE, message=FALSE, warning=FALSE}
68 68
 query <- GDCquery(
69 69
     project = "TCGA-ACC", 
70 70
     data.category = "Clinical",
... ...
@@ -94,37 +94,27 @@ clinical.BCRtab.all$clinical_drug_acc  %>%
94 94
 ```
95 95
 
96 96
 
97
-In this example we will fetch all BRCA BCR Biotab files, and look for the ER status.
97
+In this example we will fetch all ACC BCR Biotab files, list the available tables, and inspect the columns of the patient table.
98 98
 
99 99
 ```{r, results = "hide",cache=TRUE, message=FALSE}
100 100
 library(TCGAbiolinks)
101
-query <- GDCquery(project = "TCGA-BRCA", 
102
-                  data.category = "Clinical",
103
-                  data.type = "Clinical Supplement", 
104
-                  data.format = "BCR Biotab")
101
+query <- GDCquery(
102
+    project = "TCGA-ACC", 
103
+    data.category = "Clinical",
104
+    data.type = "Clinical Supplement", 
105
+    data.format = "BCR Biotab"
106
+)
107
+
105 108
 GDCdownload(query)
106
-clinical.BCRtab.all <- GDCprepare(query)
109
+clinical_tab_all <- GDCprepare(query)
107 110
 ```
108 111
 
109 112
 ```{R}
110 113
 # All available tables
111
-names(clinical.BCRtab.all)
112
-
113
-# colnames from clinical_patient_brca
114
-tibble::tibble(sort(colnames(clinical.BCRtab.all$clinical_patient_brca)))
114
+names(clinical_tab_all)
115 115
 
116
-# ER status count
117
-plyr::count(clinical.BCRtab.all$clinical_patient_brca$er_status_by_ihc)
118
-
119
-# ER content 
120
-er.cols <- grep("^er",colnames(clinical.BCRtab.all$clinical_patient_brca))
121
-clinical.BCRtab.all$clinical_patient_brca[,c(2,er.cols)] %>% 
122
-    DT::datatable(options = list(scrollX = TRUE))
123
-
124
-# All columns content first rows
125
-clinical.BCRtab.all$clinical_patient_brca %>% 
126
-    head  %>% 
127
-    DT::datatable(options = list(scrollX = TRUE, keys = TRUE))
116
+# columns from clinical_patient
117
+dplyr::glimpse(clinical_tab_all$clinical_patient_acc)
128 118
 ```
129 119
 
130 120
 ## Biospecimen
... ...
@@ -132,19 +122,21 @@ clinical.BCRtab.all$clinical_patient_brca %>%
132 122
 
133 123
 ```{r, results = "hide",cache=TRUE, message=FALSE,warning=FALSE}
134 124
 # Biospecimen BCR Biotab
135
-query.biospecimen <- GDCquery(project = "TCGA-BRCA", 
136
-                              data.category = "Biospecimen",
137
-                              data.type = "Biospecimen Supplement", 
138
-                              data.format = "BCR Biotab")
139
-GDCdownload(query.biospecimen)
140
-biospecimen.BCRtab.all <- GDCprepare(query.biospecimen)
125
+query_biospecimen <- GDCquery(
126
+    project = "TCGA-ACC", 
127
+    data.category = "Biospecimen",
128
+    data.type = "Biospecimen Supplement", 
129
+    data.format = "BCR Biotab"
130
+)
131
+GDCdownload(query_biospecimen)
132
+biospecimen_tab_all <- GDCprepare(query_biospecimen)
141 133
 ```
142 134
 
143 135
 ```{R}
144 136
 # All available tables
145
-names(biospecimen.BCRtab.all)
137
+names(biospecimen_tab_all)
146 138
 
147
-biospecimen.BCRtab.all$ssf_normal_controls_ov  %>% 
139
+biospecimen_tab_all$biospecimen_sample_acc  %>% 
148 140
     head  %>% 
149 141
     DT::datatable(options = list(scrollX = TRUE, keys = TRUE))
150 142
 ```
... ...
@@ -155,53 +147,35 @@ biospecimen.BCRtab.all$ssf_normal_controls_ov  %>%
155 147
 In this example we will fetch clinical indexed data (the same as shown in the data portal).
156 148
 
157 149
 ```{r results='hide', echo=TRUE, message=FALSE, warning=FALSE}
158
-clinical <- GDCquery_clinic(project = "TCGA-LUAD", type = "clinical")
150
+clinical <- GDCquery_clinic(project = "TCGA-ACC", type = "clinical")
159 151
 ```
160 152
 
161 153
 ```{r  echo=TRUE, message=FALSE, warning=FALSE}
162 154
 clinical %>%
163 155
     head %>% 
164
-    DT::datatable(filter = 'top', 
165
-                  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),  
166
-                  rownames = FALSE)
167
-```
168
-
169
-
170
-```{r results='hide', echo=TRUE, message=FALSE, warning=FALSE}
171
-clinical <- GDCquery_clinic(project = "BEATAML1.0-COHORT", type = "clinical")
172
-```
173
-
174
-```{r  echo=TRUE, message=FALSE, warning=FALSE}
175
-clinical %>% 
176
-    head %>% 
177
-    DT::datatable(filter = 'top', 
178
-                  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),  
179
-                  rownames = FALSE)
156
+    DT::datatable(
157
+        filter = 'top', 
158
+        options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),  
159
+        rownames = FALSE
160
+    )
180 161
 ```
181 162
 
182 163
 
183
-```{r results='hide', echo=TRUE, message=FALSE, warning=FALSE}
184
-clinical <- GDCquery_clinic(project = "CPTAC-2", type = "clinical")
185
-```
186
-
187
-```{r  echo=TRUE, message=FALSE, warning=FALSE}
188
-clinical %>% 
189
-    head %>% 
190
-    DT::datatable(filter = 'top', 
191
-                  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),  
192
-                  rownames = FALSE)
193
-```
164
+```{r eval=FALSE,results='hide', echo=TRUE, message=FALSE, warning=FALSE}
165
+clinical_beataml <- GDCquery_clinic(
166
+    project = "BEATAML1.0-COHORT", 
167
+    type = "clinical"
168
+)
194 169
 
195
-```{r results='hide', echo=TRUE, message=FALSE, warning=FALSE}
196
-clinical <- GDCquery_clinic(project = "GENIE-MSK", type = "clinical")
197
-```
170
+clinical_cptac2 <- GDCquery_clinic(
171
+    project = "CPTAC-2", 
172
+    type = "clinical"
173
+)
198 174
 
199
-```{r  echo=TRUE, message=FALSE, warning=FALSE}
200
-clinical %>% 
201
-    head %>% 
202
-    DT::datatable(filter = 'top', 
203
-                  options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),  
204
-                  rownames = FALSE)
175
+clinical_genie <- GDCquery_clinic(
176
+    project = "GENIE-MSK", 
177
+    type = "clinical"
178
+)
205 179
 ```
206 180
 
207 181
 
... ...
@@ -242,22 +216,27 @@ The selection of the table is done by the argument `clinical.info`.
242 216
 Below are several examples fetching clinical data directly from the clinical XML files.
243 217
 
244 218
 ```{r results = 'hide',echo=TRUE, message=FALSE, warning=FALSE}
245
-query <- GDCquery(project = "TCGA-COAD", 
246
-                  data.category = "Clinical", 
247
-                  file.type = "xml", 
248
-                  barcode = c("TCGA-RU-A8FL","TCGA-AA-3972"))
219
+query <- GDCquery(
220
+    project = "TCGA-COAD", 
221
+    data.category = "Clinical", 
222
+    file.type = "xml", 
223
+    barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")
224
+)
249 225
 GDCdownload(query)
250 226
 clinical <- GDCprepare_clinic(query, clinical.info = "patient")
251 227
 ```
228
+
252 229
 ```{r  echo = TRUE, message = FALSE, warning = FALSE}
253 230
 clinical %>% 
254 231
     datatable(filter = 'top', 
255 232
               options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),  
256 233
               rownames = FALSE)
257 234
 ```
235
+
258 236
 ```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE}
259 237
 clinical.drug <- GDCprepare_clinic(query, clinical.info = "drug")
260 238
 ```
239
+
261 240
 ```{r  echo = TRUE, message = FALSE, warning = FALSE}
262 241
 clinical.drug %>% 
263 242
     datatable(filter = 'top', 
... ...
@@ -268,15 +247,18 @@ clinical.drug %>%
268 247
 ```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE}
269 248
 clinical.radiation <- GDCprepare_clinic(query, clinical.info = "radiation")
270 249
 ```
250
+
271 251
 ```{r  echo = TRUE, message = FALSE, warning = FALSE}
272 252
 clinical.radiation %>% 
273 253
     datatable(filter = 'top', 
274 254
               options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),  
275 255
               rownames = FALSE)
276 256
 ```
257
+
277 258
 ```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE}
278 259
 clinical.admin <- GDCprepare_clinic(query, clinical.info = "admin")
279 260
 ```
261
+
280 262
 ```{r  echo = TRUE, message = FALSE, warning = FALSE}
281 263
 clinical.admin %>% 
282 264
     datatable(filter = 'top', 
... ...
@@ -297,12 +279,14 @@ Reference: [TCGA wiki](https://wiki.nci.nih.gov/display/TCGA/Microsatellite+data
297 279
 Level 3 data is included in BCR clinical-based submissions and can be downloaded as follows:
298 280
 
299 281
 ```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE,eval = F}
300
-query <- GDCquery(project = "TCGA-COAD", 
301
-                  data.category = "Other",
302
-                  legacy = TRUE,
303
-                  access = "open",
304
-                  data.type = "Auxiliary test",
305
-                  barcode = c("TCGA-AD-A5EJ","TCGA-DM-A0X9"))  
282
+query <- GDCquery(
283
+    project = "TCGA-COAD", 
284
+    data.category = "Other",
285
+    legacy = TRUE,
286
+    access = "open",
287
+    data.type = "Auxiliary test",
288
+    barcode = c("TCGA-AD-A5EJ","TCGA-DM-A0X9")
289
+)  
306 290
 GDCdownload(query)
307 291
 msi_results <- GDCprepare_clinic(query, "msi")
308 292
 ```
... ...
@@ -315,20 +299,24 @@ msi_results %>% DT::datatable(options = list(scrollX = TRUE, keys = TRUE))
315 299
 
316 300
 ```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE}
317 301
 # Tissue slide image files from legacy database
318
-query.legacy <- GDCquery(project = "TCGA-COAD", 
319
-                         data.category = "Clinical", 
320
-                         data.type = "Tissue slide image",
321
-                         legacy = TRUE,
322
-                         barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")) 
302
+query_legacy <- GDCquery(
303
+    project = "TCGA-COAD", 
304
+    data.category = "Clinical", 
305
+    data.type = "Tissue slide image",
306
+    legacy = TRUE,
307
+    barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")
308
+) 
323 309
 
324 310
 # Tissue slide image files from harmonized database
325
-query.harmonized <- GDCquery(project = "TCGA-OV",
326
-                             data.category = "Biospecimen",
327
-                             data.type = 'Slide Image')
311
+query.harmonized <- GDCquery(
312
+    project = "TCGA-OV",
313
+    data.category = "Biospecimen",
314
+    data.type = 'Slide Image'
315
+)
328 316
 ```
329 317
 
330 318
 ```{r  echo=TRUE, message=FALSE, warning=FALSE}
331
-query.legacy %>% 
319
+query_legacy %>% 
332 320
     getResults %>% 
333 321
     DT::datatable(options = list(scrollX = TRUE, keys = TRUE))
334 322
 
... ...
@@ -342,11 +330,13 @@ query.harmonized  %>%
342 330
 
343 331
 ```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE}
344 332
 # Diagnostic slide images from harmonized portal
345
-query.harmonized <- GDCquery(project = "TCGA-COAD", 
346
-                             data.category = "Biospecimen", 
347
-                             data.type = "Slide Image",
348
-                             experimental.strategy = "Diagnostic Slide",
349
-                             barcode = c("TCGA-RU-A8FL","TCGA-AA-3972"))  
333
+query.harmonized <- GDCquery(
334
+    project = "TCGA-COAD", 
335
+    data.category = "Biospecimen", 
336
+    data.type = "Slide Image",
337
+    experimental.strategy = "Diagnostic Slide",
338
+    barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")
339
+)  
350 340
 ```
351 341
 
352 342
 ```{r  echo=TRUE, message=FALSE, warning=FALSE}
... ...
@@ -370,15 +360,17 @@ The clinical data types available in legacy database are:
370 360
 ## Pathology report (PDF)
371 361
 ```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE}
372 362
 # Pathology report from legacy portal 
373
-query.legacy <- GDCquery(project = "TCGA-COAD", 
374
-                         data.category = "Clinical", 
375
-                         data.type = "Pathology report",
376
-                         legacy = TRUE,
377
-                         barcode = c("TCGA-RU-A8FL","TCGA-AA-3972"))  
363
+query_legacy <- GDCquery(
364
+    project = "TCGA-COAD", 
365
+    data.category = "Clinical", 
366
+    data.type = "Pathology report",
367
+    legacy = TRUE,
368
+    barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")
369
+)  
378 370
 ```
379 371
 
380 372
 ```{r  echo=TRUE, message=FALSE, warning=FALSE}
381
-query.legacy %>% 
373
+query_legacy %>% 
382 374
     getResults %>% 
383 375
     DT::datatable(options = list(scrollX = TRUE, keys = TRUE))
384 376
 ```
... ...
@@ -387,11 +379,13 @@ query.legacy %>%
387 379
 
388 380
 ```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE, eval=FALSE}
389 381
 # Tissue slide image
390
-query <- GDCquery(project = "TCGA-COAD", 
391
-                  data.category = "Clinical", 
392
-                  data.type = "Tissue slide image",
393
-                  legacy = TRUE,
394
-                  barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")) 
382
+query <- GDCquery(
383
+    project = "TCGA-COAD", 
384
+    data.category = "Clinical", 
385
+    data.type = "Tissue slide image",
386
+    legacy = TRUE,
387
+    barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")
388
+) 
395 389
 ```
396 390
 
397 391
 ```{r  echo = TRUE, message = FALSE, warning = FALSE}
... ...
@@ -404,11 +398,13 @@ query %>%
404 398
 
405 399
 ```{r results = 'hide', echo = TRUE, message = FALSE, warning = FALSE}
406 400
 # Clinical Supplement
407
-query <- GDCquery(project = "TCGA-COAD", 
408
-                  data.category = "Clinical", 
409
-                  data.type = "Clinical Supplement",
410
-                  legacy = TRUE,
411
-                  barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")) 
401
+query <- GDCquery(
402
+    project = "TCGA-COAD", 
403
+    data.category = "Clinical", 
404
+    data.type = "Clinical Supplement",
405
+    legacy = TRUE,
406
+    barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")
407
+) 
412 408
 ```
413 409
 
414 410
 ```{r  echo=TRUE, message=FALSE, warning=FALSE}
... ...
@@ -421,11 +417,13 @@ query %>%
421 417
 ## Clinical data (Biotab format)
422 418
 ```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE}
423 419
 # Clinical data
424
-query <- GDCquery(project = "TCGA-COAD", 
425
-                  data.category = "Clinical", 
426
-                  data.type = "Clinical data",
427
-                  legacy = TRUE,
428
-                  file.type = "txt")  
420
+query <- GDCquery(
421
+    project = "TCGA-COAD", 
422
+    data.category = "Clinical", 
423
+    data.type = "Clinical data",
424
+    legacy = TRUE,
425
+    file.type = "txt"
426
+)  
429 427
 ```
430 428
 
431 429
 ```{r  echo=TRUE, message=FALSE, warning=FALSE}
... ...
@@ -498,7 +496,8 @@ bar <- c("TCGA-G9-6378-02A-11R-1789-07", "TCGA-CH-5767-04A-11R-1789-07",
498 496
          "TCGA-B6-A1KN-60A-13R-1789-07", "TCGA-AO-A0J5-01A-11R-1789-07",
499 497
          "TCGA-AO-A0J5-01A-11R-1789-07", "TCGA-G9-6336-11A-11R-1789-07",
500 498
          "TCGA-G9-6380-11A-11R-1789-07", "TCGA-G9-6380-01A-11R-1789-07",
501
-         "TCGA-G9-6340-01A-11R-1789-07", "TCGA-G9-6340-11A-11R-1789-07")
499
+         "TCGA-G9-6340-01A-11R-1789-07", "TCGA-G9-6340-11A-11R-1789-07"
500
+)
502 501
 
503 502
 S <- TCGAquery_SampleTypes(bar,"TP")
504 503
 S2 <- TCGAquery_SampleTypes(bar,"NB")