... | ... |
@@ -64,7 +64,7 @@ Other useful clinical information available are: |
64 | 64 |
## Clinical |
65 | 65 |
In this example we will fetch clinical data from BCR Biotab files. |
66 | 66 |
|
67 |
-```{r results='hide', echo=TRUE, message=FALSE, warning=FALSE} |
|
67 |
+```{r BCR_Biotab, results='hide', echo=TRUE, message=FALSE, warning=FALSE} |
|
68 | 68 |
query <- GDCquery( |
69 | 69 |
project = "TCGA-ACC", |
70 | 70 |
data.category = "Clinical", |
... | ... |
@@ -94,37 +94,27 @@ clinical.BCRtab.all$clinical_drug_acc %>% |
94 | 94 |
``` |
95 | 95 |
|
96 | 96 |
|
97 |
-In this example we will fetch all BRCA BCR Biotab files, and look for the ER status. |
|
97 |
+In this example we will fetch all ACC BCR Biotab files, list the available tables, and inspect the columns of the patient table. |
|
98 | 98 |
|
99 | 99 |
```{r, results = "hide",cache=TRUE, message=FALSE} |
100 | 100 |
library(TCGAbiolinks) |
101 |
-query <- GDCquery(project = "TCGA-BRCA", |
|
102 |
- data.category = "Clinical", |
|
103 |
- data.type = "Clinical Supplement", |
|
104 |
- data.format = "BCR Biotab") |
|
101 |
+query <- GDCquery( |
|
102 |
+ project = "TCGA-ACC", |
|
103 |
+ data.category = "Clinical", |
|
104 |
+ data.type = "Clinical Supplement", |
|
105 |
+ data.format = "BCR Biotab" |
|
106 |
+) |
|
107 |
+ |
|
105 | 108 |
GDCdownload(query) |
106 |
-clinical.BCRtab.all <- GDCprepare(query) |
|
109 |
+clinical_tab_all <- GDCprepare(query) |
|
107 | 110 |
``` |
108 | 111 |
|
109 | 112 |
```{R} |
110 | 113 |
# All available tables |
111 |
-names(clinical.BCRtab.all) |
|
112 |
- |
|
113 |
-# colnames from clinical_patient_brca |
|
114 |
-tibble::tibble(sort(colnames(clinical.BCRtab.all$clinical_patient_brca))) |
|
114 |
+names(clinical_tab_all) |
|
115 | 115 |
|
116 |
-# ER status count |
|
117 |
-plyr::count(clinical.BCRtab.all$clinical_patient_brca$er_status_by_ihc) |
|
118 |
- |
|
119 |
-# ER content |
|
120 |
-er.cols <- grep("^er",colnames(clinical.BCRtab.all$clinical_patient_brca)) |
|
121 |
-clinical.BCRtab.all$clinical_patient_brca[,c(2,er.cols)] %>% |
|
122 |
- DT::datatable(options = list(scrollX = TRUE)) |
|
123 |
- |
|
124 |
-# All columns content first rows |
|
125 |
-clinical.BCRtab.all$clinical_patient_brca %>% |
|
126 |
- head %>% |
|
127 |
- DT::datatable(options = list(scrollX = TRUE, keys = TRUE)) |
|
116 |
+# columns from clinical_patient |
|
117 |
+dplyr::glimpse(clinical_tab_all$clinical_patient_acc) |
|
128 | 118 |
``` |
129 | 119 |
|
130 | 120 |
## Biospecimen |
... | ... |
@@ -132,19 +122,21 @@ clinical.BCRtab.all$clinical_patient_brca %>% |
132 | 122 |
|
133 | 123 |
```{r, results = "hide",cache=TRUE, message=FALSE,warning=FALSE} |
134 | 124 |
# Biospecimen BCR Biotab |
135 |
-query.biospecimen <- GDCquery(project = "TCGA-BRCA", |
|
136 |
- data.category = "Biospecimen", |
|
137 |
- data.type = "Biospecimen Supplement", |
|
138 |
- data.format = "BCR Biotab") |
|
139 |
-GDCdownload(query.biospecimen) |
|
140 |
-biospecimen.BCRtab.all <- GDCprepare(query.biospecimen) |
|
125 |
+query_biospecimen <- GDCquery( |
|
126 |
+ project = "TCGA-ACC", |
|
127 |
+ data.category = "Biospecimen", |
|
128 |
+ data.type = "Biospecimen Supplement", |
|
129 |
+ data.format = "BCR Biotab" |
|
130 |
+) |
|
131 |
+GDCdownload(query_biospecimen) |
|
132 |
+biospecimen_tab_all <- GDCprepare(query_biospecimen) |
|
141 | 133 |
``` |
142 | 134 |
|
143 | 135 |
```{R} |
144 | 136 |
# All available tables |
145 |
-names(biospecimen.BCRtab.all) |
|
137 |
+names(biospecimen_tab_all) |
|
146 | 138 |
|
147 |
-biospecimen.BCRtab.all$ssf_normal_controls_ov %>% |
|
139 |
+biospecimen_tab_all$biospecimen_sample_acc %>% |
|
148 | 140 |
head %>% |
149 | 141 |
DT::datatable(options = list(scrollX = TRUE, keys = TRUE)) |
150 | 142 |
``` |
... | ... |
@@ -155,53 +147,35 @@ biospecimen.BCRtab.all$ssf_normal_controls_ov %>% |
155 | 147 |
In this example we will fetch clinical indexed data (the same data shown in the GDC data portal). |
156 | 148 |
|
157 | 149 |
```{r results='hide', echo=TRUE, message=FALSE, warning=FALSE} |
158 |
-clinical <- GDCquery_clinic(project = "TCGA-LUAD", type = "clinical") |
|
150 |
+clinical <- GDCquery_clinic(project = "TCGA-ACC", type = "clinical") |
|
159 | 151 |
``` |
160 | 152 |
|
161 | 153 |
```{r echo=TRUE, message=FALSE, warning=FALSE} |
162 | 154 |
clinical %>% |
163 | 155 |
head %>% |
164 |
- DT::datatable(filter = 'top', |
|
165 |
- options = list(scrollX = TRUE, keys = TRUE, pageLength = 5), |
|
166 |
- rownames = FALSE) |
|
167 |
-``` |
|
168 |
- |
|
169 |
- |
|
170 |
-```{r results='hide', echo=TRUE, message=FALSE, warning=FALSE} |
|
171 |
-clinical <- GDCquery_clinic(project = "BEATAML1.0-COHORT", type = "clinical") |
|
172 |
-``` |
|
173 |
- |
|
174 |
-```{r echo=TRUE, message=FALSE, warning=FALSE} |
|
175 |
-clinical %>% |
|
176 |
- head %>% |
|
177 |
- DT::datatable(filter = 'top', |
|
178 |
- options = list(scrollX = TRUE, keys = TRUE, pageLength = 5), |
|
179 |
- rownames = FALSE) |
|
156 |
+ DT::datatable( |
|
157 |
+ filter = 'top', |
|
158 |
+ options = list(scrollX = TRUE, keys = TRUE, pageLength = 5), |
|
159 |
+ rownames = FALSE |
|
160 |
+ ) |
|
180 | 161 |
``` |
181 | 162 |
|
182 | 163 |
|
183 |
-```{r results='hide', echo=TRUE, message=FALSE, warning=FALSE} |
|
184 |
-clinical <- GDCquery_clinic(project = "CPTAC-2", type = "clinical") |
|
185 |
-``` |
|
186 |
- |
|
187 |
-```{r echo=TRUE, message=FALSE, warning=FALSE} |
|
188 |
-clinical %>% |
|
189 |
- head %>% |
|
190 |
- DT::datatable(filter = 'top', |
|
191 |
- options = list(scrollX = TRUE, keys = TRUE, pageLength = 5), |
|
192 |
- rownames = FALSE) |
|
193 |
-``` |
|
164 |
+```{r eval=FALSE,results='hide', echo=TRUE, message=FALSE, warning=FALSE} |
|
165 |
+clinical_beataml <- GDCquery_clinic( |
|
166 |
+ project = "BEATAML1.0-COHORT", |
|
167 |
+ type = "clinical" |
|
168 |
+) |
|
194 | 169 |
|
195 |
-```{r results='hide', echo=TRUE, message=FALSE, warning=FALSE} |
|
196 |
-clinical <- GDCquery_clinic(project = "GENIE-MSK", type = "clinical") |
|
197 |
-``` |
|
170 |
+clinical_cptac2 <- GDCquery_clinic( |
|
171 |
+ project = "CPTAC-2", |
|
172 |
+ type = "clinical" |
|
173 |
+) |
|
198 | 174 |
|
199 |
-```{r echo=TRUE, message=FALSE, warning=FALSE} |
|
200 |
-clinical %>% |
|
201 |
- head %>% |
|
202 |
- DT::datatable(filter = 'top', |
|
203 |
- options = list(scrollX = TRUE, keys = TRUE, pageLength = 5), |
|
204 |
- rownames = FALSE) |
|
175 |
+clinical_genie <- GDCquery_clinic( |
|
176 |
+ project = "GENIE-MSK", |
|
177 |
+ type = "clinical" |
|
178 |
+) |
|
205 | 179 |
``` |
206 | 180 |
|
207 | 181 |
|
... | ... |
@@ -242,22 +216,27 @@ The selection of the table is done by the argument `clinical.info`. |
242 | 216 |
Below are several examples fetching clinical data directly from the clinical XML files. |
243 | 217 |
|
244 | 218 |
```{r results = 'hide',echo=TRUE, message=FALSE, warning=FALSE} |
245 |
-query <- GDCquery(project = "TCGA-COAD", |
|
246 |
- data.category = "Clinical", |
|
247 |
- file.type = "xml", |
|
248 |
- barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")) |
|
219 |
+query <- GDCquery( |
|
220 |
+ project = "TCGA-COAD", |
|
221 |
+ data.category = "Clinical", |
|
222 |
+ file.type = "xml", |
|
223 |
+ barcode = c("TCGA-RU-A8FL","TCGA-AA-3972") |
|
224 |
+) |
|
249 | 225 |
GDCdownload(query) |
250 | 226 |
clinical <- GDCprepare_clinic(query, clinical.info = "patient") |
251 | 227 |
``` |
228 |
+ |
|
252 | 229 |
```{r echo = TRUE, message = FALSE, warning = FALSE} |
253 | 230 |
clinical %>% |
254 | 231 |
datatable(filter = 'top', |
255 | 232 |
options = list(scrollX = TRUE, keys = TRUE, pageLength = 5), |
256 | 233 |
rownames = FALSE) |
257 | 234 |
``` |
235 |
+ |
|
258 | 236 |
```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE} |
259 | 237 |
clinical.drug <- GDCprepare_clinic(query, clinical.info = "drug") |
260 | 238 |
``` |
239 |
+ |
|
261 | 240 |
```{r echo = TRUE, message = FALSE, warning = FALSE} |
262 | 241 |
clinical.drug %>% |
263 | 242 |
datatable(filter = 'top', |
... | ... |
@@ -268,15 +247,18 @@ clinical.drug %>% |
268 | 247 |
```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE} |
269 | 248 |
clinical.radiation <- GDCprepare_clinic(query, clinical.info = "radiation") |
270 | 249 |
``` |
250 |
+ |
|
271 | 251 |
```{r echo = TRUE, message = FALSE, warning = FALSE} |
272 | 252 |
clinical.radiation %>% |
273 | 253 |
datatable(filter = 'top', |
274 | 254 |
options = list(scrollX = TRUE, keys = TRUE, pageLength = 5), |
275 | 255 |
rownames = FALSE) |
276 | 256 |
``` |
257 |
+ |
|
277 | 258 |
```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE} |
278 | 259 |
clinical.admin <- GDCprepare_clinic(query, clinical.info = "admin") |
279 | 260 |
``` |
261 |
+ |
|
280 | 262 |
```{r echo = TRUE, message = FALSE, warning = FALSE} |
281 | 263 |
clinical.admin %>% |
282 | 264 |
datatable(filter = 'top', |
... | ... |
@@ -297,12 +279,14 @@ Reference: [TCGA wiki](https://wiki.nci.nih.gov/display/TCGA/Microsatellite+data |
297 | 279 |
Level 3 data is included in BCR clinical-based submissions and can be downloaded as follows: |
298 | 280 |
|
299 | 281 |
```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE,eval = F} |
300 |
-query <- GDCquery(project = "TCGA-COAD", |
|
301 |
- data.category = "Other", |
|
302 |
- legacy = TRUE, |
|
303 |
- access = "open", |
|
304 |
- data.type = "Auxiliary test", |
|
305 |
- barcode = c("TCGA-AD-A5EJ","TCGA-DM-A0X9")) |
|
282 |
+query <- GDCquery( |
|
283 |
+ project = "TCGA-COAD", |
|
284 |
+ data.category = "Other", |
|
285 |
+ legacy = TRUE, |
|
286 |
+ access = "open", |
|
287 |
+ data.type = "Auxiliary test", |
|
288 |
+ barcode = c("TCGA-AD-A5EJ","TCGA-DM-A0X9") |
|
289 |
+) |
|
306 | 290 |
GDCdownload(query) |
307 | 291 |
msi_results <- GDCprepare_clinic(query, "msi") |
308 | 292 |
``` |
... | ... |
@@ -315,20 +299,24 @@ msi_results %>% DT::datatable(options = list(scrollX = TRUE, keys = TRUE)) |
315 | 299 |
|
316 | 300 |
```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE} |
317 | 301 |
# Tissue slide image files from legacy database |
318 |
-query.legacy <- GDCquery(project = "TCGA-COAD", |
|
319 |
- data.category = "Clinical", |
|
320 |
- data.type = "Tissue slide image", |
|
321 |
- legacy = TRUE, |
|
322 |
- barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")) |
|
302 |
+query_legacy <- GDCquery( |
|
303 |
+ project = "TCGA-COAD", |
|
304 |
+ data.category = "Clinical", |
|
305 |
+ data.type = "Tissue slide image", |
|
306 |
+ legacy = TRUE, |
|
307 |
+ barcode = c("TCGA-RU-A8FL","TCGA-AA-3972") |
|
308 |
+) |
|
323 | 309 |
|
324 | 310 |
# Tissue slide image files from harmonized database |
325 |
-query.harmonized <- GDCquery(project = "TCGA-OV", |
|
326 |
- data.category = "Biospecimen", |
|
327 |
- data.type = 'Slide Image') |
|
311 |
+query.harmonized <- GDCquery( |
|
312 |
+ project = "TCGA-OV", |
|
313 |
+ data.category = "Biospecimen", |
|
314 |
+ data.type = 'Slide Image' |
|
315 |
+) |
|
328 | 316 |
``` |
329 | 317 |
|
330 | 318 |
```{r echo=TRUE, message=FALSE, warning=FALSE} |
331 |
-query.legacy %>% |
|
319 |
+query_legacy %>% |
|
332 | 320 |
getResults %>% |
333 | 321 |
DT::datatable(options = list(scrollX = TRUE, keys = TRUE)) |
334 | 322 |
|
... | ... |
@@ -342,11 +330,13 @@ query.harmonized %>% |
342 | 330 |
|
343 | 331 |
```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE} |
344 | 332 |
# Diagnostic slide images from harmonized portal |
345 |
-query.harmonized <- GDCquery(project = "TCGA-COAD", |
|
346 |
- data.category = "Biospecimen", |
|
347 |
- data.type = "Slide Image", |
|
348 |
- experimental.strategy = "Diagnostic Slide", |
|
349 |
- barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")) |
|
333 |
+query.harmonized <- GDCquery( |
|
334 |
+ project = "TCGA-COAD", |
|
335 |
+ data.category = "Biospecimen", |
|
336 |
+ data.type = "Slide Image", |
|
337 |
+ experimental.strategy = "Diagnostic Slide", |
|
338 |
+ barcode = c("TCGA-RU-A8FL","TCGA-AA-3972") |
|
339 |
+) |
|
350 | 340 |
``` |
351 | 341 |
|
352 | 342 |
```{r echo=TRUE, message=FALSE, warning=FALSE} |
... | ... |
@@ -370,15 +360,17 @@ The clinical data types available in legacy database are: |
370 | 360 |
## Pathology report (PDF) |
371 | 361 |
```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE} |
372 | 362 |
# Pathology report from legacy portal |
373 |
-query.legacy <- GDCquery(project = "TCGA-COAD", |
|
374 |
- data.category = "Clinical", |
|
375 |
- data.type = "Pathology report", |
|
376 |
- legacy = TRUE, |
|
377 |
- barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")) |
|
363 |
+query_legacy <- GDCquery( |
|
364 |
+ project = "TCGA-COAD", |
|
365 |
+ data.category = "Clinical", |
|
366 |
+ data.type = "Pathology report", |
|
367 |
+ legacy = TRUE, |
|
368 |
+ barcode = c("TCGA-RU-A8FL","TCGA-AA-3972") |
|
369 |
+) |
|
378 | 370 |
``` |
379 | 371 |
|
380 | 372 |
```{r echo=TRUE, message=FALSE, warning=FALSE} |
381 |
-query.legacy %>% |
|
373 |
+query_legacy %>% |
|
382 | 374 |
getResults %>% |
383 | 375 |
DT::datatable(options = list(scrollX = TRUE, keys = TRUE)) |
384 | 376 |
``` |
... | ... |
@@ -387,11 +379,13 @@ query.legacy %>% |
387 | 379 |
|
388 | 380 |
```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE, eval=FALSE} |
389 | 381 |
# Tissue slide image |
390 |
-query <- GDCquery(project = "TCGA-COAD", |
|
391 |
- data.category = "Clinical", |
|
392 |
- data.type = "Tissue slide image", |
|
393 |
- legacy = TRUE, |
|
394 |
- barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")) |
|
382 |
+query <- GDCquery( |
|
383 |
+ project = "TCGA-COAD", |
|
384 |
+ data.category = "Clinical", |
|
385 |
+ data.type = "Tissue slide image", |
|
386 |
+ legacy = TRUE, |
|
387 |
+ barcode = c("TCGA-RU-A8FL","TCGA-AA-3972") |
|
388 |
+) |
|
395 | 389 |
``` |
396 | 390 |
|
397 | 391 |
```{r echo = TRUE, message = FALSE, warning = FALSE} |
... | ... |
@@ -404,11 +398,13 @@ query %>% |
404 | 398 |
|
405 | 399 |
```{r results = 'hide', echo = TRUE, message = FALSE, warning = FALSE} |
406 | 400 |
# Clinical Supplement |
407 |
-query <- GDCquery(project = "TCGA-COAD", |
|
408 |
- data.category = "Clinical", |
|
409 |
- data.type = "Clinical Supplement", |
|
410 |
- legacy = TRUE, |
|
411 |
- barcode = c("TCGA-RU-A8FL","TCGA-AA-3972")) |
|
401 |
+query <- GDCquery( |
|
402 |
+ project = "TCGA-COAD", |
|
403 |
+ data.category = "Clinical", |
|
404 |
+ data.type = "Clinical Supplement", |
|
405 |
+ legacy = TRUE, |
|
406 |
+ barcode = c("TCGA-RU-A8FL","TCGA-AA-3972") |
|
407 |
+) |
|
412 | 408 |
``` |
413 | 409 |
|
414 | 410 |
```{r echo=TRUE, message=FALSE, warning=FALSE} |
... | ... |
@@ -421,11 +417,13 @@ query %>% |
421 | 417 |
## Clinical data (Biotab format) |
422 | 418 |
```{r results = 'hide', echo=TRUE, message=FALSE, warning=FALSE} |
423 | 419 |
# Clinical data |
424 |
-query <- GDCquery(project = "TCGA-COAD", |
|
425 |
- data.category = "Clinical", |
|
426 |
- data.type = "Clinical data", |
|
427 |
- legacy = TRUE, |
|
428 |
- file.type = "txt") |
|
420 |
+query <- GDCquery( |
|
421 |
+ project = "TCGA-COAD", |
|
422 |
+ data.category = "Clinical", |
|
423 |
+ data.type = "Clinical data", |
|
424 |
+ legacy = TRUE, |
|
425 |
+ file.type = "txt" |
|
426 |
+) |
|
429 | 427 |
``` |
430 | 428 |
|
431 | 429 |
```{r echo=TRUE, message=FALSE, warning=FALSE} |
... | ... |
@@ -498,7 +496,8 @@ bar <- c("TCGA-G9-6378-02A-11R-1789-07", "TCGA-CH-5767-04A-11R-1789-07", |
498 | 496 |
"TCGA-B6-A1KN-60A-13R-1789-07", "TCGA-AO-A0J5-01A-11R-1789-07", |
499 | 497 |
"TCGA-AO-A0J5-01A-11R-1789-07", "TCGA-G9-6336-11A-11R-1789-07", |
500 | 498 |
"TCGA-G9-6380-11A-11R-1789-07", "TCGA-G9-6380-01A-11R-1789-07", |
501 |
- "TCGA-G9-6340-01A-11R-1789-07", "TCGA-G9-6340-11A-11R-1789-07") |
|
499 |
+ "TCGA-G9-6340-01A-11R-1789-07", "TCGA-G9-6340-11A-11R-1789-07" |
|
500 |
+) |
|
502 | 501 |
|
503 | 502 |
S <- TCGAquery_SampleTypes(bar,"TP") |
504 | 503 |
S2 <- TCGAquery_SampleTypes(bar,"NB") |