1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,34 @@ |
# Summarise GISTIC copy-number calls for the IMPACT468 gene panel across the
# TCGA PanCancer Atlas studies available through cBioPortal.
#
# Objects created:
#   study_names       - one row per study: display name (Cancer) and studyId
#   all_tcga2018_data - per study and gene, the fraction of returned rows
#                       carrying each copy-number call, plus a sample-size
#                       label and the studyId
#   cbio_studies      - study_names with the sample-size label appended to
#                       the Cancer name
library(cBioPortalData)
library(dplyr)

cbio <- cBioPortal()
studies <- getStudies(cbio)

# Study ids ----

# Compute the PanCancer Atlas mask once and reuse it for both columns.
is_pan_can <- grepl("tcga_pan_can_atlas", studies$studyId)
study_names <- data.frame(
  Cancer = gsub(" \\(TCGA, PanCancer Atlas\\)", "", studies$name[is_pan_can]),
  studyId = studies$studyId[is_pan_can],
  stringsAsFactors = FALSE
)

# Per-study copy-number summary ----

# Fetch the GISTIC calls of one study and summarise them per gene.
#
# api      : the cBioPortal API object returned by cBioPortal().
# study_id : a single studyId string; the molecular profile and sample list
#            ids are derived from it by appending "_gistic" and "_cna".
#
# Returns one row per hugoGeneSymbol with the fraction of rows whose value is
# 1 (Gain), 2 (Amplification), -1 (ShallowDeletion) or -2 (DeepDeletion),
# the label " (N = <number of distinct uniquePatientKey>)" in sample_size,
# and the studyId. An NA in `value` propagates to that gene's fractions.
summarise_study_cna <- function(api, study_id) {
  cna_tables <- getDataByGenePanel(
    api, study_id,
    genePanelId = "IMPACT468",
    molecularProfileId = paste0(study_id, "_gistic"),
    sampleListId = paste0(study_id, "_cna")
  )
  # Only the first element of the returned list is used.
  cna_calls <- data.frame(cna_tables[[1]], stringsAsFactors = FALSE)
  n_patients <- length(unique(cna_calls$uniquePatientKey))

  cna_summary <- cna_calls %>%
    group_by(hugoGeneSymbol) %>%
    summarise(
      Gain = sum(value == 1) / n(),
      Amplification = sum(value == 2) / n(),
      ShallowDeletion = sum(value == -1) / n(),
      DeepDeletion = sum(value == -2) / n()
    )

  # rep() to nrow() keeps the assignment valid when the summary has no rows.
  cna_summary$sample_size <- rep(
    paste0(" (N = ", n_patients, ")"),
    nrow(cna_summary)
  )
  cna_summary$studyId <- rep(study_id, nrow(cna_summary))
  cna_summary
}

# Build every per-study summary first and bind once, instead of growing the
# table with rbind() on each iteration.
all_tcga2018_data <- bind_rows(
  lapply(
    study_names$studyId,
    function(study_id) summarise_study_cna(cbio, study_id)
  )
)

# Study labels with sample sizes ----

# One row per (studyId, sample_size) pair.
ss <- distinct(all_tcga2018_data, studyId, sample_size)

cbio_studies <- inner_join(study_names, ss, by = c("studyId"))
cbio_studies$Cancer <- paste0(cbio_studies$Cancer, cbio_studies$sample_size)
# Explicit namespace: select() is also exported by other packages.
cbio_studies <- dplyr::select(cbio_studies, Cancer, studyId)