Browse code

accepted granges objects as input data

Rebecca Greenblatt authored on 15/04/2021 15:44:35
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,34 @@
library(cBioPortalData)
library(dplyr)

# Summarise copy-number calls for every cBioPortal study whose studyId
# contains "tcga_pan_can_atlas", restricted to the IMPACT468 gene panel.
#
# Objects created:
#   study_names       - one row per matching study: Cancer (study name with
#                       the " (TCGA, PanCancer Atlas)" suffix removed), studyId
#   all_tcga2018_data - one row per gene per study: fraction of rows with
#                       value 1 / 2 / -1 / -2, plus sample_size and studyId
#   cbio_studies      - study_names with the " (N = ...)" label appended to
#                       Cancer
#
# Requires network access to the cBioPortal API.

cbio <- cBioPortal()
studies <- getStudies(cbio)

# get studyIds
# Compute the study mask once and reuse it for both columns.
is_pan_can <- grepl("tcga_pan_can_atlas", studies$studyId)
study_names <- data.frame(
  # fixed = TRUE: the suffix is a literal string, not a regular expression.
  Cancer = gsub(" (TCGA, PanCancer Atlas)", "", studies$name[is_pan_can],
                fixed = TRUE),
  studyId = studies$studyId[is_pan_can],
  stringsAsFactors = FALSE
)

# loop through studyIds to get all_tcga2018_data
# Collect per-study summaries in a preallocated list and bind once at the
# end; calling rbind() inside the loop recopies the accumulated rows on
# every iteration.
study_summaries <- vector("list", nrow(study_names))
for (i in seq_len(nrow(study_names))) {
  study <- study_names$studyId[i]
  cbio_table <- getDataByGenePanel(
    cbio, study,
    genePanelId = "IMPACT468",
    molecularProfileId = paste0(study, "_gistic"),
    sampleListId = paste0(study, "_cna")
  )
  # Only the first element of the returned list is used.
  cbio_dat <- data.frame(cbio_table[[1]], stringsAsFactors = FALSE)

  # Per-gene share of rows at each copy-number level. The denominator is
  # n(), i.e. every row for that gene in this study.
  cbio_summ <- cbio_dat %>%
    group_by(hugoGeneSymbol) %>%
    summarise(
      Gain = sum(value == 1) / n(),
      Amplification = sum(value == 2) / n(),
      ShallowDeletion = sum(value == -1) / n(),
      DeepDeletion = sum(value == -2) / n()
    ) %>%
    mutate(
      # Label is built from the number of distinct patients in this study.
      sample_size = paste0(
        " (N = ", length(unique(cbio_dat$uniquePatientKey)), ")"
      ),
      studyId = study
    )

  study_summaries[[i]] <- cbio_summ
}
all_tcga2018_data <- bind_rows(study_summaries)

# add sample sizes to get cbio_studies
# distinct() yields one row per study/label pair without leaving the result
# grouped.
ss <- distinct(all_tcga2018_data, studyId, sample_size)
cbio_studies <- inner_join(study_names, ss, by = "studyId")
cbio_studies$Cancer <- paste0(cbio_studies$Cancer, cbio_studies$sample_size)
cbio_studies <- dplyr::select(cbio_studies, Cancer, studyId)