Browse code

Add days to last follow up from the new GDC API model #639

Tiago Chedraoui Silva authored on 07/04/2025 15:24:11
Showing 2 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: TCGAbiolinks
2 2
 Type: Package
3 3
 Title: TCGAbiolinks: An R/Bioconductor package for integrative analysis with GDC data
4
-Version: 2.35.3
4
+Version: 2.35.4
5 5
 Date: 2024-01-01
6 6
 Author: Antonio Colaprico,
7 7
     Tiago Chedraoui Silva,
... ...
@@ -229,7 +229,7 @@ GDCquery_clinic <- function(
229 229
     options.pretty <- "pretty=true"
230 230
 
231 231
     if (grepl("clinical",type,ignore.case = TRUE)) {
232
-        options.expand <- "expand=diagnoses,diagnoses.treatments,annotations,family_histories,demographic,exposures"
232
+        options.expand <- "expand=diagnoses,follow_ups,diagnoses.treatments,annotations,family_histories,demographic,exposures"
233 233
         option.size <- paste0("size=",getNbCases(project,"Clinical"))
234 234
         files.data_category <- "Clinical"
235 235
     } else {
... ...
@@ -266,7 +266,7 @@ GDCquery_clinic <- function(
266 266
 
267 267
     #message(paste0(baseURL,paste(options.pretty,options.expand, option.size, options.filter, sep = "&")))
268 268
     results <- json$data$hits
269
-
269
+    saveRDS(results,"tcgabiolinks_debug.rda")
270 270
     if (grepl("clinical",type,ignore.case = TRUE)) {
271 271
         if (grepl("TCGA",project)) {
272 272
             df <- data.frame("submitter_id" = results$submitter_id)
... ...
@@ -276,6 +276,7 @@ GDCquery_clinic <- function(
276 276
 
277 277
                 # we are getting more results than what we should
278 278
                 diagnoses <- diagnoses[diagnoses$submitter_id %in% df$submitter_id,]
279
+                diagnoses$days_to_last_follow_up <- NULL
279 280
                 df <- merge(df,diagnoses, by="submitter_id", all = TRUE, sort = FALSE)
280 281
             }
281 282
 
... ...
@@ -300,6 +301,23 @@ GDCquery_clinic <- function(
300 301
                 )
301 302
             }
302 303
 
304
+            if ("follow_ups" %in% colnames(results)){
305
+                follow_ups <- rbindlist(lapply(results$follow_ups, function(x) if(is.null(x)) data.frame(NA) else x),fill = T)
306
+                follow_ups$submitter_id <- gsub("_follow_up*","", follow_ups$submitter_id)
307
+
308
+                # we are getting more results than we should: keep only rows whose submitter_id is in df
309
+                follow_ups <- follow_ups[follow_ups$submitter_id %in% df$submitter_id,]
310
+
311
+                # Per submitter_id, take the maximum days_to_follow_up (NA when all values are missing)
312
+                follow_ups_last <- follow_ups %>%
313
+                    dplyr::group_by(submitter_id) %>%
314
+                    dplyr::summarise(
315
+                        days_to_last_follow_up = ifelse(any(!is.na(days_to_follow_up)),max(days_to_follow_up,na.rm = TRUE),NA)
316
+                    )
317
+
318
+                df <- dplyr::full_join(df,follow_ups_last, by = "submitter_id")
319
+            }
320
+
303 321
             if( "treatments" %in% colnames(df)) {
304 322
 
305 323
                 treatments <- purrr::map_dfr(