Browse code

Merge pull request #583 from BioinformaticsFMRP/devel

Fix clinical parsing if no follow-up information is found

Tiago Chedraoui Silva authored on 24/05/2023 02:38:24 • GitHub committed on 24/05/2023 02:38:24
Showing 3 changed files

... ...
@@ -590,20 +590,26 @@ GDCprepare_clinic <- function(
590 590
         message("Updating days_to_last_followup and vital_status from follow_up information using last entry")
591 591
         followup <- parseFollowup(files,xpath,clinical.info)
592 592
 
593
-        followup_last <- followup %>%
594
-            dplyr::group_by(bcr_patient_barcode) %>%
595
-            dplyr::summarise(
596
-                days_to_last_followup = max(as.numeric(days_to_last_followup),na.rm = TRUE),
597
-                vital_status = vital_status[
598
-                    ifelse(
599
-                        any(followup$days_to_last_followup %in% ""),
600
-                        which(followup$days_to_last_followup %in% ""),
601
-                        which.max(days_to_last_followup)
602
-                    )
603
-                ]
604
-            )
605
-        clin$days_to_last_followup <- followup_last$days_to_last_followup[match(clin$bcr_patient_barcode,followup_last$bcr_patient_barcode)]
606
-        clin$vital_status <- followup_last$vital_status[match(clin$bcr_patient_barcode,followup_last$bcr_patient_barcode)]
593
+        if(nrow(followup) > 0){
594
+            followup_last <- followup %>%
595
+                dplyr::group_by(bcr_patient_barcode) %>%
596
+                dplyr::summarise(
597
+                    days_to_last_followup = ifelse(
598
+                        all(is.na(as.numeric(days_to_last_followup))),
599
+                        NA,
600
+                        max(as.numeric(days_to_last_followup),na.rm = TRUE)
601
+                    ),
602
+                    vital_status = vital_status[
603
+                        ifelse(
604
+                            any(followup$days_to_last_followup %in% ""),
605
+                            which(followup$days_to_last_followup %in% ""),
606
+                            which.max(days_to_last_followup)
607
+                        )
608
+                    ]
609
+                )
610
+            clin$days_to_last_followup <- followup_last$days_to_last_followup[match(clin$bcr_patient_barcode,followup_last$bcr_patient_barcode)]
611
+            clin$vital_status <- followup_last$vital_status[match(clin$bcr_patient_barcode,followup_last$bcr_patient_barcode)]
612
+        }
607 613
     }
608 614
 
609 615
     if (tolower(clinical.info) == "sample") {
... ...
@@ -64,3 +64,25 @@ test_that("TCGAquery_subtype returns the a data frame if exists data", {
64 64
 })
65 65
 
66 66
 
67
+test_that("GDCprepare_clinic works if no follow up data is available", {
68
+
69
+    skip_on_bioc()
70
+    skip_if_offline()
71
+    query <- GDCquery(
72
+        project = "TCGA-LAML",
73
+        data.category = "Clinical",
74
+        data.format = "bcr xml"
75
+    )
76
+
77
+    GDCdownload(
78
+        query = query,
79
+        directory = "."
80
+    )
81
+
82
+    clinical <- GDCprepare_clinic(
83
+        query = query,
84
+        directory = ".",
85
+        clinical.info = "patient"
86
+    )
87
+
88
+})
... ...
@@ -152,20 +152,3 @@ test_that("getNbFiles and getNbCases works", {
152 152
     cases <- getNbCases("TCGA-LUAD","Raw microarray data")
153 153
     expect_true(cases < files)
154 154
 })
155
-
156
-test_that("getNbFiles and getNbCases works", {
157
-    skip_on_bioc()
158
-    skip_if_offline()
159
-
160
-    # This test was added for further study  of the TARGET-AML data
161
-    # There are multiple files for the same patient and the query
162
-    # gives a warning although the cases are different
163
-    # should we change to verification and warning output ?
164
-    query_target <- GDCquery(
165
-        project = "TARGET-AML",
166
-        data.category = "Transcriptome Profiling",
167
-        data.type = "Gene Expression Quantification",
168
-        workflow.type = "STAR - Counts",
169
-        barcode = c("TARGET-20-PANLXK","TARGET-20-PATIAK")
170
-    )
171
-})