Fix clinical parsing if no follow-up information is found
... | ... |
@@ -590,20 +590,26 @@ GDCprepare_clinic <- function( |
590 | 590 |
message("Updating days_to_last_followup and vital_status from follow_up information using last entry") |
591 | 591 |
followup <- parseFollowup(files,xpath,clinical.info) |
592 | 592 |
|
593 |
- followup_last <- followup %>% |
|
594 |
- dplyr::group_by(bcr_patient_barcode) %>% |
|
595 |
- dplyr::summarise( |
|
596 |
- days_to_last_followup = max(as.numeric(days_to_last_followup),na.rm = TRUE), |
|
597 |
- vital_status = vital_status[ |
|
598 |
- ifelse( |
|
599 |
- any(followup$days_to_last_followup %in% ""), |
|
600 |
- which(followup$days_to_last_followup %in% ""), |
|
601 |
- which.max(days_to_last_followup) |
|
602 |
- ) |
|
603 |
- ] |
|
604 |
- ) |
|
605 |
- clin$days_to_last_followup <- followup_last$days_to_last_followup[match(clin$bcr_patient_barcode,followup_last$bcr_patient_barcode)] |
|
606 |
- clin$vital_status <- followup_last$vital_status[match(clin$bcr_patient_barcode,followup_last$bcr_patient_barcode)] |
|
593 |
+ if(nrow(followup) > 0){ |
|
594 |
+ followup_last <- followup %>% |
|
595 |
+ dplyr::group_by(bcr_patient_barcode) %>% |
|
596 |
+ dplyr::summarise( |
|
597 |
+ days_to_last_followup = ifelse( |
|
598 |
+ all(is.na(as.numeric(days_to_last_followup))), |
|
599 |
+ NA, |
|
600 |
+ max(as.numeric(days_to_last_followup),na.rm = TRUE) |
|
601 |
+ ), |
|
602 |
+ vital_status = vital_status[ |
|
603 |
+ ifelse( |
|
604 |
+ any(followup$days_to_last_followup %in% ""), |
|
605 |
+ which(followup$days_to_last_followup %in% ""), |
|
606 |
+ which.max(days_to_last_followup) |
|
607 |
+ ) |
|
608 |
+ ] |
|
609 |
+ ) |
|
610 |
+ clin$days_to_last_followup <- followup_last$days_to_last_followup[match(clin$bcr_patient_barcode,followup_last$bcr_patient_barcode)] |
|
611 |
+ clin$vital_status <- followup_last$vital_status[match(clin$bcr_patient_barcode,followup_last$bcr_patient_barcode)] |
|
612 |
+ } |
|
607 | 613 |
} |
608 | 614 |
|
609 | 615 |
if (tolower(clinical.info) == "sample") { |
... | ... |
@@ -64,3 +64,25 @@ test_that("TCGAquery_subtype returns the a data frame if exists data", { |
64 | 64 |
}) |
65 | 65 |
|
66 | 66 |
|
67 |
+test_that("GDCprepare_clinic works if no follow up data is available", { |
|
68 |
+ |
|
69 |
+ skip_on_bioc() |
|
70 |
+ skip_if_offline() |
|
71 |
+ query <- GDCquery( |
|
72 |
+ project = "TCGA-LAML", |
|
73 |
+ data.category = "Clinical", |
|
74 |
+ data.format = "bcr xml" |
|
75 |
+ ) |
|
76 |
+ |
|
77 |
+ GDCdownload( |
|
78 |
+ query = query, |
|
79 |
+ directory = "." |
|
80 |
+ ) |
|
81 |
+ |
|
82 |
+ clinical <- GDCprepare_clinic( |
|
83 |
+ query = query, |
|
84 |
+ directory = ".", |
|
85 |
+ clinical.info = "patient" |
|
86 |
+ ) |
|
87 |
+ |
|
88 |
+}) |
... | ... |
@@ -152,20 +152,3 @@ test_that("getNbFiles and getNbCases works", { |
152 | 152 |
cases <- getNbCases("TCGA-LUAD","Raw microarray data") |
153 | 153 |
expect_true(cases < files) |
154 | 154 |
}) |
155 |
- |
|
156 |
-test_that("getNbFiles and getNbCases works", { |
|
157 |
- skip_on_bioc() |
|
158 |
- skip_if_offline() |
|
159 |
- |
|
160 |
- # This test was added for further study of the TARGET-AML data |
|
161 |
- # There are multiple files for the same patient and the query |
|
162 |
- # gives a warning although the cases are different |
|
163 |
- # should we change to verification and warning output ? |
|
164 |
- query_target <- GDCquery( |
|
165 |
- project = "TARGET-AML", |
|
166 |
- data.category = "Transcriptome Profiling", |
|
167 |
- data.type = "Gene Expression Quantification", |
|
168 |
- workflow.type = "STAR - Counts", |
|
169 |
- barcode = c("TARGET-20-PANLXK","TARGET-20-PATIAK") |
|
170 |
- ) |
|
171 |
-}) |