Former-commit-id: 08934f702c8dcff6e4574012757e1573a72cca89 [formerly 953900335b20e98368e9c51b963013b85c7daa19]
Former-commit-id: 618018322e5cf03660ef741bc13c8f2d958c2dd8
... | ... |
@@ -120,7 +120,8 @@ importFrom(dplyr,funs) |
120 | 120 |
importFrom(dplyr,left_join) |
121 | 121 |
importFrom(dplyr,mutate_all) |
122 | 122 |
importFrom(dplyr,pull) |
123 |
-importFrom(dplyr,relocate) |
|
123 |
+importFrom(dplyr,row_number) |
|
124 |
+importFrom(dplyr,slice) |
|
124 | 125 |
importFrom(edgeR,DGEList) |
125 | 126 |
importFrom(edgeR,estimateCommonDisp) |
126 | 127 |
importFrom(edgeR,estimateGLMCommonDisp) |
... | ... |
@@ -183,7 +183,7 @@ getBarcodefromAliquot <- function(aliquot){ |
183 | 183 |
#' (list of barcodes, aliquot ids, etc) |
184 | 184 |
#' @param step How many items to be evaluated per API call |
185 | 185 |
#' @param FUN function that calls the API |
186 |
-splitAPICall <- function(FUN,step = 20,items){ |
|
186 |
+splitAPICall <- function(FUN, step = 20, items){ |
|
187 | 187 |
info <- NULL |
188 | 188 |
info <- tryCatch({ |
189 | 189 |
for(i in 0:(ceiling(length(items)/step) - 1)){ |
... | ... |
@@ -1283,7 +1283,8 @@ getAliquot_ids <- function(barcode){ |
1283 | 1283 |
|
1284 | 1284 |
# getBarcodeInfo(c("TCGA-OR-A5K3-01A","C3N-00321-01")) |
1285 | 1285 |
# barcode is: sample_submitter_id |
1286 |
-#' @importFrom dplyr bind_cols |
|
1286 |
+#' @importFrom dplyr bind_cols slice row_number |
|
1287 |
+#' |
|
1287 | 1288 |
getBarcodeInfo <- function(barcode) { |
1288 | 1289 |
baseURL <- "https://api.gdc.cancer.gov/cases/?" |
1289 | 1290 |
options.pretty <- "pretty=true" |
... | ... |
@@ -1332,7 +1333,7 @@ getBarcodeInfo <- function(barcode) { |
1332 | 1333 |
tryCatch({ |
1333 | 1334 |
samples$submitter_id <- |
1334 | 1335 |
str_extract_all(samples$submitter_id, |
1335 |
- paste(submitter_id, collapse = "|"), |
|
1336 |
+ paste(c(submitter_id,barcode), collapse = "|"), |
|
1336 | 1337 |
simplify = TRUE) %>% as.character |
1337 | 1338 |
}, error = function(e){ |
1338 | 1339 |
samples$submitter_id <- submitter_id |
... | ... |
@@ -1356,7 +1357,22 @@ getBarcodeInfo <- function(barcode) { |
1356 | 1357 |
# this is required since the sample might not have a diagnosis |
1357 | 1358 |
if(!any(df$submitter_id %in% diagnoses$submitter_id)){ |
1358 | 1359 |
diagnoses$submitter_id <- NULL |
1359 |
- df <- dplyr::bind_cols(df,diagnoses) |
|
1360 |
+ # The sample migth have different sample types |
|
1361 |
+ # The diagnosis the same for each one of these samples |
|
1362 |
+ # in that case we will have a 1 to mapping and binding will |
|
1363 |
+ # not work. We need then to replicate diagnosis to each sample |
|
1364 |
+ # and not each patient |
|
1365 |
+ # Cases can be replicated with getBarcodeInfo(c("BA2691R","BA2577R","BA2748R")) |
|
1366 |
+ if(nrow(diagnoses) < nrow(df)){ |
|
1367 |
+ diagnoses <- plyr::ldply( |
|
1368 |
+ 1:length(results$submitter_sample_ids), |
|
1369 |
+ .fun = function(x){ |
|
1370 |
+ diagnoses[x] %>% # replicate diagnoses the number of samples |
|
1371 |
+ as.data.frame() %>% |
|
1372 |
+ dplyr::slice(rep(dplyr::row_number(), sum(results$submitter_sample_ids[[x]] %in% barcode)))}) |
|
1373 |
+ } |
|
1374 |
+ |
|
1375 |
+ df <- dplyr::bind_cols(df %>% as.data.frame,diagnoses %>% as.data.frame) |
|
1360 | 1376 |
} else { |
1361 | 1377 |
df <- left_join(df, diagnoses, by = "submitter_id") |
1362 | 1378 |
} |
... | ... |
@@ -1389,7 +1405,24 @@ getBarcodeInfo <- function(barcode) { |
1389 | 1405 |
|
1390 | 1406 |
if(!any(df$submitter_id %in% demographic$submitter_id)){ |
1391 | 1407 |
demographic$submitter_id <- NULL |
1392 |
- df <- dplyr::bind_cols(df,demographic) |
|
1408 |
+ demographic$updated_datetime <- NULL |
|
1409 |
+ demographic$created_datetime <- NULL |
|
1410 |
+ # The sample migth have different sample types |
|
1411 |
+ # The diagnosis the same for each one of these samples |
|
1412 |
+ # in that case we will have a 1 to mapping and binding will |
|
1413 |
+ # not work. We need then to replicate diagnosis to each sample |
|
1414 |
+ # and not each patient |
|
1415 |
+ # Cases can be replicated with getBarcodeInfo(c("BA2691R","BA2577R","BA2748R")) |
|
1416 |
+ if(nrow(demographic) < nrow(df)){ |
|
1417 |
+ demographic <- plyr::ldply( |
|
1418 |
+ 1:length(results$submitter_sample_ids), |
|
1419 |
+ .fun = function(x){ |
|
1420 |
+ demographic[x,] %>% # replicate diagnoses the number of samples |
|
1421 |
+ as.data.frame() %>% |
|
1422 |
+ dplyr::slice(rep(dplyr::row_number(), sum(results$submitter_sample_ids[[x]] %in% barcode)))}) |
|
1423 |
+ } |
|
1424 |
+ |
|
1425 |
+ df <- dplyr::bind_cols(df %>% as.data.frame,demographic) |
|
1393 | 1426 |
} else { |
1394 | 1427 |
df <- left_join(df,demographic, by = "submitter_id") |
1395 | 1428 |
} |
... | ... |
@@ -1408,6 +1441,16 @@ getBarcodeInfo <- function(barcode) { |
1408 | 1441 |
by = "submitter_id") |
1409 | 1442 |
}) |
1410 | 1443 |
} else { |
1444 |
+ |
|
1445 |
+ if(nrow(projects.info) < nrow(df)){ |
|
1446 |
+ projects.info <- plyr::ldply( |
|
1447 |
+ 1:length(results$submitter_sample_ids), |
|
1448 |
+ .fun = function(x){ |
|
1449 |
+ projects.info[x,] %>% # replicate diagnoses the number of samples |
|
1450 |
+ as.data.frame() %>% |
|
1451 |
+ dplyr::slice(rep(dplyr::row_number(), sum(results$submitter_sample_ids[[x]] %in% barcode)))}) |
|
1452 |
+ } |
|
1453 |
+ |
|
1411 | 1454 |
df <- dplyr::bind_cols(df,projects.info) |
1412 | 1455 |
} |
1413 | 1456 |
|
... | ... |
@@ -163,7 +163,7 @@ |
163 | 163 |
#' @importFrom jsonlite fromJSON |
164 | 164 |
#' @importFrom knitr kable |
165 | 165 |
#' @importFrom httr timeout |
166 |
-#' @importFrom dplyr pull relocate |
|
166 |
+#' @importFrom dplyr pull |
|
167 | 167 |
GDCquery <- function(project, |
168 | 168 |
data.category, |
169 | 169 |
data.type, |
... | ... |
@@ -600,17 +600,18 @@ GDCquery <- function(project, |
600 | 600 |
message("ooo Check if there results for the query") |
601 | 601 |
if(nrow(results) == 0) stop("Sorry, no results were found for this query") |
602 | 602 |
|
603 |
- # Try ordering |
|
603 |
+ # Try ordering (needs dplyr 1.0 - still not published) |
|
604 | 604 |
results <- tryCatch({ |
605 |
- results %>% relocate("project") %>% |
|
606 |
- relocate(contains("type"), .after = project) %>% |
|
607 |
- relocate(contains("category"), .after = project) %>% |
|
608 |
- relocate(contains("experimental_strategy"), .after = project) %>% |
|
609 |
- relocate(contains("submitter_id"), .after = project) %>% |
|
610 |
- relocate(contains("sample_type"), .before = experimental_strategy) %>% |
|
611 |
- relocate(access,.after = last_col()) %>% |
|
612 |
- relocate(starts_with("analysis"), .before = access) %>% |
|
613 |
- relocate(contains("datetime"),.after = last_col()) |
|
605 |
+ results |
|
606 |
+ # results %>% relocate("project") %>% |
|
607 |
+ # relocate(contains("type"), .after = project) %>% |
|
608 |
+ # relocate(contains("category"), .after = project) %>% |
|
609 |
+ # relocate(contains("experimental_strategy"), .after = project) %>% |
|
610 |
+ # relocate(contains("submitter_id"), .after = project) %>% |
|
611 |
+ # relocate(contains("sample_type"), .before = experimental_strategy) %>% |
|
612 |
+ # relocate(access,.after = last_col()) %>% |
|
613 |
+ # relocate(starts_with("analysis"), .before = access) %>% |
|
614 |
+ # relocate(contains("datetime"),.after = last_col()) |
|
614 | 615 |
},error = function(e){ |
615 | 616 |
results |
616 | 617 |
}) |
... | ... |
@@ -145,9 +145,18 @@ test_that("IDAT files is processed", { |
145 | 145 |
# expect_true(ncol(betas) == 1) |
146 | 146 |
}) |
147 | 147 |
|
148 |
-test_that("Prepare Samples without clinical data", { |
|
148 |
+test_that("Prepare samples without clinical data", { |
|
149 | 149 |
# x <- GDCquery_clinic(project = "TCGA-LUAD", type = "clinical") |
150 | 150 |
# x[is.na(x$diagnosis_id),] |
151 | 151 |
x <- colDataPrepare(c("TCGA-80-5608-01A","TCGA-17-Z053-01A","TCGA-78-7158-01A")) |
152 | 152 |
expect_true(nrow(x) == 3) |
153 | 153 |
}) |
154 |
+ |
|
155 |
+test_that("Prepare multiple samples from the same patient", { |
|
156 |
+ # https://portal.gdc.cancer.gov/cases/d7d3de82-802d-4664-8e42-d40408b129b0?bioId=548a300f-a7eb-4dc0-b9bc-5a643ef03d5d |
|
157 |
+ x <- colDataPrepare(c("BA2691R","BA2577R","BA2748R","BA2577D")) |
|
158 |
+ expect_true(nrow(x) == 4) |
|
159 |
+ expect_equal(x["BA2748R","sample_type"],"Primary Blood Derived Cancer - Bone Marrow") |
|
160 |
+ expect_equal(x["BA2577D","sample_type"],"Recurrent Blood Derived Cancer - Bone Marrow") |
|
161 |
+ expect_true("age_at_diagnosis" %in% colnames(x)) |
|
162 |
+}) |