... | ... |
@@ -186,9 +186,9 @@ cbioportal2clinicaldf <- function(files) { |
186 | 186 |
#' @description **Note** that these functions should be used when a particular |
187 | 187 |
#' study is _not_ currently available as a `MultiAssayExperiment` |
188 | 188 |
#' representation. Otherwise, use `cBioDataPack`. Provide a `cancer_study_id` |
189 |
-#' from the `studiesTable` and retrieve the study tarball from cBioPortal. |
|
190 |
-#' These functions are used by `cBioDataPack` under the hood to download, |
|
191 |
-#' untar, and load the tarball datasets with caching. As stated in |
|
189 |
+#' from the `studiesTable` and retrieve the study tarball from the cBio |
|
190 |
+#' Genomics Portal. These functions are used by `cBioDataPack` under the hood |
|
191 |
+#' to download, untar, and load the tarball datasets with caching. As stated in |
|
192 | 192 |
#' `?cBioDataPack`, not all studies are currently working as |
193 | 193 |
#' `MultiAssayExperiment` objects. As of July 2020, about 80% of |
194 | 194 |
#' datasets can be successfully imported into the `MultiAssayExperiment` data |
... | ... |
@@ -196,6 +196,13 @@ cbioportal2clinicaldf <- function(files) { |
196 | 196 |
#' study. You may also check `studiesTable$pack_build` for a more current |
197 | 197 |
#' status. |
198 | 198 |
#' |
199 |
+#' @details When attempting to load a dataset using `loadStudy`, note that |
|
200 |
+#' the `cleanup` argument is set to `TRUE` by default. Change the argument |
|
201 |
+#' to `FALSE` if you would like to keep the untarred data in the `exdir` |
|
202 |
+#' location. `downloadStudy` and `untarStudy` are not affected by this setting. |
|
203 |
+#' The tarball of the downloaded data is cached via `BiocFileCache` when |
|
204 |
+#' `use_cache` is `TRUE`. |
|
205 |
+#' |
|
199 | 206 |
#' @param cancer_study_id character(1) The study identifier from cBioPortal as |
200 | 207 |
#' in \url{https://cbioportal.org/webAPI} |
201 | 208 |
#' |
... | ... |
@@ -203,16 +210,16 @@ cbioportal2clinicaldf <- function(files) { |
203 | 210 |
#' and use it to track downloaded data. If data is found in the cache, data will |
204 | 211 |
#' not be re-downloaded. A path can also be provided to data cache location. |
205 | 212 |
#' |
206 |
-#' @param force logical(1) (default FALSE) whether to force re-download data from |
|
207 |
-#' remote location |
|
213 |
+#' @param force logical(1) (default FALSE) whether to force re-download data |
|
214 |
+#' from remote location |
|
208 | 215 |
#' |
209 | 216 |
#' @param url_location character(1) |
210 | 217 |
#' (default "https://cbioportal-datahub.s3.amazonaws.com") the URL location for |
211 | 218 |
#' downloading packaged data. Can be set using the 'cBio_URL' option (see |
212 | 219 |
#' `?cBioDataPack` for more details) |
213 | 220 |
#' |
214 |
-#' @param names.field A character vector of possible column names for the column |
|
215 |
-#' that is used to label ranges from a mutations or copy number file. |
|
221 |
+#' @param names.field A character vector of possible column names for the |
|
222 |
+#' column that is used to label ranges from a mutations or copy number file. |
|
216 | 223 |
#' |
217 | 224 |
#' @param cancer_study_file character(1) indicates the on-disk location |
218 | 225 |
#' of the downloaded tarball |
... | ... |
@@ -223,6 +230,9 @@ cbioportal2clinicaldf <- function(files) { |
223 | 230 |
#' @param filepath character(1) indicates the folder location where |
224 | 231 |
#' the contents of the tarball are *located* (usually the same as `exdir`) |
225 | 232 |
#' |
233 |
+#' @param cleanup logical(1) whether to delete the `untar`-red contents from |
|
234 |
+#' the `exdir` folder (default TRUE) |
|
235 |
+#' |
|
226 | 236 |
#' @return \itemize{ |
227 | 237 |
#' \item {downloadStudy - The file location of the data tarball} |
228 | 238 |
#' \item {untarStudy - The directory location of the contents} |
... | ... |
@@ -294,11 +304,14 @@ untarStudy <- function(cancer_study_file, exdir = tempdir()) { |
294 | 304 |
#' @rdname downloadStudy |
295 | 305 |
#' |
296 | 306 |
#' @export |
297 |
-loadStudy <- |
|
298 |
- function( |
|
299 |
- filepath, names.field = c("Hugo_Symbol", "Entrez_Gene_Id", "Gene") |
|
300 |
- ) |
|
301 |
-{ |
|
307 |
+loadStudy <- function( |
|
308 |
+ filepath, |
|
309 |
+ names.field = c("Hugo_Symbol", "Entrez_Gene_Id", "Gene"), |
|
310 |
+ cleanup = TRUE |
|
311 |
+) { |
|
312 |
+ if (cleanup) |
|
313 |
+ on.exit(unlink(filepath, recursive = TRUE)) |
|
314 |
+ |
|
302 | 315 |
datafiles <- getRelevantFilesFromStudy( |
303 | 316 |
list.files(filepath, recursive = TRUE) |
304 | 317 |
) |
... | ... |
@@ -454,8 +467,9 @@ loadStudy <- |
454 | 467 |
#' |
455 | 468 |
#' @export |
456 | 469 |
cBioDataPack <- function(cancer_study_id, use_cache = TRUE, |
457 |
- names.field = c("Hugo_Symbol", "Entrez_Gene_Id", "Gene"), ask = TRUE) { |
|
458 |
- |
|
470 |
+ names.field = c("Hugo_Symbol", "Entrez_Gene_Id", "Gene"), |
|
471 |
+ cleanup = TRUE, ask = TRUE) |
|
472 |
+{ |
|
459 | 473 |
denv <- new.env(parent = emptyenv()) |
460 | 474 |
data("studiesTable", package = "cBioPortalData", envir = denv) |
461 | 475 |
studiesTable <- denv[["studiesTable"]] |
... | ... |
@@ -481,6 +495,6 @@ cBioDataPack <- function(cancer_study_id, use_cache = TRUE, |
481 | 495 |
|
482 | 496 |
cancer_study_file <- downloadStudy(cancer_study_id, use_cache) |
483 | 497 |
exdir <- untarStudy(cancer_study_file) |
484 |
- loadStudy(exdir, names.field) |
|
498 |
+ loadStudy(exdir, names.field, cleanup) |
|
485 | 499 |
} |
486 | 500 |
|
... | ... |
@@ -9,6 +9,7 @@ cBioDataPack( |
9 | 9 |
cancer_study_id, |
10 | 10 |
use_cache = TRUE, |
11 | 11 |
names.field = c("Hugo_Symbol", "Entrez_Gene_Id", "Gene"), |
12 |
+ cleanup = TRUE, |
|
12 | 13 |
ask = TRUE |
13 | 14 |
) |
14 | 15 |
} |
... | ... |
@@ -23,6 +24,9 @@ not be re-downloaded. A path can also be provided to data cache location.} |
23 | 24 |
\item{names.field}{A character vector of possible column names for the column |
24 | 25 |
that is used to label ranges from a mutations or copy number file.} |
25 | 26 |
|
27 |
+\item{cleanup}{logical(1) whether to delete the \code{untar}-red contents from |
|
28 |
+the \code{exdir} folder (default TRUE)} |
|
29 |
+ |
|
26 | 30 |
\item{ask}{A logical vector of length one indicating whether to prompt the |
27 | 31 |
user before downloading and loading study \code{MultiAssayExperiment}. If |
28 | 32 |
TRUE, the user will be prompted to continue for studies that are not |
... | ... |
@@ -15,7 +15,11 @@ downloadStudy( |
15 | 15 |
|
16 | 16 |
untarStudy(cancer_study_file, exdir = tempdir()) |
17 | 17 |
|
18 |
-loadStudy(filepath, names.field = c("Hugo_Symbol", "Entrez_Gene_Id", "Gene")) |
|
18 |
+loadStudy( |
|
19 |
+ filepath, |
|
20 |
+ names.field = c("Hugo_Symbol", "Entrez_Gene_Id", "Gene"), |
|
21 |
+ cleanup = TRUE |
|
22 |
+) |
|
19 | 23 |
} |
20 | 24 |
\arguments{ |
21 | 25 |
\item{cancer_study_id}{character(1) The study identifier from cBioPortal as |
... | ... |
@@ -25,8 +29,8 @@ in \url{https://cbioportal.org/webAPI}} |
25 | 29 |
and use it to track downloaded data. If data is found in the cache, data will |
26 | 30 |
not be re-downloaded. A path can also be provided to data cache location.} |
27 | 31 |
|
28 |
-\item{force}{logical(1) (default FALSE) whether to force re-download data from |
|
29 |
-remote location} |
|
32 |
+\item{force}{logical(1) (default FALSE) whether to force re-download data |
|
33 |
+from remote location} |
|
30 | 34 |
|
31 | 35 |
\item{url_location}{character(1) |
32 | 36 |
(default "https://cbioportal-datahub.s3.amazonaws.com") the URL location for |
... | ... |
@@ -42,8 +46,11 @@ the contents of the tarball (default \code{tempdir()}; see also \code{?untar})} |
42 | 46 |
\item{filepath}{character(1) indicates the folder location where |
43 | 47 |
the contents of the tarball are \emph{located} (usually the same as \code{exdir})} |
44 | 48 |
|
45 |
-\item{names.field}{A character vector of possible column names for the column |
|
46 |
-that is used to label ranges from a mutations or copy number file.} |
|
49 |
+\item{names.field}{A character vector of possible column names for the |
|
50 |
+column that is used to label ranges from a mutations or copy number file.} |
|
51 |
+ |
|
52 |
+\item{cleanup}{logical(1) whether to delete the \code{untar}-red contents from |
|
53 |
+the \code{exdir} folder (default TRUE)} |
|
47 | 54 |
} |
48 | 55 |
\value{ |
49 | 56 |
\itemize{ |
... | ... |
@@ -56,9 +63,9 @@ that is used to label ranges from a mutations or copy number file.} |
56 | 63 |
\strong{Note} that these functions should be used when a particular |
57 | 64 |
study is \emph{not} currently available as a \code{MultiAssayExperiment} |
58 | 65 |
representation. Otherwise, use \code{cBioDataPack}. Provide a \code{cancer_study_id} |
59 |
-from the \code{studiesTable} and retrieve the study tarball from cBioPortal. |
|
60 |
-These functions are used by \code{cBioDataPack} under the hood to download, |
|
61 |
-untar, and load the tarball datasets with caching. As stated in |
|
66 |
+from the \code{studiesTable} and retrieve the study tarball from the cBio |
|
67 |
+Genomics Portal. These functions are used by \code{cBioDataPack} under the hood |
|
68 |
+to download, untar, and load the tarball datasets with caching. As stated in |
|
62 | 69 |
\code{?cBioDataPack}, not all studies are currently working as |
63 | 70 |
\code{MultiAssayExperiment} objects. As of July 2020, about 80\% of |
64 | 71 |
datasets can be successfully imported into the \code{MultiAssayExperiment} data |
... | ... |
@@ -66,6 +73,14 @@ class. Please open an issue if you would like the team to prioritize a |
66 | 73 |
study. You may also check \code{studiesTable$pack_build} for a more current |
67 | 74 |
status. |
68 | 75 |
} |
76 |
+\details{ |
|
77 |
+When attempting to load a dataset using \code{loadStudy}, note that |
|
78 |
+the \code{cleanup} argument is set to \code{TRUE} by default. Change the argument |
|
79 |
+to \code{FALSE} if you would like to keep the untarred data in the \code{exdir} |
|
80 |
+location. \code{downloadStudy} and \code{untarStudy} are not affected by this setting. |
|
81 |
+The tarball of the downloaded data is cached via \code{BiocFileCache} when |
|
82 |
+\code{use_cache} is \code{TRUE}. |
|
83 |
+} |
|
69 | 84 |
\examples{ |
70 | 85 |
|
71 | 86 |
(acc_file <- downloadStudy("acc_tcga")) |