utils::globalVariables(c("clinicalAttributeId", "value", "sampleId")) #' @name cBioPortal-class #' #' @title A class for representing the cBioPortal API protocol #' #' @description The `cBioPortal` class is a representation of the cBioPortal #' API protocol that directly inherits from the `Service` class in the #' `AnVIL` package. For more information, see the 'AnVIL' package. #' #' @details This class takes the static API as provided at #' \url{https://www.cbioportal.org/api/api-docs} and creates an R object #' with the help from underlying infrastructure (i.e., 'rapiclient' and #' 'AnVIL') to give the user a unified representation of the API #' specification provided by the cBioPortal group. Users are not #' expected to interact with this class other than to use it as input #' to the functionality provided by the rest of the package. #' #' @importFrom methods new #' #' @seealso \link{cBioPortal}, \link[AnVIL]{Service} #' #' @examples #' #' cBioPortal() #' #' @export .cBioPortal <- setClass("cBioPortal", contains = "Service") #' @rdname cBioPortal #' #' @aliases cBioPortal #' #' @title The R interface to the cBioPortal API Data Service #' #' @description This function gives users access to the cBioPortal data with an #' API representation invoked from `cBioPortal()` #' #' @param api An API object of class `cBioPortal` from the `cBioPortal` #' function #' #' @param studyId character(1) Indicates the "studyId" as taken from #' `getStudies` #' #' @param keyword character(1) Keyword or pattern for searching through #' available operations #' #' @param molecularProfileId character(1) Indicates a molecular profile ID #' #' @param molecularProfileIds character() A vector of molecular profile IDs #' #' @param entrezGeneIds numeric() A vector indicating entrez gene IDs #' #' @param sampleIds character() Sample identifiers #' #' @param genePanelId character(1) Identifies the gene panel, as obtained #' from the `genePanels` function #' #' @return #' #' cBioPortal: An API object of class 'cBioPortal' #' #' cBioPortalData: A data object of class 'MultiAssayExperiment' #' #' @importFrom AnVIL Service #' #' @examples #' cbio <- cBioPortal() #' #' getStudies(api = cbio) #' #' searchOps(api = cbio, keyword = "molecular") #' #' clinicalData(api = cbio, studyId = "acc_tcga") #' #' molecularProfiles(api = cbio, studyId = "acc_tcga") #' #' molecularData( #' api = cbio, #' molecularProfileId = "acc_tcga_rna_seq_v2_mrna", #' entrezGeneIds = c(1, 2), #' sampleIds = c("TCGA-OR-A5J1-01", "TCGA-OR-A5J2-01") #' ) #' #' sampleLists(api = cbio, studyId = "acc_tcga") #' #' samplesInSampleLists( #' api = cbio, #' sampleListIds = c("acc_tcga_rppa", "acc_tcga_cnaseq") #' ) #' #' genePanels(api = cbio) #' #' getGenePanel(api = cbio, genePanelId = "IMPACT341") #' #' @export cBioPortal <- function() { .cBioPortal( Service( service = "cBioPortal", host = "www.cbioportal.org", config = httr::config(ssl_verifypeer = 0L, ssl_verifyhost = 0L, http_version = 0L), authenticate = FALSE, api_url = "https://www.cbioportal.org/api/api-docs", package = "cBioPortalData", schemes = "http" ) ) } #' @name cBioPortal #' #' @section API Metadata: #' * getStudies - Obtain a table of studies and associated metadata #' #' @export getStudies <- function(api) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") digi <- .inputDigest(match.call(), "getStudies") cacheloc <- .getHashCache(digi) if (file.exists(cacheloc)) { load(cacheloc) } else { query <- .invoke_fun(api, "getAllStudiesUsingGET") studies <- httr::content(query) studies <- lapply(studies, function(x) { if (is.null(x[["pmid"]])) x[["pmid"]] <- NA_character_ if (is.null(x[["citation"]])) x[["citation"]] <- NA_character_ x }) save(studies, file = cacheloc) } dplyr::bind_rows(studies) } #' @name cBioPortal #' #' @section Patient Data: #' * clinicalData - Obtain clinical data for a particular study identifier #' ('studyId') #' #' @export clinicalData <- function(api, studyId = NA_character_) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") validStudy <- .checkIdValidity(api, element = studyId, ename = "studyId") if (!validStudy) stop("Provide a valid 'studyId' from 'getStudies()'") digi <- .inputDigest(match.call(), "clinicalData") cacheloc <- .getHashCache(digi) if (file.exists(cacheloc)) { load(cacheloc) } else { pttable <- .invoke_bind(api, "getAllPatientsInStudyUsingGET", studyId = studyId) ptrow <- lapply(pttable[["patientId"]], function(pt) { .invoke_bind(api, "getAllClinicalDataOfPatientInStudyUsingGET", studyId = studyId, patientId = pt) }) save(ptrow, file = cacheloc) } clin <- dplyr::bind_rows(ptrow) tidyr::spread(clin, clinicalAttributeId, value) } #' @name cBioPortal #' #' @section Molecular Profiles: #' * molecularProfiles - Produce a molecular profiles dataset for a given #' study identifier ('studyId') #' #' @param projection character(default: "SUMMARY") Specify the projection #' type for data retrieval for details see API documentation #' #' @export molecularProfiles <- function(api, studyId = NA_character_, projection = c("SUMMARY", "ID", "DETAILED", "META")) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") validStudy <- .checkIdValidity(api, element = studyId, ename = "studyId") if (!validStudy) stop("Provide a valid 'studyId' from 'getStudies()'") projection <- match.arg(projection) mols <- .invoke_fun(api, "getAllMolecularProfilesInStudyUsingGET", use_cache = TRUE, studyId = studyId, projection = projection) cmols <- httr::content(mols) if (projection %in% c("SUMMARY", "ID")) dplyr::bind_rows(cmols) else cmols } #' @name cBioPortal #' #' @section Molecular Profiles: #' * molecularData - Produce a dataset of molecular profile data based on #' `molecularProfileId`, `entrezGeneIds`, and `sampleIds` #' #' @export molecularData <- function(api, molecularProfileId = NA_character_, entrezGeneIds = NULL, sampleIds = NULL, check = TRUE) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") validMolProf <- .checkIdValidity(api, element = molecularProfileId, ename = "molecularProfileId", check = check) if (!validMolProf) stop("Provide a valid 'molecularProfileId' from 'molecularProfiles()'") if (is.null(entrezGeneIds)) stop("Provide a character vector of 'entrezGeneIds'") if (is.null(sampleIds)) stop("Provide a character vector of 'sampleIds'") mutation <- grepl("mutation", molecularProfileId) endpoint <- if (mutation) "fetchMutationsInMolecularProfileUsingPOST" else "fetchAllMolecularDataInMolecularProfileUsingPOST" byGene <- .invoke_bind(api, endpoint, use_cache = TRUE, molecularProfileId = molecularProfileId, entrezGeneIds = sort(entrezGeneIds), sampleIds = sort(sampleIds) ) if ("message" %in% names(byGene) || !length(byGene)) { msg <- byGene[["message"]] if (length(msg)) warning(msg) else warning("No data found for molecularProfileId: ", molecularProfileId) dplyr::tibble() } else { byGene } } #' @name cBioPortal #' #' @section API Metadata: #' * searchOps - Search through API operations with a keyword #' #' @export searchOps <- function(api, keyword) { grep(keyword, names(AnVIL::operations(api)), value = TRUE, ignore.case = TRUE) } #' @name cBioPortal #' #' @section API Metadata: #' * geneTable - Get a table of all genes by 'entrezGeneId' or #' 'hugoGeneSymbol' #' #' @param ... Additional arguments to lower level API functions #' #' @export geneTable <- function(api, ...) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") gres <- .invoke_fun(api, "getAllGenesUsingGET", TRUE, ...) glist <- httr::content(gres) glix <- lapply(glist, function(x) { if (is.null(x[["cytoband"]])) x[["cytoband"]] <- NA_character_ if (is.null(x[["length"]])) x[["length"]] <- NA_integer_ if (is.null(x[["chromosome"]])) x[["chromosome"]] <- NA_character_ x }) dplyr::bind_rows(glix) } #' @name cBioPortal #' #' @section Sample Data: #' * samplesInSampleLists - get all samples associated with a 'sampleListId' #' #' @param sampleListIds character() A vector of 'sampleListId' as obtained from #' `sampleLists` #' #' @export samplesInSampleLists <- function(api, sampleListIds = NA_character_, check = TRUE) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") validSLI <- .checkIdValidity(api, element = sampleListIds, ename = "sampleListId", use_cache = TRUE, check = check) if (!validSLI) stop("Provide valid 'sampleListIds' from 'sampleLists()'") sampleListIds <- sort(sampleListIds) sampleListIds <- stats::setNames(sampleListIds, sampleListIds) digi <- .inputDigest(match.call(), "samplesInSampleLists") cacheloc <- .getHashCache(digi) if (file.exists(cacheloc)) { load(cacheloc) } else { meta <- structure(vector("list", length(sampleListIds)), .Names = sampleListIds) res <- lapply(sampleListIds, function(x) { res <- .invoke_fun(api, "getSampleListUsingGET", sampleListId = x) res2 <- httr::content(res) meta[[x]] <<- res2[names(res2) != "sampleIds"] unlist(res2[["sampleIds"]]) }) res <- IRanges::CharacterList(res) meta <- dplyr::bind_rows(meta) metadata(res) <- meta save(res, file = cacheloc) } res } #' @name cBioPortal #' #' @section API Metadata: #' * sampleLists - obtain all `sampleListIds` for a particular `studyId` #' #' @export sampleLists <- function(api, studyId = NA_character_) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") validStudy <- .checkIdValidity(api, element = studyId, ename = "studyId") if (!validStudy) stop("Provide a valid 'studyId' from 'getStudies()'") .invoke_bind(api, "getAllSampleListsInStudyUsingGET", TRUE, studyId = studyId) } #' @name cBioPortal #' #' @section API Metadata: #' * allSamples - obtain all samples within a particular `studyId` #' #' @export allSamples <- function(api, studyId = NA_character_) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") validStudy <- .checkIdValidity(api, element = studyId, ename = "studyId") if (!validStudy) stop("Provide a valid 'studyId' from 'getStudies()'") .invoke_bind(api, "getAllSamplesInStudyUsingGET", TRUE, studyId = studyId) } #' @name cBioPortal #' #' @section API Metadata: #' * genePanels - Show all available gene panels #' #' @export genePanels <- function(api) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") .invoke_bind(api, "getAllGenePanelsUsingGET", TRUE) } #' @name cBioPortal #' #' @section Gene Panels: #' * getGenePanels - Obtain the gene panel for a particular 'genePanelId' #' #' @export getGenePanel <- function(api, genePanelId = NA_character_) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") validGP <- .checkIdValidity(api, element = genePanelId, ename = "genePanelId") if (!validGP) stop("Provide a valid 'genePanelId' from 'genePanels()'") res <- .invoke_fun(api, "getGenePanelUsingGET", TRUE, genePanelId = genePanelId) res <- httr::content(res)[["genes"]] dplyr::bind_rows(res) } #' @name cBioPortal #' #' @section Gene Panels: #' * genePanelMolecular - get gene panel data for a paricular #' `molecularProfileId` and `sampleListId` combination #' #' @param sampleListId character(1) A sample list identifier as obtained from #' `sampleLists()`` #' #' @export genePanelMolecular <- function(api, molecularProfileId = NA_character_, sampleListId = NULL, sampleIds = NULL) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") validMolProf <- .checkIdValidity(api, element = molecularProfileId, ename = "molecularProfileId") if (!validMolProf) stop("Provide a valid 'molecularProfileId' from 'molecularProfiles()'") if (!is.null(sampleListId)) .invoke_bind(api, "getGenePanelDataUsingPOST", use_cache = TRUE, molecularProfileId = molecularProfileId, sampleListId = list(sampleListId = sampleListId) ) else if (!is.null(sampleIds)) .invoke_bind(api, "getGenePanelDataUsingPOST", use_cache = TRUE, molecularProfileId = molecularProfileId, sampleIds = list(sampleIds = sort(sampleIds)) ) else stop("Provide either 'sampleIds' or a 'sampleListId'") } #' @name cBioPortal #' #' @section Gene Panels: #' * getGenePanelMolecular - get gene panel data for a combination of #' `molecularProfileId` and `sampleListId` vectors #' #' @export getGenePanelMolecular <- function(api, molecularProfileIds = NA_character_, sampleIds) { validMolProf <- .checkIdValidity(api, element = molecularProfileIds, ename = "molecularProfileId") if (!validMolProf) stop( paste0("Provide multiple valid 'molecularProfileIds' from", " 'molecularProfiles()'") ) if (missing(sampleIds)) stop( paste0("Provide valid 'sampleIds' from 'samplesInSampleLists()'", " or 'allSamples()'") ) SampMolIds <- S4Vectors::expand.grid( molecularProfileId = sort(molecularProfileIds), sampleId = sort(sampleIds) ) SampMolIds <- SampMolIds[order(SampMolIds[["molecularProfileId"]]), ] .invoke_bind(api, "fetchGenePanelDataInMultipleMolecularProfilesUsingPOST", TRUE, sampleMolecularIdentifiers = SampMolIds ) } #' @name cBioPortal #' #' @section Sample Data: #' * getSampleInfo - Obtain sample metadata for a particular `studyId` or #' `sampleListId` #' @export getSampleInfo <- function(api, studyId = NA_character_, sampleListIds = NULL, projection = c("SUMMARY", "ID", "DETAILED", "META")) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") validStudy <- .checkIdValidity(api, element = studyId, ename = "studyId") if (!validStudy) stop("Provide a valid 'studyId' from 'getStudies()'") projection <- match.arg(projection) if (!is.null(sampleListIds)) queryobj <- list(sampleListIds = sampleListIds) else queryobj <- list(sampleIdentifiers = as.data.frame( allSamples(api, studyId)[, c("sampleId", "studyId")] ) ) .invoke_bind(api, "fetchSamplesUsingPOST", TRUE, projection = projection, sampleIdentifiers = queryobj ) } #' @name cBioPortal #' #' @section Gene Panels: #' * getDataByGenePanel - Download data for a gene panel and #' `molecularProfileId` combination, optionally a `sampleListId` can be #' provided. #' #' @param check logical(1) Whether to check the inputs against values from the #' API (i.e., for 'studyId', 'genePanelId', 'molecularProfileId', and #' 'sampleListId') #' #' @examples #' #' getDataByGenePanel(cbio, studyId = "acc_tcga", genePanelId = "IMPACT341", #' molecularProfileId = "acc_tcga_rppa") #' #' @export getDataByGenePanel <- function(api, studyId = NA_character_, genePanelId = NA_character_, molecularProfileId = NULL, sampleListId = NULL, check = TRUE) { if (missing(api)) stop("Provide a valid 'api' from 'cBioPortal()'") validStudy <- .checkIdValidity(api, element = studyId, ename = "studyId", check = check) if (!validStudy) stop("Provide a valid 'studyId' from 'getStudies()'") validGP <- .checkIdValidity(api, element = genePanelId, ename = "genePanelId", check = check) if (!validGP) stop("Provide a valid 'genePanelId' from 'genePanels()'") if (!is.null(sampleListId)) samples <- samplesInSampleLists(api, sampleListId, check = check)[[1L]] else samples <- allSamples(api, studyId)[["sampleId"]] panel <- getGenePanel(api, genePanelId = genePanelId) molecularData <- molecularData(api = api, molecularProfileId = molecularProfileId, entrezGeneIds = panel[["entrezGeneId"]], sampleIds = samples, check = check) suppressMessages({ dplyr::left_join(molecularData, panel) }) }